DHMM_RECOG_MFC.cpp
上传用户:avbj512
上传日期:2013-09-18
资源大小:6239k
文件大小:15k
源码类别:

DSP编程

开发平台:

Visual C++

  1. // DHMM_RECOG_MFC.cpp:
  2. // Implementation of the DHMM_RECOG_MFC Module.
  3. // It computes the maximum likelihood (maximum output probability).
  4. //
  5. // Created 2001/08, By DongMing, MDSR.
  6. //
  7. //////////////////////////////////////////////////////////////////////
  8. #include "stdafx.h"
  9. #include "DHMM_Recog_MFC.h"
  10. #include "kwspot.h"
  11. #include "DHMM_Model_MFC.h"
  12. #include "DAT_File_Access.h"
  13. #include "DHMM_VQ_MFC.h"
  14. #include "DHMM_GL.h"
  15. #include "DHMM_LHS.h"
  16. #include "DHMM_HQ.h"
  17. extern PRO_CONFIG u_Pro_Config;
  18. //////////////////////////////////////////////////////////////////////
  19. // API functions
  20. int DHMM_Recog_Kitty(void)
  21. {
  22. u_Pro_Config.l_DHMM_Recog_Config_Set = RECOG_CONFIG_DHMM_RECOG_TRAINSET;
  23. PRO_LOG("tRecognition using Train Set...n");
  24. //DHMM_Recog_One_Set();
  25. RESULT_LOG("n--------Train Set. Accuracy Ratet%dttt%8.4fn", u_Pro_Config.n_DHMM_Model_Person_Start_Index, u_Pro_Config.d_Recognition_Accurate_Rate);
  26. u_Pro_Config.l_DHMM_Recog_Config_Set = RECOG_CONFIG_DHMM_RECOG_TESTSET;
  27. PRO_LOG("tRecognition using Test Set...n");
  28. DHMM_Recog_One_Set();
  29. RESULT_LOG("n--------Test Set. Accuracy Ratet%dttt%8.4fn", u_Pro_Config.n_DHMM_Model_Person_Start_Index, u_Pro_Config.d_Recognition_Accurate_Rate);
  30. return 0;
  31. }
  32. int DHMM_Recog_One_Set(void)
  33. {
  34. int n_Recog_Result, n_True_Result;
  35. char sz_Feature_File_Name[256];
  36. int n_Recog_Result_Good_Count = 0, n_Recog_Result_Bad_Count = 0;
  37. int n_Person_Index, n_Word_Sample_Index;
  38. int Total_Model_Num;
  39. Total_Model_Num =u_Pro_Config.n_DHMM_Model_Num;
  40. double* pd_DHMM_Model_Probably;
  41. pd_DHMM_Model_Probably = new double[Total_Model_Num];
  42. ASSERT(pd_DHMM_Model_Probably);
  43. DYNA_2DIM_INT_ARRAY d2dna_Recog_Table = d2dna_New(u_Pro_Config.n_DHMM_Model_Num, Total_Model_Num);
  44. for (int n_Sentence_Index = 0; n_Sentence_Index < u_Pro_Config.n_DHMM_Model_Num; n_Sentence_Index++)
  45. for (int nTmp = 0; nTmp < Total_Model_Num; nTmp++)
  46. d2dna_Recog_Table[n_Sentence_Index][nTmp] = 0;
  47. // 对所有测试集的人进行循环
  48. for (n_Person_Index = 0; n_Person_Index < u_Pro_Config.n_Recog_Person_Num;  n_Person_Index++)//u_Pro_Config.n_Recog_Person_Num; n_Person_Index++)
  49. {
  50. //  判断是否测试集中的人,不是则跳出此轮循环
  51. if(u_Pro_Config.l_DHMM_Recog_Config_Set == RECOG_CONFIG_DHMM_RECOG_TRAINSET)
  52. {
  53. if (u_Pro_Config.n_Recog_Person_Start_Index <= u_Pro_Config.n_Recog_Person_End_Index)
  54. {
  55. if (!((n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index) || (n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index))) continue;
  56. }
  57. else
  58. {
  59. if (!((n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index) && (n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index))) continue;
  60. }
  61. }
  62. else if(u_Pro_Config.l_DHMM_Recog_Config_Set == RECOG_CONFIG_DHMM_RECOG_TESTSET)
  63. {
  64. if (u_Pro_Config.n_Recog_Person_Start_Index <= u_Pro_Config.n_Recog_Person_End_Index)
  65. {
  66. if ((n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index) || (n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index)) continue;
  67. }
  68. else
  69. {
  70. if ((n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index) && (n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index)) continue;
  71. }
  72. }
  73. // 对每个人的每个词,进行一次识别测试
  74. sprintf(sz_Feature_File_Name, u_Pro_Config.sz_Recog_Origin_File_Name_Format, n_Person_Index);
  75. for (n_Word_Sample_Index = 0; n_Word_Sample_Index < u_Pro_Config.n_Recog_Sentence_Num; n_Word_Sample_Index++)
  76. {
  77. int Sen_Index = n_Word_Sample_Index + u_Pro_Config.n_Sentence_Start_Index;
  78. n_Recog_Result = DHMM_Recog_Basic(sz_Feature_File_Name, Sen_Index, pd_DHMM_Model_Probably);
  79. n_True_Result = n_Word_Sample_Index % WORD_ITEM_NUM;
  80. if (u_Pro_Config.WATCH_RECOG_RESULT == 1)
  81. {
  82. DHMM_LLS_Ret(pd_DHMM_Model_Probably, Total_Model_Num, n_True_Result, n_Recog_Result);
  83. }
  84. // 统计识别正确性
  85. d2dna_Recog_Table[n_True_Result][n_Recog_Result]++;
  86. if (n_Recog_Result == n_True_Result)
  87. n_Recog_Result_Good_Count++;
  88. else
  89. {
  90. n_Recog_Result_Bad_Count++;
  91. }
  92. }
  93. PRO_LOG("tNumber:%4d person, recognition %4d right!r", n_Person_Index, n_Recog_Result_Good_Count);
  94.   }
  95. // 计算识别率
  96. u_Pro_Config.d_Recognition_Accurate_Rate = n_Recog_Result_Good_Count / double(n_Recog_Result_Good_Count + n_Recog_Result_Bad_Count) * 100;
  97. PRO_LOG("nAccurate Rate = %d / %d = %10.4f%%.n", n_Recog_Result_Good_Count, (n_Recog_Result_Good_Count + n_Recog_Result_Bad_Count), u_Pro_Config.d_Recognition_Accurate_Rate);
  98. /**/
  99. PRO_LOG("  Recognition Table:n");
  100. PRO_LOG("t");
  101. for (int nTmp = 0; nTmp < Total_Model_Num; nTmp++)
  102. PRO_LOG("%2d  ", nTmp);
  103. PRO_LOG("n");
  104. PRO_LOG("t-------------------------------------------n");
  105. for (n_Sentence_Index = 0; n_Sentence_Index < u_Pro_Config.n_DHMM_Model_Num; n_Sentence_Index++)
  106. {
  107. PRO_LOG("t");
  108. for (nTmp = 0; nTmp < Total_Model_Num; nTmp++)
  109. PRO_LOG("%2d  ", d2dna_Recog_Table[n_Sentence_Index][nTmp]);
  110. PRO_LOG("n");
  111. }
  112. PRO_LOG("n");
  113. d2dna_Free(d2dna_Recog_Table, u_Pro_Config.n_DHMM_Model_Num, Total_Model_Num);
  114. delete[] pd_DHMM_Model_Probably;
  115. return 0;
  116. }
  // Appends one recognition record to the result text file.
  //
  // The record consists of the Total_Model_Num scores (each printed with
  // "%7.0f, "), followed by the recognised label and the expected label.
  // No line terminator is written after the record.
  //
  // Parameters:
  //   pd_DHMM_Model_Probably - array of Total_Model_Num scores
  //   Total_Model_Num        - number of scores to write
  //   n_True_Result          - expected label
  //   n_Recog_Result         - recognised label
  //
  // Returns: 0 on success, -1 if the result file cannot be opened.
  int DHMM_LLS_Ret(double* pd_DHMM_Model_Probably, const int Total_Model_Num, const int n_True_Result, const int n_Recog_Result)
  {
      // Backslashes must be doubled inside a string literal; single ones form
      // invalid escape sequences.
      static const char sz_Ret_File[] = "..\\..\\data\\Recog_Ret\\Kitty_Recog_LLS_Ret.txt";

      FILE* fp = fopen(sz_Ret_File, "a+");
      if (fp == NULL)
      {
          // Checked explicitly: a debug-only assertion would let release
          // builds pass a NULL stream to fprintf.
          return -1;
      }

      for (int n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
      {
          fprintf(fp, "%7.0f, ", pd_DHMM_Model_Probably[n_Model_Index]);
      }
      fprintf(fp, "  %2d ", n_Recog_Result);
      fprintf(fp, "%2d ", n_True_Result);

      fclose(fp);
      return 0;
  }
  133. //////////////////////////////////////////////////////////////////////
  134. // 函数名称:DHMM_Recog_Basic
  135. // 函数功能:基本识别(一次识别)
  136. // 函数性质:API
  137. // 输入参数:
  138. // sz_Feature_File_Name,待识别说话人的特征文件名
  139. // n_Word_Sample_Index,待识别词条标号
  140. // 输出参数:
  141. // 无
  142. // 返回值:
  143. // 识别结果
  144. // 备注:仅仅使用Viterbi或前后向,比较似然分数
  145. int DHMM_Recog_Basic(char * sz_Feature_File_Name, int n_Word_Sample_Index, double* pd_DHMM_Model_Probably)
  146. {
  147. int nRetCode;
  148. DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Book;
  149. DHMM_MODEL * pu_DHMM_Model;
  150. WORD_SAMPLE u_Word_Sample;
  151. //double * pd_DHMM_Model_Probably;
  152. DYNA_2DIM_INT_ARRAY d2dna_DHMM_Model_Sequence;
  153. int n_Model_Index;
  154. int n_Recog_Result;
  155. double d_Recog_Result_Probably;
  156. static int n_file_count = 0;
  157. int Total_Model_Num;
  158. Total_Model_Num =u_Pro_Config.n_DHMM_Model_Num;
  159. // 读入码书
  160. d2dda_Code_Book = d2dda_New(u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
  161. ASSERT(d2dda_Code_Book != NULL);
  162. nRetCode = DHMM_VQ_Load_Code_Book_File(u_Pro_Config.sz_Toload_Code_Book_File_Name,
  163. d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
  164. ASSERT(nRetCode == 0);
  165. if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == (MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL))
  166. u_Pro_Config.n_DHMM_Model_State_Num += 2;
  167. else if((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) ==  MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL)
  168. u_Pro_Config.n_DHMM_Model_State_Num += 2;
  169. // 为各个词条的模型准备内存,并读入模型
  170. pu_DHMM_Model = new DHMM_MODEL[Total_Model_Num];
  171. ASSERT(pu_DHMM_Model);
  172. for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
  173. {
  174. pu_DHMM_Model[n_Model_Index].n_State_Num = u_Pro_Config.n_DHMM_Model_State_Num;
  175. pu_DHMM_Model[n_Model_Index].n_Code_Book_Size = u_Pro_Config.n_VQ_Code_Book_Size;
  176. pu_DHMM_Model[n_Model_Index].pdPi = new double[u_Pro_Config.n_DHMM_Model_State_Num];
  177. pu_DHMM_Model[n_Model_Index].d2dda_A = d2dda_New(u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_DHMM_Model_State_Num);
  178. pu_DHMM_Model[n_Model_Index].d2dda_B = d2dda_New(u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_VQ_Code_Book_Size);
  179. ASSERT((pu_DHMM_Model[n_Model_Index].pdPi != NULL)
  180. && (pu_DHMM_Model[n_Model_Index].d2dda_A != NULL)
  181. && (pu_DHMM_Model[n_Model_Index].d2dda_B != NULL));
  182. }
  183. if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL ||
  184. (u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL)
  185. nRetCode = DHMM_Model_Load_DHMM_Model_File_With_Silence(u_Pro_Config.sz_Toload_DHMM_Model_File_Name, pu_DHMM_Model, Total_Model_Num);
  186. ASSERT(nRetCode == 0);
  187. // 准备该词条的内存空间
  188. u_Word_Sample.n_Feature_Sequence_Len = dfa_Feature_Get_Sentence_Frame_Num(sz_Feature_File_Name, n_Word_Sample_Index);
  189. u_Word_Sample.n_Feature_Dim = u_Pro_Config.n_Feature_Dim;
  190. int word_len = u_Word_Sample.n_Feature_Sequence_Len;
  191. //ASSERT(word_len >= 0);
  192. if(word_len <= 0)
  193. {
  194. // 释放模型内存,释放码书内存
  195. for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
  196. {
  197. delete[] pu_DHMM_Model[n_Model_Index].pdPi;
  198. d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_A, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_DHMM_Model_State_Num);
  199. d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_B, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_VQ_Code_Book_Size);
  200. }
  201. if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == (MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL))
  202. u_Pro_Config.n_DHMM_Model_State_Num -= 2;
  203. else if((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) ==  MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL)
  204. u_Pro_Config.n_DHMM_Model_State_Num -= 2;
  205. delete[] pu_DHMM_Model;
  206. d2dda_Free(d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
  207. return 0;
  208. }
  209. u_Word_Sample.d2dda_Feature_Sequence =
  210. d2dda_New(u_Word_Sample.n_Feature_Sequence_Len, u_Word_Sample.n_Feature_Dim);
  211. u_Word_Sample.pn_VQed_Feature_Sequence = 
  212. new int[u_Word_Sample.n_Feature_Sequence_Len];
  213. ASSERT((u_Word_Sample.d2dda_Feature_Sequence  != NULL)
  214. && (u_Word_Sample.pn_VQed_Feature_Sequence != NULL));
  215. // 读入词条特征
  216. nRetCode = dfa_Feature_Read_A_Sentence(sz_Feature_File_Name, n_Word_Sample_Index, u_Pro_Config.n_Feature_Dim,
  217. u_Word_Sample.d2dda_Feature_Sequence);
  218. ASSERT(nRetCode == u_Word_Sample.n_Feature_Sequence_Len);
  219. // VQ特征
  220. nRetCode = DHMM_VQ_Encode_A_Word_Sample(d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim,
  221. &u_Word_Sample);
  222. ASSERT(nRetCode == 0);
  223. int fea_num = u_Word_Sample.n_Feature_Dim;
  224. // 释放原始特征参数空间
  225. d2dda_Free(u_Word_Sample.d2dda_Feature_Sequence, u_Word_Sample.n_Feature_Sequence_Len, u_Word_Sample.n_Feature_Dim);
  226. // 为匹配分数准备内存
  227. //pd_DHMM_Model_Probably = new double[u_Pro_Config.n_DHMM_Model_Num];
  228. d2dna_DHMM_Model_Sequence = d2dna_New(Total_Model_Num, u_Word_Sample.n_Feature_Sequence_Len);
  229. ASSERT((pd_DHMM_Model_Probably != NULL) && (d2dna_DHMM_Model_Sequence != NULL));
  230. // 计算该词条与每一个模型的匹配分数
  231. for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
  232. {
  233. if ((u_Pro_Config.l_DHMM_Recog_Config & RECOG_CONFIG_METHOD_MASK) == RECOG_CONFIG_METHOD_VITERBI_ONLY)
  234. {
  235. nRetCode = DHMM_Recog_Viterbi(&pu_DHMM_Model[n_Model_Index], &u_Word_Sample, &pd_DHMM_Model_Probably[n_Model_Index], d2dna_DHMM_Model_Sequence[n_Model_Index]);
  236. ASSERT(nRetCode == 0);
  237. }
  238. }
  239. // 选出最佳匹配
  240. n_Recog_Result = -1;
  241. d_Recog_Result_Probably = -MAX_DOUBLE_VALUE;
  242. for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
  243. {
  244. pd_DHMM_Model_Probably[n_Model_Index] /= word_len;
  245. if (pd_DHMM_Model_Probably[n_Model_Index] > d_Recog_Result_Probably)
  246. {
  247. n_Recog_Result = n_Model_Index;
  248. d_Recog_Result_Probably = pd_DHMM_Model_Probably[n_Model_Index];
  249. }
  250. }
  251. // 释放词条VQ后特征参数占用空间
  252. delete[] u_Word_Sample.pn_VQed_Feature_Sequence;
  253. // 释放匹配分数占用空间
  254. //delete[] pd_DHMM_Model_Probably;
  255. d2dna_Free(d2dna_DHMM_Model_Sequence, Total_Model_Num, u_Word_Sample.n_Feature_Sequence_Len);
  256. // 释放模型内存,释放码书内存
  257. for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
  258. {
  259. delete[] pu_DHMM_Model[n_Model_Index].pdPi;
  260. d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_A, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_DHMM_Model_State_Num);
  261. d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_B, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_VQ_Code_Book_Size);
  262. }
  263. if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == (MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL))
  264. u_Pro_Config.n_DHMM_Model_State_Num -= 2;
  265. else if((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) ==  MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL)
  266. u_Pro_Config.n_DHMM_Model_State_Num -= 2;
  267. delete[] pu_DHMM_Model;
  268. d2dda_Free(d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
  269. return n_Recog_Result;
  270. }
  271. //////////////////////////////////////////////////////////////////////
  272. // 函数名称:DHMM_Recog_Viterbi
  273. // 函数功能:计算Viterbi方法的最大似然值
  274. // 函数性质:API
  275. // 输入参数:
  276. // pu_DHMM_Model,DHMM模型
  277. // pu_Word_Sample,要计算的词
  278. // 输出参数:
  279. // pd_Max_Likelihood,存放计算的似然值
  280. // pn_Status_Sequence,存放各帧所经历的状态
  281. // 返回值:
  282. // 0 表示成功
  283. // 备注:该函数是一个分发函数,按照u_Pro_Config.l_DHMM_Recog_Config配置的信息
  284. // 将具体调用不同的函数体
  285. int DHMM_Recog_Viterbi(DHMM_MODEL * pu_DHMM_Model,
  286.    WORD_SAMPLE * pu_Word_Sample,
  287.    double * pd_Max_Likelihood, int * pn_Status_Sequence)
  288. {
  289. switch (u_Pro_Config.l_DHMM_Recog_Config & RECOG_CONFIG_RECOG_PROCEDURE_MASK)
  290. {
  291. case RECOG_CONFIG_RECOG_PROCEDURE_STD:
  292. ASSERT(0);
  293. break;
  294. case RECOG_CONFIG_RECOG_PROCEDURE_GL:
  295. ASSERT(0);
  296. break;
  297. case RECOG_CONFIG_RECOG_PROCEDURE_LHS:
  298. return DHMM_Recog_Viterbi_LHS(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood, pn_Status_Sequence);
  299. break;
  300. case RECOG_CONFIG_RECOG_PROCEDURE_HQ:
  301. return DHMM_Recog_Viterbi_HQ(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood, pn_Status_Sequence);
  302. break;
  303. case RECOG_CONFIG_RECOG_PROCEDURE_WP:
  304. ASSERT(0);
  305. break;
  306. default:
  307. ASSERT(0);
  308. break;
  309. }
  310. return 0;
  311. }
  312. //////////////////////////////////////////////////////////////////////
  313. // 函数名称:DHMM_Recog_Forward_Backward
  314. // 函数功能:计算前后向算法的最大似然值
  315. // 函数性质:API
  316. // 输入参数:
  317. // pu_DHMM_Model,DHMM模型
  318. // pu_Word_Sample,要计算的词
  319. // 输出参数:
  320. // pd_Max_Likelihood,存放计算的似然值
  321. // 返回值:
  322. // 0 表示成功
  323. // 备注:该函数是一个分发函数,按照u_Pro_Config.l_DHMM_Recog_Config配置的信息
  324. // 将具体调用不同的函数体
  325. int DHMM_Recog_Forward_Backward(DHMM_MODEL * pu_DHMM_Model,
  326. WORD_SAMPLE * pu_Word_Sample,
  327. double * pd_Max_Likelihood)
  328. {
  329. switch (u_Pro_Config.l_DHMM_Recog_Config & RECOG_CONFIG_RECOG_PROCEDURE_MASK)
  330. {
  331. case RECOG_CONFIG_RECOG_PROCEDURE_STD:
  332. ASSERT(0);
  333. break;
  334. case RECOG_CONFIG_RECOG_PROCEDURE_GL:
  335. return DHMM_Recog_Forward_Backward_GL(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood);
  336. break;
  337. case RECOG_CONFIG_RECOG_PROCEDURE_LHS:
  338. ASSERT(0);
  339. break;
  340. case RECOG_CONFIG_RECOG_PROCEDURE_HQ:
  341. return DHMM_Recog_Forward_Backward_HQ(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood);
  342. break;
  343. case RECOG_CONFIG_RECOG_PROCEDURE_WP:
  344. ASSERT(0);
  345. break;
  346. default:
  347. ASSERT(0);
  348. break;
  349. }
  350. return 0;
  351. }