DHMM_RECOG_MFC.cpp
资源名称:VQ-DHMM.rar [点击查看]
上传用户:avbj512
上传日期:2013-09-18
资源大小:6239k
文件大小:15k
源码类别:
DSP编程
开发平台:
Visual C++
- // DHMM_RECOG_MFC.cpp:
- // Implementation of the DHMM_RECOG_MFC module.
- // It computes the maximum likelihood (maximum output probability) used for recognition.
- //
- // Created 2001/08, By DongMing, MDSR.
- //
- //////////////////////////////////////////////////////////////////////
- #include "stdafx.h"
- #include "DHMM_Recog_MFC.h"
- #include "kwspot.h"
- #include "DHMM_Model_MFC.h"
- #include "DAT_File_Access.h"
- #include "DHMM_VQ_MFC.h"
- #include "DHMM_GL.h"
- #include "DHMM_LHS.h"
- #include "DHMM_HQ.h"
- extern PRO_CONFIG u_Pro_Config;
- //////////////////////////////////////////////////////////////////////
- // API functions
- int DHMM_Recog_Kitty(void)
- {
- u_Pro_Config.l_DHMM_Recog_Config_Set = RECOG_CONFIG_DHMM_RECOG_TRAINSET;
- PRO_LOG("tRecognition using Train Set...n");
- //DHMM_Recog_One_Set();
- RESULT_LOG("n--------Train Set. Accuracy Ratet%dttt%8.4fn", u_Pro_Config.n_DHMM_Model_Person_Start_Index, u_Pro_Config.d_Recognition_Accurate_Rate);
- u_Pro_Config.l_DHMM_Recog_Config_Set = RECOG_CONFIG_DHMM_RECOG_TESTSET;
- PRO_LOG("tRecognition using Test Set...n");
- DHMM_Recog_One_Set();
- RESULT_LOG("n--------Test Set. Accuracy Ratet%dttt%8.4fn", u_Pro_Config.n_DHMM_Model_Person_Start_Index, u_Pro_Config.d_Recognition_Accurate_Rate);
- return 0;
- }
- int DHMM_Recog_One_Set(void)
- {
- int n_Recog_Result, n_True_Result;
- char sz_Feature_File_Name[256];
- int n_Recog_Result_Good_Count = 0, n_Recog_Result_Bad_Count = 0;
- int n_Person_Index, n_Word_Sample_Index;
- int Total_Model_Num;
- Total_Model_Num =u_Pro_Config.n_DHMM_Model_Num;
- double* pd_DHMM_Model_Probably;
- pd_DHMM_Model_Probably = new double[Total_Model_Num];
- ASSERT(pd_DHMM_Model_Probably);
- DYNA_2DIM_INT_ARRAY d2dna_Recog_Table = d2dna_New(u_Pro_Config.n_DHMM_Model_Num, Total_Model_Num);
- for (int n_Sentence_Index = 0; n_Sentence_Index < u_Pro_Config.n_DHMM_Model_Num; n_Sentence_Index++)
- for (int nTmp = 0; nTmp < Total_Model_Num; nTmp++)
- d2dna_Recog_Table[n_Sentence_Index][nTmp] = 0;
- // 对所有测试集的人进行循环
- for (n_Person_Index = 0; n_Person_Index < u_Pro_Config.n_Recog_Person_Num; n_Person_Index++)//u_Pro_Config.n_Recog_Person_Num; n_Person_Index++)
- {
- // 判断是否测试集中的人,不是则跳出此轮循环
- if(u_Pro_Config.l_DHMM_Recog_Config_Set == RECOG_CONFIG_DHMM_RECOG_TRAINSET)
- {
- if (u_Pro_Config.n_Recog_Person_Start_Index <= u_Pro_Config.n_Recog_Person_End_Index)
- {
- if (!((n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index) || (n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index))) continue;
- }
- else
- {
- if (!((n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index) && (n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index))) continue;
- }
- }
- else if(u_Pro_Config.l_DHMM_Recog_Config_Set == RECOG_CONFIG_DHMM_RECOG_TESTSET)
- {
- if (u_Pro_Config.n_Recog_Person_Start_Index <= u_Pro_Config.n_Recog_Person_End_Index)
- {
- if ((n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index) || (n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index)) continue;
- }
- else
- {
- if ((n_Person_Index > u_Pro_Config.n_Recog_Person_End_Index) && (n_Person_Index < u_Pro_Config.n_Recog_Person_Start_Index)) continue;
- }
- }
- // 对每个人的每个词,进行一次识别测试
- sprintf(sz_Feature_File_Name, u_Pro_Config.sz_Recog_Origin_File_Name_Format, n_Person_Index);
- for (n_Word_Sample_Index = 0; n_Word_Sample_Index < u_Pro_Config.n_Recog_Sentence_Num; n_Word_Sample_Index++)
- {
- int Sen_Index = n_Word_Sample_Index + u_Pro_Config.n_Sentence_Start_Index;
- n_Recog_Result = DHMM_Recog_Basic(sz_Feature_File_Name, Sen_Index, pd_DHMM_Model_Probably);
- n_True_Result = n_Word_Sample_Index % WORD_ITEM_NUM;
- if (u_Pro_Config.WATCH_RECOG_RESULT == 1)
- {
- DHMM_LLS_Ret(pd_DHMM_Model_Probably, Total_Model_Num, n_True_Result, n_Recog_Result);
- }
- // 统计识别正确性
- d2dna_Recog_Table[n_True_Result][n_Recog_Result]++;
- if (n_Recog_Result == n_True_Result)
- n_Recog_Result_Good_Count++;
- else
- {
- n_Recog_Result_Bad_Count++;
- }
- }
- PRO_LOG("tNumber:%4d person, recognition %4d right!r", n_Person_Index, n_Recog_Result_Good_Count);
- }
- // 计算识别率
- u_Pro_Config.d_Recognition_Accurate_Rate = n_Recog_Result_Good_Count / double(n_Recog_Result_Good_Count + n_Recog_Result_Bad_Count) * 100;
- PRO_LOG("nAccurate Rate = %d / %d = %10.4f%%.n", n_Recog_Result_Good_Count, (n_Recog_Result_Good_Count + n_Recog_Result_Bad_Count), u_Pro_Config.d_Recognition_Accurate_Rate);
- /**/
- PRO_LOG(" Recognition Table:n");
- PRO_LOG("t");
- for (int nTmp = 0; nTmp < Total_Model_Num; nTmp++)
- PRO_LOG("%2d ", nTmp);
- PRO_LOG("n");
- PRO_LOG("t-------------------------------------------n");
- for (n_Sentence_Index = 0; n_Sentence_Index < u_Pro_Config.n_DHMM_Model_Num; n_Sentence_Index++)
- {
- PRO_LOG("t");
- for (nTmp = 0; nTmp < Total_Model_Num; nTmp++)
- PRO_LOG("%2d ", d2dna_Recog_Table[n_Sentence_Index][nTmp]);
- PRO_LOG("n");
- }
- PRO_LOG("n");
- d2dna_Free(d2dna_Recog_Table, u_Pro_Config.n_DHMM_Model_Num, Total_Model_Num);
- delete[] pd_DHMM_Model_Probably;
- return 0;
- }
// Appends one recognition record to the result file
// ..\..\data\Recog_Ret\Kitty_Recog_LLS_Ret.txt (path relative to the current
// working directory).
//
// The record consists of the Total_Model_Num scores, each written with
// "%7.0f, ", followed by the recognized index and the true index.
// NOTE(review): no line terminator is written after a record, so successive
// records end up on the same line - confirm this is intended.
//
// Parameters:
//   pd_DHMM_Model_Probably - per-model scores, Total_Model_Num entries
//   Total_Model_Num        - number of scores to write
//   n_True_Result          - index of the expected word
//   n_Recog_Result         - index of the recognized model
// Returns:
//   0 on success, -1 if the score buffer is missing or the file cannot be
//   opened (nothing is written in that case).
int DHMM_LLS_Ret(double* pd_DHMM_Model_Probably, const int Total_Model_Num, const int n_True_Result, const int n_Recog_Result)
{
    // Backslashes must be escaped inside a C string literal.
    static const char sz_Ret_File_Name[] = "..\\..\\data\\Recog_Ret\\Kitty_Recog_LLS_Ret.txt";
    FILE* fp;
    int n_Model_Index;

    if ((pd_DHMM_Model_Probably == NULL) && (Total_Model_Num > 0))
        return -1;

    // Checked explicitly: an ASSERT would vanish in release builds and the
    // fprintf calls below would then dereference a NULL stream.
    fp = fopen(sz_Ret_File_Name, "a+");
    if (fp == NULL)
        return -1;

    for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
    {
        fprintf(fp, "%7.0f, ", pd_DHMM_Model_Probably[n_Model_Index]);
    }
    fprintf(fp, " %2d ", n_Recog_Result);
    fprintf(fp, "%2d ", n_True_Result);

    fclose(fp);
    return 0;
}
- //////////////////////////////////////////////////////////////////////
- // 函数名称:DHMM_Recog_Basic
- // 函数功能:基本识别(一次识别)
- // 函数性质:API
- // 输入参数:
- // sz_Feature_File_Name,待识别说话人的特征文件名
- // n_Word_Sample_Index,待识别词条标号
- // 输出参数:
- // 无
- // 返回值:
- // 识别结果
- // 备注:仅仅使用Viterbi或前后向,比较似然分数
- int DHMM_Recog_Basic(char * sz_Feature_File_Name, int n_Word_Sample_Index, double* pd_DHMM_Model_Probably)
- {
- int nRetCode;
- DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Book;
- DHMM_MODEL * pu_DHMM_Model;
- WORD_SAMPLE u_Word_Sample;
- //double * pd_DHMM_Model_Probably;
- DYNA_2DIM_INT_ARRAY d2dna_DHMM_Model_Sequence;
- int n_Model_Index;
- int n_Recog_Result;
- double d_Recog_Result_Probably;
- static int n_file_count = 0;
- int Total_Model_Num;
- Total_Model_Num =u_Pro_Config.n_DHMM_Model_Num;
- // 读入码书
- d2dda_Code_Book = d2dda_New(u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
- ASSERT(d2dda_Code_Book != NULL);
- nRetCode = DHMM_VQ_Load_Code_Book_File(u_Pro_Config.sz_Toload_Code_Book_File_Name,
- d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
- ASSERT(nRetCode == 0);
- if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == (MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL))
- u_Pro_Config.n_DHMM_Model_State_Num += 2;
- else if((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL)
- u_Pro_Config.n_DHMM_Model_State_Num += 2;
- // 为各个词条的模型准备内存,并读入模型
- pu_DHMM_Model = new DHMM_MODEL[Total_Model_Num];
- ASSERT(pu_DHMM_Model);
- for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
- {
- pu_DHMM_Model[n_Model_Index].n_State_Num = u_Pro_Config.n_DHMM_Model_State_Num;
- pu_DHMM_Model[n_Model_Index].n_Code_Book_Size = u_Pro_Config.n_VQ_Code_Book_Size;
- pu_DHMM_Model[n_Model_Index].pdPi = new double[u_Pro_Config.n_DHMM_Model_State_Num];
- pu_DHMM_Model[n_Model_Index].d2dda_A = d2dda_New(u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_DHMM_Model_State_Num);
- pu_DHMM_Model[n_Model_Index].d2dda_B = d2dda_New(u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_VQ_Code_Book_Size);
- ASSERT((pu_DHMM_Model[n_Model_Index].pdPi != NULL)
- && (pu_DHMM_Model[n_Model_Index].d2dda_A != NULL)
- && (pu_DHMM_Model[n_Model_Index].d2dda_B != NULL));
- }
- if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL ||
- (u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL)
- nRetCode = DHMM_Model_Load_DHMM_Model_File_With_Silence(u_Pro_Config.sz_Toload_DHMM_Model_File_Name, pu_DHMM_Model, Total_Model_Num);
- ASSERT(nRetCode == 0);
- // 准备该词条的内存空间
- u_Word_Sample.n_Feature_Sequence_Len = dfa_Feature_Get_Sentence_Frame_Num(sz_Feature_File_Name, n_Word_Sample_Index);
- u_Word_Sample.n_Feature_Dim = u_Pro_Config.n_Feature_Dim;
- int word_len = u_Word_Sample.n_Feature_Sequence_Len;
- //ASSERT(word_len >= 0);
- if(word_len <= 0)
- {
- // 释放模型内存,释放码书内存
- for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
- {
- delete[] pu_DHMM_Model[n_Model_Index].pdPi;
- d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_A, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_DHMM_Model_State_Num);
- d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_B, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_VQ_Code_Book_Size);
- }
- if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == (MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL))
- u_Pro_Config.n_DHMM_Model_State_Num -= 2;
- else if((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL)
- u_Pro_Config.n_DHMM_Model_State_Num -= 2;
- delete[] pu_DHMM_Model;
- d2dda_Free(d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
- return 0;
- }
- u_Word_Sample.d2dda_Feature_Sequence =
- d2dda_New(u_Word_Sample.n_Feature_Sequence_Len, u_Word_Sample.n_Feature_Dim);
- u_Word_Sample.pn_VQed_Feature_Sequence =
- new int[u_Word_Sample.n_Feature_Sequence_Len];
- ASSERT((u_Word_Sample.d2dda_Feature_Sequence != NULL)
- && (u_Word_Sample.pn_VQed_Feature_Sequence != NULL));
- // 读入词条特征
- nRetCode = dfa_Feature_Read_A_Sentence(sz_Feature_File_Name, n_Word_Sample_Index, u_Pro_Config.n_Feature_Dim,
- u_Word_Sample.d2dda_Feature_Sequence);
- ASSERT(nRetCode == u_Word_Sample.n_Feature_Sequence_Len);
- // VQ特征
- nRetCode = DHMM_VQ_Encode_A_Word_Sample(d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim,
- &u_Word_Sample);
- ASSERT(nRetCode == 0);
- int fea_num = u_Word_Sample.n_Feature_Dim;
- // 释放原始特征参数空间
- d2dda_Free(u_Word_Sample.d2dda_Feature_Sequence, u_Word_Sample.n_Feature_Sequence_Len, u_Word_Sample.n_Feature_Dim);
- // 为匹配分数准备内存
- //pd_DHMM_Model_Probably = new double[u_Pro_Config.n_DHMM_Model_Num];
- d2dna_DHMM_Model_Sequence = d2dna_New(Total_Model_Num, u_Word_Sample.n_Feature_Sequence_Len);
- ASSERT((pd_DHMM_Model_Probably != NULL) && (d2dna_DHMM_Model_Sequence != NULL));
- // 计算该词条与每一个模型的匹配分数
- for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
- {
- if ((u_Pro_Config.l_DHMM_Recog_Config & RECOG_CONFIG_METHOD_MASK) == RECOG_CONFIG_METHOD_VITERBI_ONLY)
- {
- nRetCode = DHMM_Recog_Viterbi(&pu_DHMM_Model[n_Model_Index], &u_Word_Sample, &pd_DHMM_Model_Probably[n_Model_Index], d2dna_DHMM_Model_Sequence[n_Model_Index]);
- ASSERT(nRetCode == 0);
- }
- }
- // 选出最佳匹配
- n_Recog_Result = -1;
- d_Recog_Result_Probably = -MAX_DOUBLE_VALUE;
- for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
- {
- pd_DHMM_Model_Probably[n_Model_Index] /= word_len;
- if (pd_DHMM_Model_Probably[n_Model_Index] > d_Recog_Result_Probably)
- {
- n_Recog_Result = n_Model_Index;
- d_Recog_Result_Probably = pd_DHMM_Model_Probably[n_Model_Index];
- }
- }
- // 释放词条VQ后特征参数占用空间
- delete[] u_Word_Sample.pn_VQed_Feature_Sequence;
- // 释放匹配分数占用空间
- //delete[] pd_DHMM_Model_Probably;
- d2dna_Free(d2dna_DHMM_Model_Sequence, Total_Model_Num, u_Word_Sample.n_Feature_Sequence_Len);
- // 释放模型内存,释放码书内存
- for (n_Model_Index = 0; n_Model_Index < Total_Model_Num; n_Model_Index++)
- {
- delete[] pu_DHMM_Model[n_Model_Index].pdPi;
- d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_A, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_DHMM_Model_State_Num);
- d2dda_Free(pu_DHMM_Model[n_Model_Index].d2dda_B, u_Pro_Config.n_DHMM_Model_State_Num, u_Pro_Config.n_VQ_Code_Book_Size);
- }
- if ((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == (MODEL_CONFIG_TRAIN_WITH_SILENCE_MODEL))
- u_Pro_Config.n_DHMM_Model_State_Num -= 2;
- else if((u_Pro_Config.l_DHMM_Model_Config & MODEL_CONFIG_GENERATE_DHMM_MODEL_MASK) == MODEL_CONFIG_LOAD_WITH_SILENCE_MODEL)
- u_Pro_Config.n_DHMM_Model_State_Num -= 2;
- delete[] pu_DHMM_Model;
- d2dda_Free(d2dda_Code_Book, u_Pro_Config.n_VQ_Code_Book_Size, u_Pro_Config.n_Feature_Dim);
- return n_Recog_Result;
- }
- //////////////////////////////////////////////////////////////////////
- // 函数名称:DHMM_Recog_Viterbi
- // 函数功能:计算Viterbi方法的最大似然值
- // 函数性质:API
- // 输入参数:
- // pu_DHMM_Model,DHMM模型
- // pu_Word_Sample,要计算的词
- // 输出参数:
- // pd_Max_Likelihood,存放计算的似然值
- // pn_Status_Sequence,存放各帧所经历的状态
- // 返回值:
- // 0 表示成功
- // 备注:该函数是一个分发函数,按照u_Pro_Config.l_DHMM_Recog_Config配置的信息
- // 将具体调用不同的函数体
- int DHMM_Recog_Viterbi(DHMM_MODEL * pu_DHMM_Model,
- WORD_SAMPLE * pu_Word_Sample,
- double * pd_Max_Likelihood, int * pn_Status_Sequence)
- {
- switch (u_Pro_Config.l_DHMM_Recog_Config & RECOG_CONFIG_RECOG_PROCEDURE_MASK)
- {
- case RECOG_CONFIG_RECOG_PROCEDURE_STD:
- ASSERT(0);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_GL:
- ASSERT(0);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_LHS:
- return DHMM_Recog_Viterbi_LHS(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood, pn_Status_Sequence);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_HQ:
- return DHMM_Recog_Viterbi_HQ(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood, pn_Status_Sequence);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_WP:
- ASSERT(0);
- break;
- default:
- ASSERT(0);
- break;
- }
- return 0;
- }
- //////////////////////////////////////////////////////////////////////
- // 函数名称:DHMM_Recog_Forward_Backward
- // 函数功能:计算前后向算法的最大似然值
- // 函数性质:API
- // 输入参数:
- // pu_DHMM_Model,DHMM模型
- // pu_Word_Sample,要计算的词
- // 输出参数:
- // pd_Max_Likelihood,存放计算的似然值
- // 返回值:
- // 0 表示成功
- // 备注:该函数是一个分发函数,按照u_Pro_Config.l_DHMM_Recog_Config配置的信息
- // 将具体调用不同的函数体
- int DHMM_Recog_Forward_Backward(DHMM_MODEL * pu_DHMM_Model,
- WORD_SAMPLE * pu_Word_Sample,
- double * pd_Max_Likelihood)
- {
- switch (u_Pro_Config.l_DHMM_Recog_Config & RECOG_CONFIG_RECOG_PROCEDURE_MASK)
- {
- case RECOG_CONFIG_RECOG_PROCEDURE_STD:
- ASSERT(0);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_GL:
- return DHMM_Recog_Forward_Backward_GL(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_LHS:
- ASSERT(0);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_HQ:
- return DHMM_Recog_Forward_Backward_HQ(pu_DHMM_Model, pu_Word_Sample, pd_Max_Likelihood);
- break;
- case RECOG_CONFIG_RECOG_PROCEDURE_WP:
- ASSERT(0);
- break;
- default:
- ASSERT(0);
- break;
- }
- return 0;
- }