DHMM_GL.cpp
资源名称:VQ-DHMM.rar [点击查看]
上传用户:avbj512
上传日期:2013-09-18
资源大小:6239k
文件大小:25k
源码类别:
DSP编程
开发平台:
Visual C++
// DHMM_GL.cpp:
// Implementation of the DHMM_GL Module.
// That is the transform of previous DHMM Code by GuLiang.
//
// Created 2001/08, By DongMing, MDSR.
//
//////////////////////////////////////////////////////////////////////
- #include "stdafx.h"
- #include "DHMM_GL.h"
- #include <math.h>
- #define REESTIMATE_ALL (0x0001)
- #define ENDSTATE_ABSORB (0x0002)
- #define REESTIMATE_B (0x0004)
- //////////////////////////////////////////////////////////////////////
- // Private Function Head
- void DHMM_VQ(int *DMNU_Sound_attrib, int DMNU_Speaker_num, char *DMNU_Featurefile_name, char *DMNU_Codefile_name,
- int Codeword_num, double K_means_precision, int DMNU_mode,
- int DMAD_n_Code_Word_Num, int DMAD_n_Code_Word_Dim,
- DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Book, DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Word);
- void DHMM_train(int *DMNU_Sound_attrib, int Speaker_num, int Train_num, char *DMNU_Featurefile_name,
- char *DMNU_Codefile_name, char *DMNU_Patternfile_name, int Status_num, int mode,
- WORD_SAMPLE * DMAD_pu_Word_Sample,
- DHMM_MODEL * DMAD_pu_DHMM_Model);
- double DHMM_Calculate(double ***Alpha, double ***Belta, double *Pi, double **A,
- double **B, int **Out, int Sample_num, int Output_num, int *T,
- int n, int mode, int End_state_mode);
- double DHMM_Priority(double *Pi, double **A, double **B, int *Out, int T, int n, int mode);
- //////////////////////////////////////////////////////////////////////
- // API functions
- int DHMM_VQ_Train_Code_Book_GL(DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Word, int n_Code_Word_Num, int n_Code_Word_Dim,
- DYNA_2DIM_DOUBLE_ARRAY d2dda_Initial_Code_Book, DYNA_2DIM_DOUBLE_ARRAY d2dda_Code_Book, int n_Code_Book_Size)
- {
- PRO_LOG("tVQ = GL, K-mean precision = %15.4E.n", KMEAN_PRECISION);
- DHMM_VQ(NULL, 0, NULL, NULL,
- n_Code_Book_Size, KMEAN_PRECISION, 0,
- n_Code_Word_Num, n_Code_Word_Dim,
- d2dda_Code_Book, d2dda_Code_Word);
- return 0;
- }
- int DHMM_Model_Train_DHMM_Model_GL(WORD_SAMPLE * pu_Word_Sample, int n_Word_Sample_Num,
- DHMM_MODEL * pu_DHMM_Model)
- {
- PRO_LOG("tModel = GL, max loop = %4d, precision = %15.4E.n", MAX_CALC_NUM, EPSILON_CALC);
- DHMM_train(NULL, n_Word_Sample_Num, n_Word_Sample_Num, NULL,
- NULL, NULL, pu_DHMM_Model->n_State_Num, ~ENDSTATE_ABSORB,
- pu_Word_Sample, pu_DHMM_Model);
- return 0;
- }
- int DHMM_Recog_Forward_Backward_GL(DHMM_MODEL * pu_DHMM_Model,
- WORD_SAMPLE * pu_Word_Sample,
- double * pd_Max_Likelihood)
- {
- (*pd_Max_Likelihood) = DHMM_Priority(pu_DHMM_Model->pdPi, pu_DHMM_Model->d2dda_A, pu_DHMM_Model->d2dda_B,
- pu_Word_Sample->pn_VQed_Feature_Sequence, pu_Word_Sample->n_Feature_Sequence_Len,
- pu_DHMM_Model->n_State_Num, ~ENDSTATE_ABSORB);
- return 0;
- }
//////////////////////////////////////////////////////////////////////
// Original Code
//////////////////////////////////////////////////////////////////////
// DMNU = DongMing doesn't use this when transforming.
// DMAD = DongMing adds this when transforming.
// binary split VQ code-book training (clustering) algorithm
- static void DHMM_VQ(int *DMNU_Sound_attrib, int DMNU_Speaker_num, char *DMNU_Featurefile_name, char *DMNU_Codefile_name,
- int Codeword_num, double K_means_precision, int DMNU_mode,
- int DMAD_n_Code_Word_Num, int DMAD_n_Code_Word_Dim,
- DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Book, DYNA_2DIM_DOUBLE_ARRAY DMAD_d2dda_Code_Word)
- {
- int i, j, p, m;
- int Dimension; // Dimension: 特征维数
- int Nearer;
- int *Counter; // 记录各码字聚类到的训练向量个数
- int Total_num; // 训练向量集的总帧数
- double **Codeword; // VQ码本
- double **Sum;
- double **s; // 训练向量集
- double Min_Dist;
- double Dist; // 某一训练向量和某一码字间的距离
- double New_D; // 所有训练向量的量化失真之和?
- double Old_D;
- // Initial Memory Block
- Dimension = DMAD_n_Code_Word_Dim;
- Total_num = DMAD_n_Code_Word_Num;
- Codeword = DMAD_d2dda_Code_Book;
- if((Sum = new double *[Codeword_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Sum !n");
- ASSERT(0);
- }
- for(i=0; i<Codeword_num; i++)
- {
- if((Sum[i] = new double [Dimension]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Sum[%d] !n", i);
- ASSERT(0);
- }
- }
- if((Counter = new int [Codeword_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Counter !n");
- ASSERT(0);
- }
- s = DMAD_d2dda_Code_Word;
- // k-means Clusteration
- for(p=0; p<Dimension; p++) Codeword[0][p] = 0; // 第一个码字清零
- for(i=0; i<Total_num; i++)
- for(p=0; p<Dimension; p++)
- Codeword[0][p] += s[i][p];
- for(p=0; p<Dimension; p++) Codeword[0][p] /= Total_num; // 第一个码字置为所有训练向量的平均值
- for(m=2; m<=Codeword_num; m*=2) // m: 分裂后码本大小
- {
- DEBUG_PRINTF("tM = %d.n", m);
- for(i=m/2-1; i>=0; i--)
- for(j=0; j<Dimension; j++) // initialize Codeword[0],Codeword[1],...,Codeword[m-1] by splitting current code words
- {
- Codeword[i*2+1][j] = Codeword[i][j] * 0.9; // splitting parameter epsilon set to 0.1
- Codeword[i*2][j] = Codeword[i][j] * 1.1;
- }
- Old_D = 1.0e20;
- New_D = 1.0e15;
- while(((Old_D - New_D) / New_D) > K_means_precision)
- {
- for(j=0; j<m; j++)
- {
- for(p=0; p<Dimension; p++) Sum[j][p] = 0;
- Counter[j] = 0;
- }
- Old_D = New_D;
- New_D = 0;
- for(i=0; i<Total_num; i++)
- {
- Dist = 0;
- for(p=0; p<Dimension; p++)
- Dist += (Codeword[0][p] - s[i][p]) * (Codeword[0][p] - s[i][p]);
- Min_Dist = Dist; // 计算该训练向量与第一个码字间的距离
- Nearer = 0;
- for(j=1; j<m; j++) // 计算该训练向量与第2到第m个码字间的距离,并求最小值
- {
- Dist = 0;
- for(p=0; p<Dimension; p++)
- Dist += (Codeword[j][p] - s[i][p]) * (Codeword[j][p] - s[i][p]);
- if(Dist < Min_Dist)
- {
- Min_Dist = Dist;
- Nearer = j; // 记录距离最近的码字标号
- }
- }
- for(p=0; p<Dimension; p++) Sum[Nearer][p] += s[i][p]; // 累加聚类到标号为Nearer的码字的所有训练向量
- Counter[Nearer] ++; // 记录聚类到标号为Nearer的码字的训练向量的个数
- New_D += Min_Dist;
- }
- for(j=0; j<m; j++)
- {
- if(Counter[j] > 0)
- for(p=0; p<Dimension; p++)
- Codeword[j][p] = Sum[j][p] / Counter[j]; // 用聚类到标号为j的码字的所有训练的平均值更新该码字
- }
- }
- }
- for(i=0; i<Codeword_num; i++)
- {
- delete Sum[i];
- }
- delete Sum;
- delete Counter;
- }
- static void DHMM_train(int *DMNU_Sound_attrib, int Speaker_num, int Train_num, char *DMNU_Featurefile_name,
- char *DMNU_Codefile_name, char *DMNU_Patternfile_name, int Status_num, int mode,
- WORD_SAMPLE * DMAD_pu_Word_Sample,
- DHMM_MODEL * DMAD_pu_DHMM_Model)
- // Speaker_num 说话人个数
- // Train_num 训练集人数(Train_num=Speaker_num-Test_num)
- // Status_num 状态数
- // mode 训练模式
- {
- int i, j, k, /* n, */ m;
- int Output_num; // VQ码本大小(码字个数)
- int **Output; // for each word item model being trained, the output sequence (observation sequence) of each utterance of this model. (每个元素保存该帧的码字标号)
- // 1st dimension: index of the training utterance;
- // 2nd dimension: index of frame
- int *Frame_num; // 各训练句子的帧数
- double *Pi; // 初始概率
- double **A; // A矩阵:Status_num*Status_num
- double **B; // B矩阵:Status_num*Output_num
- double ***Alpha; // Train_num*帧数*状态数的double矩阵
- double ***Belta; // Train_num*帧数*状态数的double矩阵
- double w, u;
- // Generate (A, B, Pi)
- Output_num = DMAD_pu_DHMM_Model->n_Code_Book_Size;
- Pi = DMAD_pu_DHMM_Model->pdPi; // The above block transform to this line. #DongMing#
- // allocate memory for A matrix
- A = DMAD_pu_DHMM_Model->d2dda_A; // The above block transform to this line. #DongMing#
- // allocate memory for B matrix
- B = DMAD_pu_DHMM_Model->d2dda_B; // The above block transform to this line. #DongMing#
- //事实上,此处为Alpha和Belta分配了过多的内存,没有必要为所有用于训练的说话人分配Alpha和Belta
- if((Alpha = new double **[Train_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Alpha !n");
- ASSERT(0);
- }
- if((Belta = new double **[Train_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Belta !n");
- ASSERT(0);
- }
- if((Output = new int *[Train_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Output !n");
- ASSERT(0);
- }
- if((Frame_num = new int [Train_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Frame_num !n");
- ASSERT(0);
- }
- for(i=1; i<Status_num; i++) Pi[i] = 0; // 初始状态概率:第一个状态为1,其余为0
- Pi[0] = 1;
- // 初始化A矩阵、B矩阵
- for(i=0; i<Status_num; i++)
- {
- for(j=0; j<Status_num; j++) A[i][j] = 0;
- if(i < Status_num-1)
- {
- A[i][i] = 0.5;
- A[i][i+1] = 0.5;
- }
- else
- {
- A[i][i] = 1.0;
- }
- w = 0;
- for(j=0; j<Status_num; j++) w = w + A[i][j];
- for(j=0; j<Status_num; j++) A[i][j] = A[i][j] / w; // 归一化A矩阵的每一行
- w = 0;
- for(j=0; j<Output_num; j++) // 随机初始化B矩阵
- {
- B[i][j] = 1.0 / Output_num * (0.9 + ((rand()%1000) / 5000.0)); // RAND_MAX is defined as 0x7fff in "stdlib.h"
- // B[i][j] = 1.0 / Output_num;
- w = w + B[i][j];
- }
- for(j=0; j<Output_num; j++) B[i][j] = B[i][j] / w; // 归一化B矩阵的每一行
- }
- k = 0; // k: training utterance counter for each word item model being trained
- // 装入该词条的训练数据
- for(m=0; m<Speaker_num; m++)
- {
- Frame_num[k] = DMAD_pu_Word_Sample[k].n_Feature_Sequence_Len;
- // ????????????? is it necessary to save alpha and beta for each training utterance ??????????????
- if((Alpha[k] = new double *[Frame_num[k]]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Alpha[%d] !n", k);
- ASSERT(0);
- }
- for(i=0; i<Frame_num[k]; i++)
- if((Alpha[k][i] = new double [Status_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Alpha[%d][%d] !n", k, i);
- ASSERT(0);
- }
- if((Belta[k] = new double *[Frame_num[k]]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Belta[%d] !n", k);
- ASSERT(0);
- }
- for(i=0; i<Frame_num[k]; i++)
- if((Belta[k][i] = new double [Status_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Belta[%d][%d] !n", k, i);
- ASSERT(0);
- }
- if((Output[k] = new int [Frame_num[k]]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Frame_num[%d] !n", k);
- ASSERT(0);
- }
- for(i=0; i<Frame_num[k]; i++)
- Output[k][i] = DMAD_pu_Word_Sample[k].pn_VQed_Feature_Sequence[i];
- k ++;
- }
- w = 1;
- m = 0;
- while(m < MAX_CALC_NUM)
- {
- // Alpha和Belta作为事先分配好内存的变量,并未在调用DHMM_Calculate后使用,因此应该在DHMM_Calculate函数内部分配内存,
- // 而入口参数只传递大小信息(如训练句子数、帧数、状态数等等),甚至可以不必为Alpha和Belta分配如此多的内存。
- u = DHMM_Calculate(Alpha, Belta, Pi, A, B, Output, Train_num, Output_num,
- Frame_num, Status_num, REESTIMATE_ALL, mode);
- //u = DHMM_Calculate(Alpha, Belta, Pi, A, B, Output, Train_num, Output_num,
- //Frame_num, Status_num, REESTIMATE_ALL, mode, DMAD_pu_Word_Sample);
- // for (i = 0; i < Status_num; i++) for (j = 0; j < Output_num; j++) PRO_LOG("%dt%dt:%15.4E.n", i, j, B[i][j]);
- m ++;
- if((u > w) && (fabs((u-w)/w) < EPSILON_CALC)) break;
- else w = u;
- }
- // for (i = 0; i < Status_num; i++) for (j = 0; j < Output_num; j++) TRACE("%dt%dt:%15.4E.n", i, j, B[i][j]);
- PRO_LOG("tLoop = %4d.n", m);
- // 释放内存
- for(k=0; k<Train_num; k++)
- {
- for(i=0; i<Frame_num[k]; i++)
- delete Alpha[k][i];
- delete Alpha[k];
- for(i=0; i<Frame_num[k]; i++)
- delete Belta[k][i];
- delete Belta[k];
- //delete Output[k];
- }
- delete Alpha;
- delete Belta;
- //delete Output;
- delete Frame_num;
- }
- static double DHMM_Calculate(double ***Alpha, double ***Belta, double *Pi, double **A,
- double **B, int **Out, int Sample_num, int Output_num, int *T,
- int n, int mode, int End_state_mode)
- // 功能:训练某一词条的模型
- // input:
- // Pi 初始时刻各状态的概率
- // Alpha 前向概率
- // Belta 后向概率
- // A 初始A矩阵
- // B 初始B矩阵
- // Out 用于训练的各句子的观察序列(输出序列)
- // Sample_num 训练集人数
- // Output_num VQ码本大小(码字数,即观察值的个数)
- // T 用于训练的各句子的帧数
- // n 状态数
- // mode 训练模式(REESTIMATE_A, REESTIMATE_B, REESTIMATE_ALL)
- // End_state_mode 结尾帧状态模式(ENDSTATE_ABSORB = 0x200)
- // output:
- // A
- // B
- // caller(r_dhmm.cpp): u = DHMM_Calculate(Alpha, Belta, Pi, A, B, Output, Train_num, Output_num,
- // Frame_num, Status_num, REESTIMATE_ALL, mode);
- {
- int i, j, k, m;
- double w;
- double **New_A;
- double **New_B;
- double **Ct; // to prevent underflow of Alpha and Belta, Ct is the scaling coefficient
- double Residue;
- if((New_A = new double *[n]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_A !n");
- ASSERT(0);
- }
- for(i=0; i<n; i++)
- {
- if((New_A[i] = new double [n]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_A[%d] !n", i);
- ASSERT(0);
- }
- }
- if((New_B = new double *[n]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_B !n");
- ASSERT(0);
- }
- for(i=0; i<n; i++)
- {
- if((New_B[i] = new double [Output_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_B[%d] !n", i);
- ASSERT(0);
- }
- }
- if((Ct = new double *[Sample_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Ct !n");
- ASSERT(0);
- }
- for(i=0; i<Sample_num; i++)
- if((Ct[i] = new double [T[i]]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Ct[%d] !n", i);
- ASSERT(0);
- }
- // Calculate Alpha & Belta.
- for(m=0; m<Sample_num; m++)
- {
- if (T[m] > 0)
- {
- w = 0;
- for(i=0; i<n; i++)
- {
- Alpha[m][0][i] = Pi[i] * B[i][ Out[m][0] ];
- w = w + Alpha[m][0][i];
- }
- Ct[m][0] = 1.0 / w; // scaling coefficient
- for(i=0; i<n; i++)
- Alpha[m][0][i] = Alpha[m][0][i] * Ct[m][0]; // scaling
- // 计算Alpha
- for(k=1; k<T[m]; k++) // 第m个训练utterance的各帧
- {
- Ct[m][k] = 0;
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<n; j++)
- w = w + Alpha[m][k-1][j] * A[j][i];
- w = w * B[i][ Out[m][k] ];
- Alpha[m][k][i] = w;
- Ct[m][k] = Ct[m][k] + w;
- }
- Ct[m][k] = 1.0 / Ct[m][k];
- for(i=0; i<n; i++)
- Alpha[m][k][i] = Alpha[m][k][i] * Ct[m][k];
- } // end of: for(k=1; k<T[m]; k++)
- // 最后一帧状态为吸收态,即只有最后一个状态概率不为0,其余状态概率均为0
- if(End_state_mode & ENDSTATE_ABSORB) // End_state_mode是在RECOG.CPP中根据用户输入确定的
- {
- for(i=0; i<n-1; i++) Alpha[m][ T[m]-1 ][i] = 0;
- Belta[m][ T[m]-1 ][n-1] = Ct[m][ T[m]-1 ];
- for(i=0; i<n-1; i++) Belta[m][ T[m]-1 ][i] = 0;
- }
- else
- {
- for(i=0; i<n; i++) Belta[m][ T[m]-1 ][i] = Ct[m][ T[m]-1 ];
- }
- // 计算Belta
- for(k=T[m]-2; k>=0; k--)
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<n; j++)
- w = w + A[i][j] * B[j][ Out[m][k+1] ] * Belta[m][k+1][j];
- Belta[m][k][i] = w * Ct[m][k];
- }
- }
- }
- // Re-estimate A[i][j], i = 0 to n-1, j = 0 to n-1.
- /* if((mode == REESTIMATE_A) || (mode == REESTIMATE_ALL))
- {
- for(i=0; i<n; i++)
- for(j=0; j<n; j++)
- New_A[i][j] = 0;
- for(m=0; m<Sample_num; m++)
- {
- w = 0;
- for(i=0; i<n; i++)
- w += Alpha[m][T[m]-1][i];
- for(k=0; k<T[m]-1; k++)
- {
- for(i=0; i<n; i++)
- for(j=0; j<n; j++)
- {
- w1 = Alpha[m][k][i] * A[i][j] * B[j][ Out[m][k+1] ] * Belta[m][k+1][j];
- w1 /= w;
- New_A[i][j] += w1;
- }
- }
- }
- for(i=0; i<n; i++)
- {
- for(j=0; j<n; j++)
- if((j < i) || (j > (i+1)))
- New_A[i][j] = 0;
- w = 0;
- for(j=0; j<n; j++) w += New_A[i][j];
- for(j=0; j<n; j++) New_A[i][j] /= w;
- for(j=0; j<n; j++)
- if((j < i) || (j > (i+1)) && (New_A[i][j] < Epsilon_A))
- New_A[i][j] = Epsilon_A;
- w = 0;
- for(j=0; j<n; j++) w += New_A[i][j];
- for(j=0; j<n; j++) New_A[i][j] /= w;
- }
- for(i=0; i<n; i++)
- for(j=0; j<n; j++)
- A[i][j] = New_A[i][j];
- }*/
- // Re-estimate B[i][j], i = 0 to n-1, j = 0 to m-1.
- if((mode == REESTIMATE_B) || (mode == REESTIMATE_ALL))
- {
- // clear new B matrix
- for(i=0; i<n; i++)
- for(j=0; j<Output_num; j++)
- New_B[i][j] = 0;
- for(m=1; m<Sample_num; m++)
- {
- w = 0;
- for(i=0; i<n; i++)
- if (T[m] > 0) // add by wd @ 030331z
- w += Alpha[m][T[m]-1][i]; // w: 观察序列概率,即P(O)
- // reestimate matrix B
- for(k=0; k<T[m]; k++)
- for(i=0; i<n; i++)
- {
- New_B[i][Out[m][k]] += Alpha[m][k][i] * Belta[m][k][i] /
- Ct[m][k] / w;
- }
- }
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<Output_num; j++) w += New_B[i][j];
- for(j=0; j<Output_num; j++)
- {
- New_B[i][j] /= w; // 归一化
- }
- for(j=0; j<Output_num; j++)
- if(New_B[i][j] < Epsilon_B) // Epsilon_B=1.0e-6
- New_B[i][j] = Epsilon_B;
- w = 0;
- for(j=0; j<Output_num; j++) w += New_B[i][j];
- for(j=0; j<Output_num; j++) New_B[i][j] /= w; // 归一化
- }
- for(i=0; i<n; i++) // 更新B矩阵
- for(j=0; j<Output_num; j++)
- B[i][j] = New_B[i][j];
- }
- Residue = 0;
- for(m=0; m<Sample_num; m++)
- if(T[m] > 0) // add by wd @ 030331
- Residue += DHMM_Priority(Pi, A, B, Out[m], T[m], n, End_state_mode);
- /*if (T[m] > 0)
- {
- Residue += DHMM_Priority(Pi, A, B, Out[m], T[m], n, End_state_mode);
- }
- else
- Residue += -10; */
- for(i=0; i<n; i++) delete New_A[i];
- delete New_A;
- for(i=0; i<n; i++) delete New_B[i];
- delete New_B;
- for(i=0; i<Sample_num; i++) delete Ct[i];
- delete Ct;
- return Residue;
- }
- static double DHMM_Priority(double *Pi, double **A, double **B, int *Out, int T, int n, int mode)
- // input:
- // Pi 初始概率
- // A A矩阵
- // B B矩阵
- // Out 训练集中第m个说话人的观察序列
- // T 训练集中第m个说话人的观察序列帧数
- // n 状态数
- // mode
- // return value:
- // logP(o1,o2,...,oT|Pi,A,B)
- //caller(r_dhmm.cpp): Residue += DHMM_Priority(Pi, A, B, Out[m], T[m], n, End_state_mode);
- {
- int i, j, k;
- double w, p;
- double *Alpha1;
- double *Alpha2;
- if((Alpha1 = new double [n]) == NULL) { DEBUG_PRINTF("Not enough memory for !n"); ASSERT(0); }
- if((Alpha2 = new double [n]) == NULL) { DEBUG_PRINTF("Not enough memory for !n"); ASSERT(0); }
- p = 0;
- w = 0;
- for(i=0; i<n; i++)
- {
- Alpha1[i] = Pi[i] * B[i][ Out[0] ];
- w += Alpha1[i];
- }
- for(i=0; i<n; i++) Alpha1[i] /= w;
- p += log(w);
- for(k=1; k<T; k++)
- {
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<n; j++)
- w = w + Alpha1[j] * A[j][i];
- w = w * B[i][ Out[k] ];
- Alpha2[i] = w;
- }
- if(k < T-1)
- {
- w = 0;
- for(i=0; i<n; i++) w += Alpha2[i];
- for(i=0; i<n; i++) Alpha1[i] = Alpha2[i] / w;
- p += log(w);
- }
- }
- if(mode & ENDSTATE_ABSORB)
- {
- p += log(Alpha2[n-1]);
- }
- else
- {
- w = 0;
- for(i=0; i<n; i++) w += Alpha2[i];
- p += log(w);
- }
- delete Alpha1;
- delete Alpha2;
- return p;
- }
- static double DHMM_Calculate(double ***Alpha, double ***Belta, double *Pi, double **A,
- double **B, int Out[140][150], int Sample_num, int Output_num, int *T,
- int n, int mode, int End_state_mode, WORD_SAMPLE * DMAD_pu_Word_Sample)
- // 功能:训练某一词条的模型
- // input:
- // Pi 初始时刻各状态的概率
- // Alpha 前向概率
- // Belta 后向概率
- // A 初始A矩阵
- // B 初始B矩阵
- // Out 用于训练的各句子的观察序列(输出序列)
- // Sample_num 训练集人数
- // Output_num VQ码本大小(码字数,即观察值的个数)
- // T 用于训练的各句子的帧数
- // n 状态数
- // mode 训练模式(REESTIMATE_A, REESTIMATE_B, REESTIMATE_ALL)
- // End_state_mode 结尾帧状态模式(ENDSTATE_ABSORB = 0x200)
- // output:
- // A
- // B
- // caller(r_dhmm.cpp): u = DHMM_Calculate(Alpha, Belta, Pi, A, B, Output, Train_num, Output_num,
- // Frame_num, Status_num, REESTIMATE_ALL, mode);
- {
- int i, j, k, m;
- double w;
- double **New_A;
- double **New_B;
- double **Ct; // to prevent underflow of Alpha and Belta, Ct is the scaling coefficient
- double Residue;
- if((New_A = new double *[n]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_A !n");
- ASSERT(0);
- }
- for(i=0; i<n; i++)
- {
- if((New_A[i] = new double [n]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_A[%d] !n", i);
- ASSERT(0);
- }
- }
- if((New_B = new double *[n]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_B !n");
- ASSERT(0);
- }
- for(i=0; i<n; i++)
- {
- if((New_B[i] = new double [Output_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for New_B[%d] !n", i);
- ASSERT(0);
- }
- }
- if((Ct = new double *[Sample_num]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Ct !n");
- ASSERT(0);
- }
- for(i=0; i<Sample_num; i++)
- if((Ct[i] = new double [T[i]]) == NULL)
- {
- DEBUG_PRINTF("Not enough memory for Ct[%d] !n", i);
- ASSERT(0);
- }
- // Calculate Alpha & Belta.
- for(m=0; m<Sample_num; m++)
- {
- w = 0;
- for(i=0; i<n; i++)
- {
- Alpha[m][0][i] = Pi[i] * B[i][ DMAD_pu_Word_Sample[m].pn_VQed_Feature_Sequence[i] ];
- w = w + Alpha[m][0][i];
- }
- Ct[m][0] = 1.0 / w; // scaling coefficient
- for(i=0; i<n; i++)
- Alpha[m][0][i] = Alpha[m][0][i] * Ct[m][0]; // scaling
- // 计算Alpha
- for(k=1; k<T[m]; k++) // 第m个训练utterance的各帧
- {
- Ct[m][k] = 0;
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<n; j++)
- w = w + Alpha[m][k-1][j] * A[j][i];
- w = w * B[i][ Out[m][k] ];
- Alpha[m][k][i] = w;
- Ct[m][k] = Ct[m][k] + w;
- }
- Ct[m][k] = 1.0 / Ct[m][k];
- for(i=0; i<n; i++)
- Alpha[m][k][i] = Alpha[m][k][i] * Ct[m][k];
- } // end of: for(k=1; k<T[m]; k++)
- // 最后一帧状态为吸收态,即只有最后一个状态概率不为0,其余状态概率均为0
- if(End_state_mode & ENDSTATE_ABSORB) // End_state_mode是在RECOG.CPP中根据用户输入确定的
- {
- for(i=0; i<n-1; i++) Alpha[m][ T[m]-1 ][i] = 0;
- Belta[m][ T[m]-1 ][n-1] = Ct[m][ T[m]-1 ];
- for(i=0; i<n-1; i++) Belta[m][ T[m]-1 ][i] = 0;
- }
- else
- {
- for(i=0; i<n; i++) Belta[m][ T[m]-1 ][i] = Ct[m][ T[m]-1 ];
- }
- // 计算Belta
- for(k=T[m]-2; k>=0; k--)
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<n; j++)
- w = w + A[i][j] * B[j][ Out[m][k+1] ] * Belta[m][k+1][j];
- Belta[m][k][i] = w * Ct[m][k];
- }
- }
- // Re-estimate A[i][j], i = 0 to n-1, j = 0 to n-1.
- /* if((mode == REESTIMATE_A) || (mode == REESTIMATE_ALL))
- {
- for(i=0; i<n; i++)
- for(j=0; j<n; j++)
- New_A[i][j] = 0;
- for(m=0; m<Sample_num; m++)
- {
- w = 0;
- for(i=0; i<n; i++)
- w += Alpha[m][T[m]-1][i];
- for(k=0; k<T[m]-1; k++)
- {
- for(i=0; i<n; i++)
- for(j=0; j<n; j++)
- {
- w1 = Alpha[m][k][i] * A[i][j] * B[j][ Out[m][k+1] ] * Belta[m][k+1][j];
- w1 /= w;
- New_A[i][j] += w1;
- }
- }
- }
- for(i=0; i<n; i++)
- {
- for(j=0; j<n; j++)
- if((j < i) || (j > (i+1)))
- New_A[i][j] = 0;
- w = 0;
- for(j=0; j<n; j++) w += New_A[i][j];
- for(j=0; j<n; j++) New_A[i][j] /= w;
- for(j=0; j<n; j++)
- if((j < i) || (j > (i+1)) && (New_A[i][j] < Epsilon_A))
- New_A[i][j] = Epsilon_A;
- w = 0;
- for(j=0; j<n; j++) w += New_A[i][j];
- for(j=0; j<n; j++) New_A[i][j] /= w;
- }
- for(i=0; i<n; i++)
- for(j=0; j<n; j++)
- A[i][j] = New_A[i][j];
- }*/
- // Re-estimate B[i][j], i = 0 to n-1, j = 0 to m-1.
- if((mode == REESTIMATE_B) || (mode == REESTIMATE_ALL))
- {
- // clear new B matrix
- for(i=0; i<n; i++)
- for(j=0; j<Output_num; j++)
- New_B[i][j] = 0;
- for(m=1; m<Sample_num; m++)
- {
- w = 0;
- for(i=0; i<n; i++)
- w += Alpha[m][T[m]-1][i]; // w: 观察序列概率,即P(O)
- // reestimate matrix B
- for(k=0; k<T[m]; k++)
- for(i=0; i<n; i++)
- {
- New_B[i][Out[m][k]] += Alpha[m][k][i] * Belta[m][k][i] /
- Ct[m][k] / w;
- }
- }
- for(i=0; i<n; i++)
- {
- w = 0;
- for(j=0; j<Output_num; j++) w += New_B[i][j];
- for(j=0; j<Output_num; j++)
- {
- New_B[i][j] /= w; // 归一化
- }
- for(j=0; j<Output_num; j++)
- if(New_B[i][j] < Epsilon_B) // Epsilon_B=1.0e-6
- New_B[i][j] = Epsilon_B;
- w = 0;
- for(j=0; j<Output_num; j++) w += New_B[i][j];
- for(j=0; j<Output_num; j++) New_B[i][j] /= w; // 归一化
- }
- for(i=0; i<n; i++) // 更新B矩阵
- for(j=0; j<Output_num; j++)
- B[i][j] = New_B[i][j];
- }
- Residue = 0;
- for(m=1; m<Sample_num; m++)
- Residue += DHMM_Priority(Pi, A, B, Out[m], T[m], n, End_state_mode);
- for(i=0; i<n; i++) delete New_A[i];
- delete New_A;
- for(i=0; i<n; i++) delete New_B[i];
- delete New_B;
- for(i=0; i<Sample_num; i++) delete Ct[i];
- delete Ct;
- return Residue;
- }