语音合成与识别

开发平台：
Visual C++

FE_lpcc.cpp：源码内容
							///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
/****************************************************************
LPC cepstral analysis
Original: Communications Research Lab.(CRL),
		  Dept. of Electrical Eng., KAIST.
		  (contributors : Y. K. Park, O. W. Kwon)
Modified by I. J. Choi, Feb. 5, 1996
Modified by O. W. Kwon, Feb. 22, 1998
*************************************************************************/
#include "StdAfx.h"
#include "FE_feature.h"
/* 
Computes LPC and cepstrum coefficients by using the auto-correlation or covariance method 
*/
/*
lpc_cep[0] = log(G)
lpc_cep[i] = c[i], i=1,...,N
*/
int	Fe::lpc_cepstrum_basic(short *sample, int frameSize, float *lpc_cep, int ceporder, int norder)
{
	float G;
	vector<float> acf(norder+1);
	lpc_basic(sample, frameSize, &acf[0], norder, &G);
	lpc_to_cepstrum(&acf[0], norder, lpc_cep, ceporder, G);
	cepstral_window(&lpc_cep[0], ceporder, m_lifter);
	/* owkwon: To make dynamic range consistent with other kinds of cepstral coefficients */
	/* Needs further investigation */
	for (int i=0;i<=ceporder;i++) lpc_cep[i] /= 8;
	return(1);
}
/* 
Auto-correlation method.
*/

/*------------------------------------------------------------------------*
* Routine generates the autocorrelation sequence from the input windowed * 
* data sequence. It uses 'n' products to compute ac[0], 'nsamp'-1    * 
* products to compute ac[1],..., and 'nsamp'-'order' products to compute * 
* ac[order]. Note that the length of the array 'ac' should be order+1.   *
*------------------------------------------------------------------------*/
int Fe::auto_correlation(float *wsamp, float *ac, int n, int norder)
{
	int 	i,j;	
	for (i=0; i<=norder; i++){
		ac[i] = 0.0;
		for (j=i; j<n; j++) ac[i] += wsamp[j]*wsamp[j-i];
		ac[i] /= n; /* normalize */
	}
	return(1);
}

/************************************************************************
*   	modified Levinson algorithm       				*
************************************************************************/
int Fe::levins (float *r, float *kcf, float *acf, int norder)
{
	int 	j,n;
	vector<float> p(norder+1);
	vector<float> q(norder+1);
	vector<float> acf_tmp(norder+1);
	vector<float> a(norder+1);
	
	/* normalize r's to prevent round off error */
	if(!normalize_corr(r,norder)) return 1;
	
	kcf[1] = acf_tmp[1] = acf[1] = a[1] = r[1];
	for(n=2; n<=norder; n++){
		for(j=1; j<n; j++){
			p[j] = r[n-j] * a[j];
			q[j] = r[j] * a[j];
		}
		a[n]=( r[n]-sum(1,n-1,&p[0]) ) / ( r[0]-sum(1,n-1,&q[0]) );  
		kcf[n] = a[n];
		if (fabs(a[n]) >= 1. ) a[n] = 0.;
		for(j=1; j<n; j++) a[j] = acf_tmp[j] - a[n]*acf_tmp[n-j];
		for(j=1; j<=n; j++){
			acf_tmp[j] = a[j];
			if ( n == norder ) acf[j] = a[j];
		}
	}
	
	return(1);
}

/*------------------------------------------------------------------------*
* durbin() - solution of the auto-correlation normal equations           *
*                                    					  *
* Routine obtains the (forward) predictor coefficients from the auto-    *
* correlation or the covariance sequence. As a by-product, it also       *
* obtains the reflection coefficients.  				  *
* 									  *
*------------------------------------------------------------------------*/
int Fe::durbin(float *r, float *kcf, float *acf, int norder)
{
	int 	i,j;
	float 	sum,error;
	vector<float> b(norder+1);
	/* initialize predictor coeffs. */
	acf[0] = 1.0;
	for(i=1; i<=norder; i++) acf[i]=0.0;
	
	/* normalize autocorrs */
	if(!normalize_corr(r,norder)) return 1;
	
	/* durbin's recursion */
	for(error = r[0], i = 1; i <= norder; i++) {
		for(sum = r[i], j=1; j < i; j++)
			sum -= acf[j] * r[i - j];
		acf[i] = kcf[i] = sum / error;
		for(j = 1; j < i; j++)
			b[j] = acf[i - j];
		for(j = 1; j < i; j++)
			acf[j] -= kcf[i] * b[j];
		error = (1 - kcf[i] * kcf[i]) * error;
	} 
	return 1;
}

int Fe::stable_k(float *kcf, int norder) 
{
	int 	i,j;	
	for(i=1; i<=norder; i++){
		if (fabs(kcf[i]) >= 1)
			for(j=1;j<=norder;j++)
				kcf[j]=kcf[j]*power((float)0.99,j);
	}
	return(1);
}

/*------------------------------------------------------------------------*
* 	normalize r's to prevent round off error  
*------------------------------------------------------------------------*/
int Fe::normalize_corr(float *r, int norder)
{
	int	i;
	if(r[0] == (float)0) return 0;
	for(i=1; i<=norder; i++) r[i] /= r[0];
	r[0] = 1;
	return 1;
}
/* 
Covariance method
*/
/*
Calculates an autocorrelation matrix by the covariance matrix.
*/
int Fe::covariance(float *sample, int frameSize, FeMatrix<float>& cov, int norder)
{
    int i,j,n;
	for (i=0; i<=norder; i++) {
		for (j=0; j<=norder; j++) {
			float x,y;
			cov[i][j] = 0.0;
			for (n=0; n<frameSize; n++){
				x = (n-i >= 0 && n-i < frameSize) ? sample[n-i] : 0;
				y = (n-j >= 0 && n-j < frameSize) ? sample[n-j] : 0;
				cov[i][j] += x*y;
			}
		}
	}
	return(1);
}
/* 
Ref: W. H. Press, S. A. Teukolsky, W. T. Vetterling, and B. P. Flannery, 
Numerical Recipes in C, 2ed. pp. 96-98, 1992.
*/
/*
Given a positive-difinite symmetric matrix a[0..n-1][0..n-1], this routine constructs its
Cholesky decomposition, A = L.L'. On input, only the upper triangle of a need be given;
it is not modified. The Cholesky factor L is returned in the lower triangle of a,
except for its diagonal elements which are returned in p[0..n-1].
*/
int Fe::choldc(FeMatrix<float>& a, int n, float *p)
{
	int i,j,k;
	for(i=0;i<n;i++){
		for(j=i;j<n;j++){
			float sum=a[i][j];
			for(k=i-1;k>=0;k--) sum -= a[i][k]*a[j][k];
			if(i==j){
				if(sum <= 0.0) return FALSE; // a, with rounding errors, is not positive definite.	
				p[i] = sqrt(sum);
			}
			else a[j][i] = sum/p[i];
		}
	}
	return TRUE;
}
/* 
Solves the n linear equations A.x = b, where a is a positive-definite symmetric matrix.
a[0..n-1][0..n-1] and p[0..n-1] are input as the output of the routine choldc. Only the lower triangle of a is accessed.
b[0..n-1] is input as the right-hand side vector. The solution vector is returned
in x[0..n-1]. a, n, and p are not modified and can be left in place for successive calls
with different right-hand sides b. b is not modified unless you identify b and x in the calling sequence,
which is allowed. 
*/
int Fe::cholsl(FeMatrix<float>& a, int n, float *p, float *b, float *x)
{
	int i,k;
	float sum;
	// solve L.y = b, storing y in x.
	for(i=0;i<n;i++){
		for(sum=b[i],k=i-1;k>=0;k--) sum -= a[i][k]*x[k];
		x[i] = sum/p[i];
	}
	// solve L'.x = y.
	for(i=n-1;i>=0;i--){
		for(sum=x[i],k=i+1;k<n;k++) sum -= a[k][i]*x[k];
		x[i] = sum/p[i];
	}
	return TRUE;
}
/* Solves A.x = b. */
int Fe::CholeskySol(FeMatrix<float>& a, float *b, float *x, int n)
{
	vector<float> p(n);
	int success = choldc(a, n, &p[0]);
	if(success) cholsl(a, n, &p[0], b, x);
	return success;
}
int Fe::lpc_cov(float *sample, int frameSize, FeMatrix<float>& cov, float *acf, int norder, float *G)
{
	int i;
	covariance(sample, frameSize, cov, norder);
	FeMatrix<float> phi(norder, norder);
	vector<float> psi(norder);
	for(i=1;i<=norder;i++){
		psi[i-1] = cov[i][0];
		for(int j=1;j<=norder;j++){
			phi[i-1][j-1] = cov[i][j];
		}
	}
	vector<float> rorg(norder+1);
	for(i=0;i<=norder;i++) rorg[i] = cov[i][0];
	// Solves phi.a = psi.
	int success = CholeskySol(phi, &psi[0], acf+1, norder);
	if(!success){
		for(i=1;i<=norder;i++) acf[i] = 0;
	}
	/* G denotes LPC gain G */
	acf[0] = 1;
	if(G) (*G) = calc_lpc_gain_basic(&rorg[0], acf, norder);
	return success;
}
int Fe::lpc_cov_error(float *sample, int frameSize, float *acf, int norder, float* residual)
{
	int i,n;
	for(n=0;n<frameSize;n++){
		float si;
		residual[n] = (n>=0 && n<frameSize) ? sample[n] : 0;	
		for(i=1;i<=norder;i++){
			si = (n-i>=0 && n-i<frameSize) ? sample[n-i] : 0;
			residual[n] -= acf[i]*si;
		}
	}
	return(1);
}

/************************************************************************
  This subroutine computes the cepstral coefficients
    cep[1]-cep[ncf] cepstrum coefficients from the predictor coefficients
    acf[1]-acf[norder] predictor coefficients
    norder is the order of the LPC model.
  Equations:
    c(0) = log (G)
    c(n) = a(n) + 1/n * sum_{k=1}^{n-1} k c(k) a(n-k)    for n <= norder
    c(n) = 1/n * sum_{k=n-p}^{n-1} k/n c(k) a(n-k)         for n > norder
  Note that the definition of a[] is 
  y(n) = a(1)y(n-1) + a(2)y(n-2) + ...
  [1] Papamichalis, P.E. Practical Approaches to Speech Coding. Prentice Hall, Englewood Cliffs, NJ, 1987
  [2] Rabiner, L.R. and Juang B.H. Fundamentals of Speech Recognition. Prentice Hall, Englewood Cliffs, NJ, 1993 
*************************************************************************/
int Fe::lpc_to_cepstrum(float *acf, int norder, float *cep, int ncf, float G)
{
	int n,k;
	cep[0] = LogE(G);
	for (n = 1; n <= ncf; n++){
		float sum=0;
		if(n <= norder){
			for (k=1; k <= n-1; k++) sum += k*cep[k]*acf[n-k];
			cep[n] = acf[n] + 1/(float)n*sum;
		}
		else{
			for (k=n-norder; k <= n-1; k++) sum += k*cep[k]*acf[n-k];
			cep[n] = 1/(float)n*sum;
		}
	}	
	return(1);
}
/********************************************************************
 BILINEAR TRANSFORM
 REF) A. V. Oppenheim, D. H. Johnson, "Discrete
 Representation of Signals", Proc. of IEEE,
 Vol. 60, No. 6, June, 1972, pp. 681-691.
 
  Comment) This bilinear transform maintains the form of the  spectrum
  for a discrete-time signal but transforms the frequency axis
  in a nonlinear manner.
  
   Original sequence ( F[n] ) ===> Transformed sequence ( G[k,n] )
   Designed digital filter
   i.e
   G[k] = { sum from n=0 to infinity } F[n] * H[k,n]
   
	Time and Frequency update equations:
	
    	G[0,n] = a * G[0][n-1] + F[-n]
		
		 G[1,n] = a * G[1][n-1] + G[0][n-1]
		 
		  G[k][n] = a * ( G[k][n-1] - G[k-1][n] ) + G[k-1][n-1] 
		  for k = 2, 3, ...
*********************************************************************/
int Fe::bilinear_transform(float *org_seq, int num_org_seq, float *trans_seq, int no_update, float warp_coeff ) 
{
	int 	n, k;
	vector<vector<float> > G; /* G is no_update x num_org_seq matrix. */
	assert(no_update>2);
	G.resize(no_update);
	for(k=0;k<no_update;k++) G[k].resize(num_org_seq);
	
	/*  Compute G[0][n] & G[1][n] */
	G[0][num_org_seq-1] = org_seq[num_org_seq-1];
	for( n = num_org_seq-2 ; n >= 0 ; n-- ) {
		G[0][n] = warp_coeff * G[0][n+1] + org_seq[n];
	}
	G[1][num_org_seq - 1] = 0.0;
	for( n = num_org_seq-2 ; n >= 0 ; n-- )
		G[1][n] = warp_coeff * G[1][n+1] +
		((float)1.0 - warp_coeff * warp_coeff ) * G[0][n+1];
	
	/* Compute G[k][n] */
	for( k = 2 ; k < no_update ; k++ ) {
		/* Initialize G[k][num_org_seq-1] */
		G[k][num_org_seq-1] = (float)(-1.0)*warp_coeff*G[k-1][num_org_seq-1];
		for( n = num_org_seq-2 ; n >= 0 ; n-- )
			G[k][n]=warp_coeff*( G[k][n+1]-G[k-1][n])+G[k-1][n+1];
	} /* end of k-loop */
	
	/* Return Transformed Sequence */
	for( k = 0 ; k < no_update ; k++ ) trans_seq[k] = G[k][0];
	
	return(1);
}