语音合成与识别

开发平台：
C/C++

mfcc.c：源码内容
							#include "mfcc.h"
#include <math.h>
#include <stdio.h>
int temp=1;
int m_cepOrder = 12;
int m_fbOrder = 23;
float m_dctMatrix[(12+1)*23];
int m_sampleRate=32000;
WfMelFB m_MelFB[23];
float  m_MelWeight[512/2+1];
float m_logEnergyFloor;
float m_energyFloor;
//预加重
int preemphasize(float *sample, int sampleN)
{
	/* Setting emphFac=0 turns off preemphasis. */
	int	i;
	float emphFac = (float)0.97;
	for (i = sampleN-1; i > 0; i--) {
		sample[i] = sample[i] - emphFac * sample[i-1];
	}
	sample[0] = (float)(1.0 - emphFac) * sample[0];
	return(1);
}
int preprocessing(short *sample, int sampleN, float *out)
{
	int i;
	for(i=0;i<sampleN;i++)
		out[i]=sample[i];
	//if (m_dither) Dither(out, sampleN);
	preemphasize(out, sampleN);
	return 1;
}
//加窗
int Do_hamm_window(float *inputA, int inputN)
{
	int i;
	float* hamm_window ;
	float temp = (float)(2 * M_PI / (float)(inputN-1));
	hamm_window = (float*)malloc(inputN*4);
    for (i=0 ; i<inputN ; i++ ) hamm_window[i] = (float)(0.54 - 0.46*cos(temp*i));
	for (i=0 ; i<inputN ;i++ ) inputA[i] = hamm_window[i]*inputA[i];
	free(hamm_window);
	return(1);
}
int mel_cepstrum_basic(short *sample, int frameSize, float *mel_cep, int ceporder, int fft_size)
{
	float* frameA = (float*)malloc(frameSize*4);
	preprocessing(sample, frameSize, &frameA[0]);
	Do_hamm_window(&frameA[0], frameSize);
	_mel_cepstrum_basic(&frameA[0], frameSize, mel_cep, m_fbOrder, ceporder, fft_size);
	free(frameA);
	return 1;
}
int MfccInitDCTMatrix (float *dctMatrix, int ceporder, int numChannels)
{
    int i, j;
    for (i = 0; i <= ceporder; i++){//12+1
        for (j = 0; j < numChannels; j++){//23
            dctMatrix[i * numChannels + j] = (float) cos (M_PI * (float) i / (float) numChannels * ((float) j + 0.5));
			if(i==0) dctMatrix[i * numChannels + j]*=(float)sqrt(1/(float)numChannels);
			else     dctMatrix[i * numChannels + j]*=(float)sqrt(2/(float)numChannels);
		}
	}
	return 1;
}
int _mel_cepstrum_basic(float *sample, int frameSize, float *mel_cep, int fborder, int ceporder, int fft_size)
{
		int i;
		float* filter_bank = (float*)malloc(m_fbOrder*4);
		float f;
		MfccInitDCTMatrix (&m_dctMatrix[0], ceporder, fborder);
		_filterbank_basic(sample, frameSize, &filter_bank[0], fborder, fft_size, 0, 0);
		MfccDCT (filter_bank, &m_dctMatrix[0], ceporder, fborder, mel_cep);
		free(filter_bank);
//		/* scale down to be consistent with other kinds of cepstrum coefficients */
		f=fborder/(float)fft_size;
		for(i=0;i<=ceporder;i++)
		mel_cep[i]*=f;
		printf("gogogo%dn",temp);
        temp++;
    for(i=0;i<13;i++)
	{
	   printf("%ft",mel_cep[i]);
	}
	    return 1;
}
/* Supporting routine for MFCC */
#define MfccRound(x) ((int)((x)+0.5))
void MfccInitMelFilterBanks (float startingFrequency, float samplingRate, int fftLength, int numChannels)
{
    int i, k;
    float* freq=(float*)malloc(numChannels*4+8);
	int * bin=(int *)malloc(numChannels*4+8);
	float start_mel, fs_per_2_mel;
	//m_MelWeight = (float*)malloc(fftLength*2+4);
    /* Constants for calculation */
	freq[0] = startingFrequency;
    start_mel = (float)(2595.0 * log10 (1.0 + startingFrequency / 700.0));
	bin[0] = MfccRound(fftLength * freq[0] / samplingRate);
	freq[numChannels+1] = (float)(samplingRate / 2.0);
    fs_per_2_mel = (float)(2595.0 * log10 (1.0 + (samplingRate / 2.0) / 700.0));
	bin[numChannels+1] = MfccRound(fftLength * freq[numChannels+1] / samplingRate);
	/* Calculating mel-scaled frequency and the corresponding FFT-bin */
    /* number for the lower edge of the band                          */
	for (k = 1; k <= numChannels; k++) {
        freq[k] = (float)(700 * (pow (10, (start_mel + (float) k / (numChannels + 1) * (fs_per_2_mel - start_mel)) / 2595.0) - 1.0));
		bin[k] = MfccRound(fftLength * freq[k] / samplingRate);
	}
	/* This part is never used to compute MFCC coefficients */
	/* but initialized for completeness                     */
	for(i = 0; i<bin[0]; i++){
		m_MelWeight[i]=0;
	}
	m_MelWeight[fftLength/2]=1;
	/* Initialize low, center, high indices to FFT-bin */
		for (k = 0; k <= numChannels; k++) {
		if(k<numChannels){
			m_MelFB[k].m_lowX=bin[k];
			m_MelFB[k].m_centerX=bin[k+1];
			m_MelFB[k].m_highX=bin[k+2];
		}
		for(i = bin[k]; i<bin[k+1]; i++){
			m_MelWeight[i]=(i-bin[k]+1)/(float)(bin[k+1]-bin[k]+1);
		}
    }
	free(freq);
	free(bin);
    return;
}
float LogE(float x)
{
	if(x>m_energyFloor) return log(x);
	else return m_logEnergyFloor;
}
void PRFFT_NEW(float *a, float *b, int m, int n_pts, int iff)
{
	int	l,lmx,lix,lm,li,j1,j2,ii,jj,nv2,nm1,k,lmxy,n;
	float	scl,s,c,arg,T_a,T_b;
	n = 1 << m;
	for( l=1 ; l<=m ; l++ ) {
		lmx = 1 << (m - l) ;
		lix = 2 * lmx ;
		scl = 2 * (float)M_PI/(float)lix ;
		if(lmx < n_pts) lmxy = lmx ;
		else lmxy = n_pts ;
		for( lm = 1 ; lm <= lmxy ; lm++ ) {
			arg=((float)(lm-1)*scl) ;
			c = (float)cos(arg) ;
			s = iff * (float)sin(arg) ;
			for( li = lix ; li <= n ; li += lix ) {
				j1 = li - lix + lm ;
				j2 = j1 + lmx ;
				if(lmxy != n_pts ) {
					T_a=a[j1-1] - a[j2-1] ;
					/* T_a : real part */
					T_b=b[j1-1] - b[j2-1] ;
					/* T_b : imaginary part */
					a[j1-1] = a[j1-1] + a[j2-1] ;
					b[j1-1] = b[j1-1] + b[j2-1] ;
					a[j2-1] = T_a*c + T_b*s ;
					b[j2-1] = T_b*c - T_a*s ;
				} else{
					a[j2-1] = a[j1-1]*c + b[j1-1]*s ;
					b[j2-1] = b[j1-1]*c - a[j1-1]*s ;
				}
			}
		}
	}
	nv2 = n/2 ;
	nm1 = n - 1 ;
	jj = 1 ;
	for( ii = 1 ; ii <= nm1 ;) {
		if( ii < jj ) {
			T_a = a[jj-1] ;
			T_b = b[jj-1] ;
			a[jj-1] = a[ii-1] ;
			b[jj-1] = b[ii-1] ;
			a[ii-1] = T_a ;
			b[ii-1] = T_b ;
		}
		k = nv2 ;
		while( k < jj ) {
			jj = jj - k ;
			k = k/2 ;
		}
		jj = jj + k ;
		ii = ii + 1 ;
	}
	if(iff == (-1)){
		for( l=0 ; l<n ; l++ ) {
			a[l]/=(float)n;
			b[l]/=(float)n;
		}
	}
}
void MfccMelFilterBank (float *sigFFT, int numChannels, float* output, int normalize)
{
    float sum, wsum;
    int i, k;
	MfccMelFB *melFB;
    for (k=0;k<numChannels;k++){
		melFB=&m_MelFB[k];
        sum = sigFFT[melFB->m_centerX];
		wsum=1;
        for (i = melFB->m_lowX; i < melFB->m_centerX; i++){
            sum += m_MelWeight[i] * sigFFT[i];
			wsum += m_MelWeight[i];
		}
		for (i = melFB->m_centerX+1; i <= melFB->m_highX; i++){
            sum += (1 - m_MelWeight[i-1]) * sigFFT[i];
			wsum += (1 - m_MelWeight[i-1]);
		}
        output[k] = sum;
		if(normalize) {
//			assert(wsum>0);
			output[k] /= wsum;
		}
    }
    return;
}
int	_filterbank_basic(float *sample, int frameSize, float *filter_bank, int fborder, int fftSize, int cep_smooth, int cepFilterLen)
{
	int i;
	float* a=(float*)malloc(fftSize*4);
	float* b=(float*)malloc(fftSize*4);
	int uiLogFFTSize = (int)(log((double)fftSize)/log((double)2)+0.5);
	MfccInitMelFilterBanks ((float)64.0, (float)m_sampleRate, fftSize, fborder);
	for(i=0;i<frameSize;i++){ a[i] = sample[i]; b[i]=0; }
	for(i=frameSize;i<fftSize;i++) a[i] = b[i] = 0;
	PRFFT_NEW( &a[0], &b[0], uiLogFFTSize, frameSize, 1);
	for(i=0;i<fftSize;i++){
		a[i] = a[i]*a[i] + b[i]*b[i];
		b[i] = 0.0;
	}
	MfccMelFilterBank (&a[0], fborder, &filter_bank[0], 1);
	for (i = 0; i < fborder; i++)
		filter_bank[i] = 0.5*LogE(filter_bank[i]);
	free(a);
	free(b);
	return(1);
}
void MfccDCT (float *x, float *dctMatrix, int ceporder, int numChannels, float *mel_cep)
{
    int i, j;
    for (i = 0; i <= ceporder; i++) {
        mel_cep[i] = 0.0;
        for (j = 0; j < numChannels; j++){
            mel_cep[i] += x[j] * dctMatrix[i * numChannels + j];
		}
    }
    return;
}
int main(int argc, char* argv[])
{
	float mel_cep[12+1];
	short sample[512];
    FILE*  fp;
	int templen;
	int i;
	//sample rate=32k 每个样点16BIT frame_size=512  fft_size=512  相邻两帧间重叠128个样点
	//滤波器个数＝23  MFCC个数＝12＋1
	m_logEnergyFloor=FE_MIN_LOG_ENERGY;
	m_energyFloor=(float)exp(m_logEnergyFloor);
	if ((fp=fopen(argv[1],"rb"))==NULL)
	{
		printf("Open Errorn");
	}
	while ((templen=fread(sample,2,512,fp))==512)
	{
		mel_cepstrum_basic(sample, 512, mel_cep, 12,512);
		fseek(fp,-512,SEEK_CUR);
	}
	fclose(fp);
	printf("lastn");
    for(i=0;i<13;i++)
	{
	   printf("%ft",mel_cep[i]);
	}
    return 0;
}