语音合成与识别

开发平台：
Visual C++

FE_spectrum.cpp：源码内容
							///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
#include "StdAfx.h"
#include "FE_feature.h"
int Fe::fft_spectrum_basic(short *sample, int frameSize, float *spectrum, int fftSize)
{
	_fft_spectrum_basic(sample, frameSize, spectrum, fftSize, 0, 0);
	return 1;
}
int Fe::_fft_spectrum_basic(short *sample, int frameSize, float *spectrum, int fftSize, int cep_smooth, int cepFilterLen)
{
	int i;	
	vector<float> frameA(frameSize);
	vector<float> power(fftSize);
	for(i=0;i<frameSize;i++) frameA[i]=sample[i];
	preemphasize(&frameA[0], frameSize, m_emphFac);
	m_window.Windowing(&frameA[0], frameSize, WIN_HAMMING);
	int uiLogFFTSize = (int)(log((double)fftSize)/log((double)2)+0.5);
	
	compute_spectrum(&frameA[0], &power[0], frameSize, uiLogFFTSize);
	float log10db=(float)10/log(10);
	for(i=0;i<(int)fftSize;i++)
		spectrum[i] = log10db*LogE(power[i]);
	if(cep_smooth) {
		vector<float> a(fftSize);
		vector<float> b(fftSize);
			
		for(i=0;i<fftSize;i++){
			a[i] = spectrum[i];
			b[i] = 0;
		}
		PRFFT_NEW( &a[0], &b[0], uiLogFFTSize, fftSize, -1);
		if(cepFilterLen <= 0) cepFilterLen = 1;
		for (i=cepFilterLen ; i < fftSize-(cepFilterLen-1); i++){
			a[i] = b[i] = 0;
		}
		for(i=fftSize-1;i>fftSize/2;i--){
			a[i] = a[fftSize-i];
			b[i] = b[fftSize-i];
		}
    	PRFFT_NEW( &a[0], &b[0], uiLogFFTSize, fftSize, 1) ;
		for (i=0 ; i < m_fftSize; i++) 
			spectrum[i] = a[i];
	}
	return 1;
}
int Fe::fft_cepstrum_basic(short *sample, int frameSize, float *fft_cep, int ceporder, int fftSize)
{
	vector<float> frameA(frameSize);
	preprocessing(sample, frameSize, &frameA[0]);
	m_window.Windowing(&frameA[0], frameSize, WIN_HAMMING);
	_fft_cepstrum_basic(&frameA[0],frameSize,fft_cep,ceporder,fftSize);
	cepstral_window(fft_cep, ceporder, m_lifter);
	/* owkwon: To make dynamic range consistent with other kinds of cepstral coefficients */
	/* Needs further study */
	for (int i=0;i<=ceporder;i++) fft_cep[i] /= 8;
	return 1;
}
int Fe::_fft_cepstrum_basic(float *sample, int frameSize, float *fft_cep, int ceporder, int fftSize)
{
	int i;
	int uiLogFFTSize = (int)(log((double)fftSize)/log((double)2)+0.5);
	vector<float> a(fftSize);
	vector<float> b(fftSize);
	for(i=0;i<frameSize;i++){ a[i] = sample[i]; b[i]=0; }
	for(i=frameSize;i<fftSize;i++) a[i] = b[i] = 0;
	PRFFT_NEW( &a[0], &b[0], uiLogFFTSize, frameSize, 1);	
	for(i=0;i<fftSize;i++){
		a[i] = (float)(0.5*LogE(a[i]*a[i]+b[i]*b[i]));
		b[i] = 0;
	}
	PRFFT_NEW( &a[0], &b[0], uiLogFFTSize, fftSize, -1);
	for(i=0;i<=ceporder;i++) fft_cep[i] = a[i];
	return 1;
}
int Fe::filterbank_basic(short *sample, int frameSize, float *filter_bank, int fborder, int fftSize)
{
	vector<float> frameA(frameSize);
	preprocessing(sample, frameSize, &frameA[0]);
	m_window.Windowing(&frameA[0], frameSize, WIN_HAMMING);
	_filterbank_basic(&frameA[0], frameSize, filter_bank, fborder, fftSize, m_cepSmooth, (int)(m_sampleRate/MAX_PITCH_FREQ));
	/* Convert to dB scale */
	float log10db=(float)(10/log(10));
	for(int i=0;i<fborder;i++) filter_bank[i]*=log10db;
	return 1;
}
int	Fe::_filterbank_basic(float *sample, int frameSize, float *filter_bank, int fborder, int fftSize, int cep_smooth, int cepFilterLen)
{
	if(m_MelFB.size() != fborder || fftSize != m_MelFBfftSize){
		m_MelFBfftSize=fftSize;
		MfccInitMelFilterBanks ((float)64.0, (float)m_sampleRate, fftSize, fborder);
	}
	int i;
	int uiLogFFTSize = (int)(log((double)fftSize)/log((double)2)+0.5);
	vector<float> a(fftSize);
	vector<float> b(fftSize);
	for(i=0;i<frameSize;i++){ a[i] = sample[i]; b[i]=0; }
	for(i=frameSize;i<fftSize;i++) a[i] = b[i] = 0;
	PRFFT_NEW( &a[0], &b[0], uiLogFFTSize, frameSize, 1);	
	for(i=0;i<fftSize;i++){
		a[i] = a[i]*a[i] + b[i]*b[i];
		b[i] = 0.0;
	}
	MfccMelFilterBank (&a[0], fborder, &filter_bank[0], 1);	
	for (i = 0; i < fborder; i++) filter_bank[i] = 0.5*LogE(filter_bank[i]);
		
	return(1);
}
int Fe::compute_spectrum(float *input, float *spectrum, int winlength, int log2length) 
{
  int i, log2length_i;
  int npoints, npoints2;
  npoints  = (int) (pow(2.0,(float) log2length) + 0.5);
  npoints2 = npoints / 2;
  vector<FeComplex> complex_in(npoints);
  for(i=0;i<winlength;i++){
    complex_in[i].m_re = input[i];
    complex_in[i].m_im = (float)0.0;
  }
  for(i = winlength; i < npoints; i++){
    complex_in[i].m_re = (float)0.0;
    complex_in[i].m_im = (float)0.0;
  }
  log2length_i = (int)log2length;
  FAST_new(&complex_in[0],log2length_i);
  
  spectrum[0] = complex_in[0].m_re * complex_in[0].m_re;
  spectrum[npoints2] = complex_in[npoints2].m_re * complex_in[npoints2].m_re;
  /*
    Only the first half of the spectrum[] array is filled with data.
    The second half is just a reflection of the first half.
  */
  for(i=1;i<npoints2;i++){
    spectrum[i] = complex_in[i].m_re * complex_in[i].m_re + complex_in[i].m_im * complex_in[i].m_im;
  }
  for(i=npoints-1;i>npoints2;i--){
	  spectrum[i] = spectrum[npoints-i];
  }
  return(0);
}
int Fe::smooth_spectrum(float *spectrum, int pointsN)
{
	vector<float> tmp(pointsN);
	tmp[0]=spectrum[0];
	tmp[1]=(2*spectrum[1]+spectrum[0]+spectrum[2])/(float)4;
	for(int i=2;i<pointsN-2;i++){
		tmp[i]=(float)(4*spectrum[i]+2*spectrum[i-1]+2*spectrum[i+1]+spectrum[i-2]+spectrum[i+2])/(float)10;
	}
	tmp[pointsN-2]=(2*spectrum[pointsN-2]+spectrum[pointsN-3]+spectrum[pointsN-1])/(float)4;
	tmp[pointsN-1]=spectrum[pointsN-1];
	memmove(spectrum, &tmp[0], sizeof(float)*pointsN);
	return(1);
}
/*========================================================================
** Discrete Fourier analysis routine
** from IEEE Programs for Digital Signal Processing
** G. D. Bergland and M. T. Dolan, original authors
** Translated from the FORTRAN with some changes by Paul Kube
** Modified to return the power spectrum by Chuck Wooters
========================================================================*/
/**************************************************************************
FAST_new - In-place radix 2 decimation in time FFT
Requires pointer to complex array, x and power of 2 size of FFT, m
(size of FFT = 2**m).  Places FFT output on top of input FeComplex array.
void FAST_new(FeComplex *x, int m)
*************************************************************************/
void Fe::FAST_new(FeComplex *x, int m)
{
    FeComplex u,temp,tm;
    FeComplex *xi,*xip,*xj,*wptr;
    int i,j,k,l,le,windex;
    double arg,w_real,w_imag,wrecur_real,wrecur_imag,wtemp_real;
	
	if(m == 0) return;       /* if m=0 then done */
	int n = 1 << m; /* length of fft */
    if(n != m_fftW.size()) {		
		m_fftW.resize(n); 
        le = n/2;
		
		for(int nn=0;nn < le-1; nn++)
			m_fftW[nn].m_re = m_fftW[nn].m_im = (float)0.0;
		
        arg = 4.0*atan(1.0)/le;
        wrecur_real = w_real = cos(arg);
        wrecur_imag = w_imag = -sin(arg);
        xj = &m_fftW[0];
        for (j = 1 ; j < le ; j++) {
            xj->m_re = (float)wrecur_real;
            xj->m_im = (float)wrecur_imag;
            xj++;
            wtemp_real = wrecur_real*w_real - wrecur_imag*w_imag;
            wrecur_imag = wrecur_real*w_imag + wrecur_imag*w_real;
            wrecur_real = wtemp_real;
        }
    }
		
    le = n;
    windex = 1;
    for (l = 0 ; l < m ; l++) {
        le = le/2;
		
        for(i = 0 ; i < n ; i = i + 2*le) {
            xi = x + i;
            xip = xi + le;
            temp.m_re = xi->m_re + xip->m_re;
            temp.m_im = xi->m_im + xip->m_im;
            xip->m_re = xi->m_re - xip->m_re;
            xip->m_im = xi->m_im - xip->m_im;
            *xi = temp;
        }
		
        wptr = &m_fftW[0] + windex - 1;
        for (j = 1 ; j < le ; j++) {
            u = *wptr;
            for (i = j ; i < n ; i = i + 2*le) {
                xi = x + i;
                xip = xi + le;
                temp.m_re = xi->m_re + xip->m_re;
                temp.m_im = xi->m_im + xip->m_im;
                tm.m_re = xi->m_re - xip->m_re;
                tm.m_im = xi->m_im - xip->m_im;
                xip->m_re = tm.m_re*u.m_re - tm.m_im*u.m_im;
                xip->m_im = tm.m_re*u.m_im + tm.m_im*u.m_re;
                *xi = temp;
            }
            wptr = wptr + windex;
        }
        windex = 2*windex;
    }
	
    j = 0;
    for (i = 1 ; i < (n-1) ; i++) {
        k = n/2;
        while(k <= j) {
            j = j - k;
            k = k/2;
        }
        j = j + k;
        if (i < j) {
            xi = x + i;
            xj = x + j;
            temp = *xj;
			
            *xj = *xi;
            *xi = temp;
        }
    }
}
/* pruned FFT */
void PRFFT_NEW(float *a, float *b, int m, int n_pts, int iff)
{
	int	l,lmx,lix,lm,li,j1,j2,ii,jj,nv2,nm1,k,lmxy,n;
	float	scl,s,c,arg,T_a,T_b;
	
	n = 1 << m;
	for( l=1 ; l<=m ; l++ ) {
		lmx = 1 << (m - l) ;
		lix = 2 * lmx ;
		scl = 2 * (float)M_PI/(float)lix ;
		
		if(lmx < n_pts) lmxy = lmx ;
		else lmxy = n_pts ;
		for( lm = 1 ; lm <= lmxy ; lm++ ) {
			arg=((float)(lm-1)*scl) ;
			c = (float)cos(arg) ;
			s = iff * (float)sin(arg) ;
			
			for( li = lix ; li <= n ; li += lix ) {
				j1 = li - lix + lm ;
				j2 = j1 + lmx ;
				if(lmxy != n_pts ) {
					T_a=a[j1-1] - a[j2-1] ;
					/* T_a : real part */
					T_b=b[j1-1] - b[j2-1] ;
					/* T_b : imaginary part */
					a[j1-1] = a[j1-1] + a[j2-1] ;
					b[j1-1] = b[j1-1] + b[j2-1] ;
					a[j2-1] = T_a*c + T_b*s ;
					b[j2-1] = T_b*c - T_a*s ;
				} else{
					a[j2-1] = a[j1-1]*c + b[j1-1]*s ;
					b[j2-1] = b[j1-1]*c - a[j1-1]*s ;
				}
			}
		}
	}
	nv2 = n/2 ;
	nm1 = n - 1 ;
	jj = 1 ;
	
	for( ii = 1 ; ii <= nm1 ;) {
		if( ii < jj ) {
			T_a = a[jj-1] ;
			T_b = b[jj-1] ;
			a[jj-1] = a[ii-1] ;
			b[jj-1] = b[ii-1] ;
			a[ii-1] = T_a ;
			b[ii-1] = T_b ;
		}
		k = nv2 ;
		while( k < jj ) {
			jj = jj - k ;
			k = k/2 ;
		}
		jj = jj + k ;
		ii = ii + 1 ;
	}
	if(iff == (-1)){
		for( l=0 ; l<n ; l++ ) {
			a[l]/=(float)n;
			b[l]/=(float)n;
		}
	}
}