语音合成与识别

开发平台：
Visual C++

FE_enhance.h：源码内容
							///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
#ifndef _FE_ENHANCE_H_
#define _FE_ENHANCE_H_
#include "FE_common.h"
/*
 * Compile-time sizing constants for the noise-reduction front end.
 * They dimension the fixed-size member arrays of class Wiener below, so
 * changing any of them changes the size of every Wiener object.
 */
/*-----------------------------------*/
/* Definition for endpoint detectors */
/*-----------------------------------*/
#define NR_MAX_RECORD_TIME      10           /* 10 seconds */
#define NR_WAV_BUF_SIZE         (NR_MAX_RECORD_TIME*16000)   /* 10 seconds at 16 kHz mono sampling frequency */
#define NR_MAX_WIN_SIZE         512          /* maximum window size */
/*----------------------------------*/
/* Definition for noise reduction   */
/*----------------------------------*/
/* Per-frame length used to size m_buf_in / m_buf_out (4 frames each). */
/* Presumably the largest supported frame shift in samples -- TODO confirm. */
#define NR_MAX_FRAME_SHIFT          320
/* 257 == 512/2+1, i.e. NR_MAX_WIN_SIZE/2+1; keep in sync if the FFT length changes. */
#define NR_MAX_SPEC_LENGTH          257   /* FFT_LENGTH_1/2+1 */
/* Length of the m_outSpeech member array. */
#define NR_OUT_BUF_SIZE (5*NR_MAX_WIN_SIZE)
/*
 * Length of the speech ring buffers (m_inputSpeech, and m_denSpeech in debug
 * builds). Debug builds keep the full NR_WAV_BUF_SIZE recording; other builds
 * keep 2*16000 samples only.
 */
#ifdef _DEBUG
#define NR_BUF_SIZE   NR_WAV_BUF_SIZE
#else
#define NR_BUF_SIZE   (2*16000)  /* save only 2 second speech to save memory */
#endif
/*----------------------------------*/
/* Definition for Wiener filter     */
/*----------------------------------*/
/* Sizes the mel filter-bank arrays (m_MelFB, m_H2mel, m_hWFmirr, m_melIdctMatrix). */
#define NR_NUM_CHANNELS             23
/* Length of m_hWFw and m_HanningWin2; presumably the filter length in taps -- TODO confirm. */
/* NOTE(review): 23 and 17 look like the mel-channel count and filter length */
/* of the ETSI ES 202 050 Wiener stage -- confirm against the implementation. */
#define NR_FL                       17
/*
 * WfMelFB -- one entry of the mel filter bank used for noise reduction.
 *
 * The three *X fields are presumably spectrum-bin indices of the lower edge,
 * centre and upper edge of the band, and m_sumWeight the sum of the band's
 * weights (names only; verify against InitMelFilterBanks).
 * Field order is part of the layout and must not change.
 */
typedef struct {
	int m_lowX,          /* lower index of the band */
	    m_centerX,       /* centre index of the band */
	    m_highX;         /* upper index of the band */
	float m_sumWeight;   /* accumulated weight for the band */
} WfMelFB;
/*
 * Wiener -- state and interface of the noise-reduction stage.
 *
 * The class declares two per-frame processing paths, OneFrameWiener() and
 * OneFrameSS() (spectral subtraction), together with the buffers they use.
 * The implementation is not part of this header; m_isWiener presumably
 * selects between the two paths -- TODO confirm.
 *
 * Every buffer is a fixed-size member array dimensioned by the NR_* macros,
 * so a Wiener object is large (m_inputSpeech alone holds NR_BUF_SIZE shorts).
 * The _DEBUG-only members change the object layout, so all translation units
 * that include this header must be built with the same _DEBUG setting.
 */
class Wiener {
public:
	/* basic part */
	int m_isWiener;   /* same name as Init()'s isWiener argument; presumably stored from it -- confirm */
	int m_sampleRate;
	int m_winSize;
	int m_shiftSize;
	int m_fftSize;
	/* derived part */
	int m_specLength;
	float m_scaleFactor;
	/* for audio/file interface */
	short m_inputSpeech[NR_BUF_SIZE]; /* ring buffer for input speech */
	long m_inputEndX; /* end sample point to input speech */
	/* spectrum estimation */
	float m_HanningWin[NR_MAX_WIN_SIZE];
	float m_buf_in[4*NR_MAX_FRAME_SHIFT]; /* frame 0, frame 1, frame 2, frame 3 */
	float m_buf_out[4*NR_MAX_FRAME_SHIFT]; /* frame 0, frame 1, frame 2, frame 3 */
	float m_spec[NR_MAX_SPEC_LENGTH];
	float m_spec_re[NR_MAX_WIN_SIZE];
	float m_spec_im[NR_MAX_WIN_SIZE];
	/* Power spectral density mean */
	float m_sqrtInPSD[NR_MAX_SPEC_LENGTH];
	float m_lastSpectrum[NR_MAX_SPEC_LENGTH];
	float m_lastSpectrum2[NR_MAX_SPEC_LENGTH];
	float m_sqrtNoisePSD[NR_MAX_SPEC_LENGTH];
	long m_nbFrameX;
	/* Wiener filter design */
	float m_wienerFilter[NR_MAX_SPEC_LENGTH];
	float m_sqrtDen3PSD[NR_MAX_SPEC_LENGTH];
	/* Spectral subtraction design */
	float m_ssFilter[NR_MAX_SPEC_LENGTH];
	float m_oversubGain; /* over-subtraction gain (fg), usually 4 */
	float m_oversubCutoffFreq; /* over-subtraction cutoff frequency (fc), usually 800 Hz */
	float m_oversubFactor[NR_MAX_SPEC_LENGTH]; /* oversubFac(f) = fg/(1+f/fc) */
	/* for VAD for noise estimation */
	int m_nbSpeechFrame;
	int m_flagVADNest;
	int m_hangOver;
	float m_meanEn;
	long m_nbFrameVADNest;
	/* Debug builds additionally keep the denoised signal (see SaveDenoised below). */
#ifdef _DEBUG
	short m_denSpeech[NR_BUF_SIZE]; /* ring buffer for denoised speech */
	long m_denEndX; /* end sample point to denoised speech */
#endif
	float m_outSpeech[NR_OUT_BUF_SIZE];
	long m_localFrameX; /* time frame index of noise reduction (for internal use) */
	/* Mel filter-bank */
	int m_NumChannels;
	WfMelFB m_MelFB[NR_NUM_CHANNELS+2];
	float m_MelWeight[NR_MAX_SPEC_LENGTH];
	float m_H2mel[NR_NUM_CHANNELS+2];
	/* Mel IDCT */
	float m_hWFmirr[2*(NR_NUM_CHANNELS+1)+1];
	float m_melIdctMatrix[(NR_NUM_CHANNELS+2)*(NR_NUM_CHANNELS+2)]; /* flattened (NR_NUM_CHANNELS+2) x (NR_NUM_CHANNELS+2) matrix */
	/* Apply filter */
	int m_bufStartX;
	float m_hWFw[NR_FL];
	float m_HanningWin2[NR_FL];
	/*--------------------*/
	/* Member functions */
	/*--------------------*/
	Wiener();
	virtual ~Wiener();
	/* Configure for the given sampling rate and mode. */
	/* NOTE(review): returns int while the sibling entry points return FeReturnCode; */
	/* the meaning of the return value is not visible in this header. */
	int Init(int samplingRate, int isWiener);
	/* Called per utterance; the role of fname is not visible here -- TODO confirm. */
	FeReturnCode InitNewUtterance(const char *fname);
	/* Public per-frame entry point: takes sampleN input samples and a frame index, */
	/* writes to out. The required size of out is not visible here -- TODO confirm. */
	FeReturnCode OneFrame(short *sample, int sampleN, float *out, int frameX);
	void Close();
	/* Debug-only dump helpers; presumably write m_inputSpeech / m_denSpeech to fname -- confirm. */
#ifdef _DEBUG
	int SaveInput(const char *fname, int offsetX);
	int SaveDenoised(const char *fname, int offsetX);
#endif
private:
	/* Input handling and the two per-frame processing paths. */
	int GetSample(short *sample, int sampleN);	
	FeReturnCode OneFrameWiener(float *si, float *out);
	FeReturnCode OneFrameSS(float *in, float *out);
	/* Spectrum estimation, PSD averaging, filter design and VAD stages. */
	void EstimateSpectrum(float *s, float *spectrum, float *re, float *im, int subSample);
	void ComputeMeanPSD(float *spectrum, float *lastSpectrum, float *lastSpectrum2, int flagVADNest, float *sqrtInPSD);
	void DesignWiener(int t, int flagVADNest, const float *in, const float *inPSD, float *noisePSD, float *den3PSD, float *filter);
	void DesignSpecsub(int t, int flagVADNest, const float *in, const float *inPSD, float *noisePSD, float *den3PSD, float *filter);
	void VADNest(int t, const float *s);
	void ApplyFilter(float *re, float *im, float *h, float *out);
	
	/* Table initialisation and mel-domain helpers. */
	void InitHanning (float *win, int len);
	void InitMelFilterBanks (float startingFrequency, float samplingRate, int fftLength, int numChannels);
	int InitMelIDCTMatrix (float *idctMatrix, int numChannels);
	void MelFilterBank(float *h2, float *h2mel);
	void MelIDCT(float *h2mel, float *hWFmirr);
	void ApplyWiener(float *s, float *hWFmirr, float *hWFw, float *out);
	
};
#endif