FE_enhance.h
上传用户:italyroyal
上传日期:2013-05-06
资源大小:473k
文件大小:5k
- ///////////////////////////////////////////////////////////////////////////////
- // This is a part of the Feature program.
- // Version: 1.0
- // Date: February 22, 2003
- // Programmer: Oh-Wook Kwon
- // Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
- ///////////////////////////////////////////////////////////////////////////////
- #ifndef _FE_ENHANCE_H_
- #define _FE_ENHANCE_H_
- #include "FE_common.h"
- /*-----------------------------------*/
- /* Definition for endpoint detectors */
- /*-----------------------------------*/
- #define NR_MAX_RECORD_TIME 10 /* 10 seconds */
- #define NR_WAV_BUF_SIZE (NR_MAX_RECORD_TIME*16000) /* 10 seconds at 16 kHz mono sampling frequency (160000 samples) */
- #define NR_MAX_WIN_SIZE 512 /* maximum window size; sizes m_HanningWin, m_spec_re and m_spec_im in class Wiener */
- /*----------------------------------*/
- /* Definition for noise reduction */
- /*----------------------------------*/
- #define NR_MAX_FRAME_SHIFT 320 /* m_buf_in and m_buf_out each hold 4 frames of this length; presumably the largest frame shift in samples -- TODO confirm */
- #define NR_MAX_SPEC_LENGTH 257 /* FFT_LENGTH_1/2+1; numerically equal to NR_MAX_WIN_SIZE/2+1 */
- #define NR_OUT_BUF_SIZE (5*NR_MAX_WIN_SIZE) /* 2560 elements; sizes Wiener::m_outSpeech */
- #ifdef _DEBUG
- #define NR_BUF_SIZE NR_WAV_BUF_SIZE /* debug builds buffer the full NR_MAX_RECORD_TIME seconds */
- #else
- #define NR_BUF_SIZE (2*16000) /* keep only 2 seconds of speech (at 16 kHz) to save memory */
- #endif
- /*----------------------------------*/
- /* Definition for Wiener filter */
- /*----------------------------------*/
- #define NR_NUM_CHANNELS 23 /* the mel filter-bank arrays in class Wiener are sized NR_NUM_CHANNELS+2 */
- #define NR_FL 17 /* length of m_hWFw and m_HanningWin2; presumably the time-domain filter length -- TODO confirm */
- typedef struct { /* one mel filter-bank entry; class Wiener stores NR_NUM_CHANNELS+2 of these in m_MelFB */
- int m_lowX; /* presumably the index of the band's lower edge -- TODO confirm against InitMelFilterBanks */
- int m_centerX; /* presumably the index of the band's center -- TODO confirm */
- int m_highX; /* presumably the index of the band's upper edge -- TODO confirm */
- float m_sumWeight; /* presumably the sum of the band's weights, used for normalization -- TODO confirm */
- } WfMelFB; /* mel filter bank for noise reduction */
- class Wiener { /* state and entry points for frame-based noise reduction; the section names below resemble the ETSI ES 202 050 noise-reduction stages -- NOTE(review): confirm against the implementation */
- public:
- /* basic part */
- int m_isWiener; /* presumably nonzero selects OneFrameWiener over OneFrameSS; see Init's isWiener argument -- TODO confirm */
- int m_sampleRate; /* presumably in Hz, taken from Init's samplingRate -- TODO confirm */
- int m_winSize; /* presumably at most NR_MAX_WIN_SIZE, which sizes the window buffers -- TODO confirm */
- int m_shiftSize; /* presumably at most NR_MAX_FRAME_SHIFT, which sizes m_buf_in/m_buf_out -- TODO confirm */
- int m_fftSize;
- /* derived part */
- int m_specLength; /* presumably m_fftSize/2+1 (cf. NR_MAX_SPEC_LENGTH) -- TODO confirm */
- float m_scaleFactor;
- /* for audio/file interface */
- short m_inputSpeech[NR_BUF_SIZE]; /* ring buffer for input speech; NR_BUF_SIZE differs between _DEBUG and non-_DEBUG builds */
- long m_inputEndX; /* end sample point to input speech */
- /* spectrum estimation */
- float m_HanningWin[NR_MAX_WIN_SIZE]; /* filled via InitHanning, presumably -- TODO confirm */
- float m_buf_in[4*NR_MAX_FRAME_SHIFT]; /* frame 0, frame 1, frame 2, frame 3 */
- float m_buf_out[4*NR_MAX_FRAME_SHIFT]; /* frame 0, frame 1, frame 2, frame 3 */
- float m_spec[NR_MAX_SPEC_LENGTH];
- float m_spec_re[NR_MAX_WIN_SIZE]; /* presumably the real part of the FFT output -- TODO confirm */
- float m_spec_im[NR_MAX_WIN_SIZE]; /* presumably the imaginary part of the FFT output -- TODO confirm */
- /* Power spectral density mean */
- float m_sqrtInPSD[NR_MAX_SPEC_LENGTH];
- float m_lastSpectrum[NR_MAX_SPEC_LENGTH];
- float m_lastSpectrum2[NR_MAX_SPEC_LENGTH];
- float m_sqrtNoisePSD[NR_MAX_SPEC_LENGTH];
- long m_nbFrameX;
- /* Wiener filter design */
- float m_wienerFilter[NR_MAX_SPEC_LENGTH];
- float m_sqrtDen3PSD[NR_MAX_SPEC_LENGTH];
- /* Spectral subtraction design */
- float m_ssFilter[NR_MAX_SPEC_LENGTH];
- float m_oversubGain; /* over-subtraction gain (fg), usually 4 */
- float m_oversubCutoffFreq; /* over-subtraction cutoff frequency (fc), usually 800 Hz */
- float m_oversubFactor[NR_MAX_SPEC_LENGTH]; /* oversubFac(f) = fg/(1+f/fc) */
- /* for VAD for noise estimation */
- int m_nbSpeechFrame;
- int m_flagVADNest; /* presumably the speech/non-speech decision updated by VADNest -- TODO confirm */
- int m_hangOver;
- float m_meanEn;
- long m_nbFrameVADNest;
- #ifdef _DEBUG /* NOTE(review): these members change the class layout, so every translation unit using Wiener must agree on _DEBUG */
- short m_denSpeech[NR_BUF_SIZE]; /* ring buffer for denoised speech */
- long m_denEndX; /* end sample point to denoised speech */
- #endif
- float m_outSpeech[NR_OUT_BUF_SIZE]; /* NR_OUT_BUF_SIZE is 5*NR_MAX_WIN_SIZE elements */
- long m_localFrameX; /* time frame index of noise reduction (for internal use) */
- /* Mel filter-bank */
- int m_NumChannels; /* presumably at most NR_NUM_CHANNELS, which sizes the arrays below -- TODO confirm */
- WfMelFB m_MelFB[NR_NUM_CHANNELS+2];
- float m_MelWeight[NR_MAX_SPEC_LENGTH];
- float m_H2mel[NR_NUM_CHANNELS+2];
- /* Mel IDCT */
- float m_hWFmirr[2*(NR_NUM_CHANNELS+1)+1]; /* 49 elements with NR_NUM_CHANNELS == 23 */
- float m_melIdctMatrix[(NR_NUM_CHANNELS+2)*(NR_NUM_CHANNELS+2)]; /* flat storage for a square (NR_NUM_CHANNELS+2)-by-(NR_NUM_CHANNELS+2) matrix */
- /* Apply filter */
- int m_bufStartX;
- float m_hWFw[NR_FL];
- float m_HanningWin2[NR_FL];
- /*--------------------*/
- /* Member functions */
- /*--------------------*/
- Wiener();
- virtual ~Wiener();
- int Init(int samplingRate, int isWiener); /* meaning of the int return value is not visible in this header -- check the implementation */
- FeReturnCode InitNewUtterance(const char *fname); /* FeReturnCode is not declared in this header; presumably it comes from FE_common.h */
- FeReturnCode OneFrame(short *sample, int sampleN, float *out, int frameX); /* presumably processes sampleN input samples into out for frame frameX -- TODO confirm buffer sizes expected by the implementation */
- void Close();
- #ifdef _DEBUG /* debug-only helpers; m_denSpeech exists only in _DEBUG builds */
- int SaveInput(const char *fname, int offsetX);
- int SaveDenoised(const char *fname, int offsetX);
- #endif
- private:
- int GetSample(short *sample, int sampleN);
- FeReturnCode OneFrameWiener(float *si, float *out); /* Wiener-filter counterpart of OneFrameSS */
- FeReturnCode OneFrameSS(float *in, float *out); /* presumably SS stands for spectral subtraction (see m_ssFilter) -- TODO confirm */
- void EstimateSpectrum(float *s, float *spectrum, float *re, float *im, int subSample);
- void ComputeMeanPSD(float *spectrum, float *lastSpectrum, float *lastSpectrum2, int flagVADNest, float *sqrtInPSD);
- void DesignWiener(int t, int flagVADNest, const float *in, const float *inPSD, float *noisePSD, float *den3PSD, float *filter); /* in and inPSD are read-only here; the non-const pointers may be written */
- void DesignSpecsub(int t, int flagVADNest, const float *in, const float *inPSD, float *noisePSD, float *den3PSD, float *filter); /* same parameter list as DesignWiener */
- void VADNest(int t, const float *s);
- void ApplyFilter(float *re, float *im, float *h, float *out);
-
- void InitHanning (float *win, int len);
- void InitMelFilterBanks (float startingFrequency, float samplingRate, int fftLength, int numChannels);
- int InitMelIDCTMatrix (float *idctMatrix, int numChannels);
- void MelFilterBank(float *h2, float *h2mel);
- void MelIDCT(float *h2mel, float *hWFmirr);
- void ApplyWiener(float *s, float *hWFmirr, float *hWFw, float *out);
-
- };
- #endif