FE_enhance.cpp
上传用户:italyroyal
上传日期:2013-05-06
资源大小:473k
文件大小:23k
- ///////////////////////////////////////////////////////////////////////////////
- // This is a part of the Feature program.
- // Version: 1.0
- // Date: February 22, 2003
- // Programmer: Oh-Wook Kwon
- // Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
- ///////////////////////////////////////////////////////////////////////////////
- #include "StdAfx.h"
- #include <stdio.h>
- #include <stdarg.h>
- #include <stdlib.h>
- #include <string.h>
- #include <math.h>
- #include <assert.h>
- #include "FE_feature.h"
- /*------------------------------------------
- * Definition of Noise Reduction Parameters
- *-----------------------------------------*/
- /* general */
- #ifndef DEFAULT_SAMPLING_RATE
- #define DEFAULT_SAMPLING_RATE 16000
- #endif
- /* Wiener filter design */
- #define NR_NB_FRAME_THRESHOLD_NSE 100
- #define NR_LAMBDA_NSE 0.99
- #define NR_EPS 1e-16 /* owkwon: originally 1e-16 */
- #define NR_BETA 0.98
- #define NR_ETA_TH 0.01 /* owkwon: reduced to -40 dB for kWaves, originally 0.079432823 corresponding to -22 dB */
- #define NR_STARTING_FREQ 0 /* 0Hz */
- /* Spectral subtraction design */
- #define NR_SS_TH 0.01
- /* VAD for noise estimation (VADNest) */
- #define NR_NB_FRAME_THRESHOLD_LTE 10
- #define NR_LAMBDA_LTE 0.97
- #define NR_SNR_THRESHOLD_UPD_LTE 20
- #define NR_ENERGY_FLOOR 80
- #define NR_MIN_FRAME 10
- #define NR_SNR_THRESHOLD_VAD 24 /* owkwon: increased, originally 15 */
- #define NR_MIN_SPEECH_FRAME_HANGOVER 4
- #define NR_HANGOVER 5
- #define NR_SNR_THRESHOLD_NO_UPD_LTE 80 /* owkwon: there sometimes exist only a few silence frames less than NR_MIN_FRAME */
- /*----------------------
- * Constant Definitions
- *----------------------*/
- #ifndef M_PI
- #define M_PI ((float)3.14159265358979323846)
- #endif
- /*----------------------
- * Macro Definitions
- *----------------------*/
- #define NR_T(z) ((z)>0 ? (z) : 0)
- #define NR_SQ(z) ((z)*(z))
- #define Round(x) ((int)((x)+0.5))
- /*---------------------
- * Function Prototypes
- *---------------------*/
- extern void PRFFT_NEW(float *a, float *b, int m, int n_pts, int iff);
- int Fe::EnhanceMain(const char *infile, const char *outfile, int sampleRate, int isWiener)
- {
- FILE *fpin=fopen(infile,"rb");
- if(fpin==0){
- fprintf(stderr, "File open error (%s)n",infile);
- assert(0);
- return 0;
- }
- fseek(fpin,0L,SEEK_END);
- int fsize = ftell(fpin);
- rewind(fpin);
- int sampleN=fsize/sizeof(short);
- vector<short> data(sampleN);
- fread(&data[0], sizeof(short), sampleN, fpin);
- fclose(fpin);
- enhance_basic(&data[0], sampleN, sampleRate, isWiener);
- FILE *fpout=fopen(outfile,"wb");
- if(fpout==0){
- fprintf(stderr, "File open error (%s)n",outfile);
- assert(0);
- return 0;
- }
- fwrite(&data[0], sizeof(short), sampleN, fpout);
- fclose(fpout);
- return 1;
- }
- void Fe::enhance_basic(short *sample, int sampleN, int samplingRate, int isWiener)
- {
- int t;
- Wiener wiener;
- wiener.Init(samplingRate, isWiener);
- float out[NR_MAX_WIN_SIZE];
- wiener.InitNewUtterance(0);
- int frameN=sampleN/wiener.m_shiftSize;
- int outN=0;
- for(t=0; ;t++){
- int i;
- int n=my_min(wiener.m_shiftSize,my_max(0,sampleN-t*wiener.m_shiftSize));
- FeReturnCode status;
- if(n>=wiener.m_shiftSize){
- status=wiener.OneFrame(sample+t*wiener.m_shiftSize, n, &out[0], t);
- }
- else{ /* feed in dummy frames */
- short tmpA[NR_MAX_WIN_SIZE];
- for(i=0;i<n;i++) tmpA[i]=sample[t*wiener.m_shiftSize+i];
- for(i=n;i<wiener.m_shiftSize;i++) tmpA[i]=0;
- status=wiener.OneFrame(tmpA, wiener.m_shiftSize, &out[0], t);
- }
- if(status==FE_NULL) continue;
- int r=my_min(wiener.m_shiftSize,my_max(0,sampleN-outN));
- assert(outN+r <= sampleN);
- for(i=0;i<r;i++){
- sample[outN+i]=(short)out[i];
- }
- outN+=r;
- if(outN>=sampleN) break;
- }
- #if 0
- wiener.SaveInput("test.input.raw",0);
- wiener.SaveDenoised("test.output1.raw",0);
- #endif
- wiener.Close();
- }
- Wiener::Wiener()
- {
- m_sampleRate=DEFAULT_SAMPLING_RATE;
- m_NumChannels=NR_NUM_CHANNELS;
- m_localFrameX=0;
- }
- Wiener::~Wiener()
- {
- }
- void Wiener::Close()
- {
- }
- int Wiener::GetSample(short *sample, int sampleN)
- {
- long startX=(m_inputEndX)%NR_BUF_SIZE;
- if(startX+sampleN <= NR_BUF_SIZE){
- memmove((char*)(m_inputSpeech+startX), sample, 2*(sampleN));
- }
- else{
- memmove((char*)(m_inputSpeech+startX), sample, 2*(NR_BUF_SIZE-startX));
- memmove((char*)(m_inputSpeech), sample+(NR_BUF_SIZE-startX), 2*(sampleN-NR_BUF_SIZE+startX));
- }
- m_inputEndX += sampleN;
- return sampleN;
- }
- int Wiener::Init(int samplingRate, int isWiener)
- {
- int i, kHz;
- m_isWiener=isWiener;
- m_sampleRate=samplingRate;
- /*-------------------------------------------------------------------*/
- /* Set parameters FrameLength, FrameShift and FFTLength */
- /*-------------------------------------------------------------------*/
- kHz=(int)(m_sampleRate/1000.0+0.5);
- if (kHz == SAMPLING_FREQ_1) { /* 8 kHz */
- m_winSize = FRAME_LENGTH_1;
- m_shiftSize = FRAME_SHIFT_1;
- m_fftSize = FFT_LENGTH_1;
- }
- else if (kHz == SAMPLING_FREQ_2) { /* 11 kHz */
- m_winSize = FRAME_LENGTH_2;
- m_shiftSize = FRAME_SHIFT_2;
- m_fftSize = FFT_LENGTH_2;
- }
- else if (kHz == SAMPLING_FREQ_3) { /* 16 kHz */
- m_winSize = FRAME_LENGTH_3;
- m_shiftSize = FRAME_SHIFT_3;
- m_fftSize = FFT_LENGTH_3;
- }
- else {
- float shiftMs=10, winMs=25;
- m_winSize = (int)(winMs/1000*m_sampleRate);
- m_shiftSize = (int)(shiftMs/1000*m_sampleRate);
- m_fftSize = 1; while (m_fftSize<m_winSize) m_fftSize *= 2;
- }
- if(m_shiftSize>NR_MAX_FRAME_SHIFT){
- fprintf(stderr, "ERROR: Too large frame shift size '%d'!rn", m_shiftSize);
- assert(0);
- return 0;
- }
- if(m_isWiener==1){
- m_specLength=m_fftSize/4+1;
- }
- else{
- m_specLength=m_fftSize/2+1;
- }
- m_inputEndX=0;
- #ifdef _DEBUG
- m_denEndX = 0;
- #endif
- m_localFrameX=0;
- /*-------------------------------------------------------*/
- /* Initialize Hanning window and scaling factor */
- /*-------------------------------------------------------*/
- InitHanning(m_HanningWin, m_winSize);
- if(m_isWiener==1){
- m_bufStartX = m_shiftSize-20;
- InitHanning(m_HanningWin2, NR_FL);
- InitMelFilterBanks ((float)NR_STARTING_FREQ, (float)m_sampleRate, 2*(m_specLength-1), m_NumChannels);
- InitMelIDCTMatrix (m_melIdctMatrix, m_NumChannels);
- }
- /* amplitude scaling factor */
- m_scaleFactor=0;
- for(i=0;i<m_shiftSize;i++){
- float sum=0;
- for(int n=0;n+i<m_winSize;n+=m_shiftSize) sum += m_HanningWin[i+n];
- if(sum>m_scaleFactor) m_scaleFactor=sum;
- }
- /* over-subtraction factor */
- if(m_isWiener==0){
- m_oversubGain = 8;
- m_oversubCutoffFreq = 800;
- float fc=m_fftSize*m_oversubCutoffFreq/(float)m_sampleRate;
- for(i=0;i<m_specLength;i++){
- m_oversubFactor[i] = m_oversubGain/(1+i/fc);
- }
- }
- return 1;
- }
- FeReturnCode Wiener::InitNewUtterance(const char *fname)
- {
- int i;
- m_inputEndX = 0;
- #ifdef _DEBUG
- m_denEndX = 0;
- #endif
- m_localFrameX=0;
- /* Wiener filter design */
- m_nbFrameX=1;
- /* VADNest */
- m_nbSpeechFrame=0;
- m_meanEn=0;
- m_flagVADNest=0;
- m_hangOver=0;
- m_nbFrameVADNest=0;
- for(i=0;i<m_specLength;i++){
- m_lastSpectrum2[i]=0;
- m_lastSpectrum[i]=0;
- m_sqrtNoisePSD[i]=(float)NR_EPS;
- m_sqrtDen3PSD[i]=0;
- }
- for(i=0;i<4*m_shiftSize;i++){
- m_buf_in[i]=0;
- m_buf_out[i]=0;
- }
- return FE_OK;
- }
- FeReturnCode Wiener::OneFrame(short *sample, int sampleN, float *out, int frameX)
- {
- int i, k, extraFrameN;
- long startX=m_shiftSize*m_localFrameX;
- long validX;
- k=(m_winSize-m_shiftSize)/m_shiftSize;
- extraFrameN=((m_winSize-m_shiftSize)%m_shiftSize == 0 ? k : k+1);
- sampleN=GetSample(sample,sampleN);
- assert(m_localFrameX-frameX <= 4+extraFrameN);
-
- float si[NR_MAX_WIN_SIZE], so[NR_MAX_WIN_SIZE], den[NR_MAX_WIN_SIZE];
- FeReturnCode status;
-
- /* shift down one frame in the first buffer and insert a new frame */
- for(i=0;i<3*m_shiftSize;i++) m_buf_in[i]=m_buf_in[i+m_shiftSize];
- for(i=0;i<m_shiftSize;i++) m_buf_in[3*m_shiftSize+i]=(float)m_inputSpeech[(startX+i)%NR_BUF_SIZE];
- for(i=0;i<m_winSize;i++) si[i]=m_buf_in[i];
-
- if(m_isWiener==1){
- status=OneFrameWiener(si, so);
- }
- else{
- status=OneFrameSS(si, so);
- }
- if(status==FE_NULL){
- m_localFrameX++;
- return FE_NULL;
- }
-
- if(m_isWiener==0){
- /* overlap and save */
- for(i=0;i<3*m_shiftSize;i++) m_buf_out[i]=m_buf_out[i+m_shiftSize];
- for(i=0;i<m_shiftSize;i++) m_buf_out[3*m_shiftSize+i]=0;
- for(i=0;i<m_winSize;i++) m_buf_out[i]+=so[i];
-
- /* get the result */
- for(i=0;i<m_shiftSize;i++) den[i]=m_buf_out[i]/m_scaleFactor;
- }
- else{
- for(i=0;i<m_shiftSize;i++) den[i]=so[i];
- }
-
- /* save the denoised speech at the ring buffer */
- for(i=0;i<m_shiftSize;i++){
- m_outSpeech[(startX+i)%NR_OUT_BUF_SIZE]=den[i];
- }
- #ifdef _DEBUG
- /* save the denoised speech at the ring buffer */
- validX=startX-2*m_shiftSize;
- for(i=0;i<m_shiftSize;i++){
- m_denSpeech[(validX+i)%NR_BUF_SIZE]=(short)(den[i]);
- }
- #endif
-
- /* NR has 5 frame delay due to one buffer and extraFrameN delay to make one frame for feature extraction */
- if(m_localFrameX<5+extraFrameN){
- m_localFrameX++;
- return FE_NULL;
- }
-
- validX=startX-extraFrameN*m_shiftSize-((m_isWiener==1) ? 2*m_shiftSize : 0);
- for(i=0;i<m_winSize;i++) {
- out[i]=m_outSpeech[(validX+i)%NR_OUT_BUF_SIZE];
- }
-
- #ifdef _DEBUG
- m_denEndX += m_shiftSize;
- #endif
- m_localFrameX++;
- return FE_SPEECH;
-
- }
- FeReturnCode Wiener::OneFrameWiener(float *si, float *out)
- {
- VADNest(m_localFrameX, si);
- EstimateSpectrum(si, m_spec, m_spec_re, m_spec_im, 1);
- ComputeMeanPSD(m_spec, m_lastSpectrum, m_lastSpectrum2, m_flagVADNest, m_sqrtInPSD);
- if(m_localFrameX<2) {
- int i;
- for(i=0;i<m_winSize;i++) out[i]=si[i];
- return FE_NULL;
- }
- DesignWiener(m_localFrameX, m_flagVADNest, m_spec, m_sqrtInPSD, m_sqrtNoisePSD, m_sqrtDen3PSD, m_wienerFilter);
- if(m_isWiener==0){
- ApplyFilter(m_spec_re, m_spec_im, m_wienerFilter, out);
- }
- else{
- MelFilterBank(m_wienerFilter, m_H2mel);
- MelIDCT(m_H2mel, m_hWFmirr);
- assert(m_shiftSize-m_bufStartX > (NR_FL-1)/2);
- ApplyWiener(si+m_shiftSize-m_bufStartX, m_hWFmirr, m_hWFw, out);
- }
- return FE_SPEECH;
- }
- FeReturnCode Wiener::OneFrameSS(float *si, float *out)
- {
- EstimateSpectrum(si, m_spec, m_spec_re, m_spec_im, 0);
- ComputeMeanPSD(m_spec, m_lastSpectrum, m_lastSpectrum2, m_flagVADNest, m_sqrtInPSD);
- if(m_localFrameX<2) {
- int i;
- for(i=0;i<m_winSize;i++) out[i]=si[i];
- return FE_NULL;
- }
- DesignSpecsub(m_localFrameX, m_flagVADNest, m_spec, m_sqrtInPSD, m_sqrtNoisePSD, m_sqrtDen3PSD, m_ssFilter);
- ApplyFilter(m_spec_re, m_spec_im, m_ssFilter, out);
- return FE_SPEECH;
- }
- void Wiener::InitHanning (float *win, int len)
- {
- int i;
- for (i = 0; i < len; i++){
- win[i] = (float)(0.5 - 0.5 * cos (2 * M_PI * (i + 0.5)/len));
- }
- }
- void Wiener::EstimateSpectrum(float *s, float *spectrum, float *re, float *im, int subSample)
- {
- int i, n, m;
- float x[NR_MAX_WIN_SIZE];
- /* apply Hanning window */
- for(n=0;n<m_winSize;n++){
- re[n] = s[n]*m_HanningWin[n];
- im[n] = 0;
- }
- /* pad zeros */
- for(n=m_winSize;n<m_fftSize;n++){
- re[n]=0;
- im[n]=0;
- }
- /* do FFT */
- m = (int) (log10 (m_fftSize) / log10 (2));
- PRFFT_NEW(re, im, m, m_fftSize, 1);
- /* take power spectrum, P(bin)=|X(bin)|^2 */
- x[0] = re[0] * re[0]; /* DC */
- for (n = 1; n < m_fftSize / 2; n++) {
- x[n] = re[n] * re[n] + im[n] * im[n];
- }
- x[m_fftSize / 2] = re[m_fftSize / 2] * re[m_fftSize / 2]; /* pi/2 */
- if(subSample==1){
- /* smooth and subsample spectrum */
- assert(m_specLength==m_fftSize/4+1);
- for(i=0;i<m_fftSize/4;i++){
- spectrum[i]=(x[2*i]+x[2*i+1])/2;
- }
- spectrum[m_fftSize/4]=x[m_fftSize/2];
- }
- else{
- for(i=0;i<m_fftSize/2+1;i++){
- spectrum[i]=x[i];
- }
- }
- }
- void Wiener::ComputeMeanPSD(float *spectrum, float *lastSpectrum, float *lastSpectrum2, int flagVADNest, float *sqrtInPSD)
- {
- int i;
- for(i=0;i<m_specLength;i++){
- sqrtInPSD[i]=(float)sqrt((double)(lastSpectrum2[i]+lastSpectrum[i]+spectrum[i])/(double)3.0);
- lastSpectrum2[i]=lastSpectrum[i];
- lastSpectrum[i]=spectrum[i];
- }
- }
- void Wiener::DesignWiener(int t, int flagVADNest, const float *in, const float *sqrtInPSD, float *sqrtNoisePSD, float *sqrtDen3PSD, float *filter)
- {
- int i;
- float lambdaNSE;
- float sqrtDenPSD[NR_MAX_SPEC_LENGTH], sqrtDen2PSD[NR_MAX_SPEC_LENGTH];
- float sqrtEta[NR_MAX_SPEC_LENGTH], sqrtEta2[NR_MAX_SPEC_LENGTH], h[NR_MAX_SPEC_LENGTH];
- /* set the forgetting factor */
- if(t<NR_NB_FRAME_THRESHOLD_NSE)
- lambdaNSE=1-1/(float)(m_nbFrameX);
- else
- lambdaNSE=(float)NR_LAMBDA_NSE;
-
- if(t>=2) m_nbFrameX++;
-
- if(flagVADNest==0){ /* this is the result of the previous frame */
- for(i=0;i<m_specLength;i++){
- /* Update. Pnoise(tn)^0.5 = max(a * Pnoise(tn-1)^0.5 + (1-a) * Pin_PSD(tn)^0.5, EPS) */
- sqrtNoisePSD[i]=(float)my_max(lambdaNSE*sqrtNoisePSD[i]+(1-lambdaNSE)*sqrtInPSD[i], NR_EPS);
- }
- }
- else{
- /* No change. Pnoise(t)^0.5 = Pnoise(tn)^0.5 */
- }
-
- if(t>=2){
- /* Be careful about sqrt and square! */
- for(i=0;i<m_specLength;i++){
- sqrtDenPSD[i]=(float)(NR_BETA*sqrtDen3PSD[i]+(1-NR_BETA)*NR_T(sqrtInPSD[i]-sqrtNoisePSD[i]));
- sqrtEta[i]=sqrtDenPSD[i]/sqrtNoisePSD[i];
- h[i]=sqrtEta[i]/(1+sqrtEta[i]);
- sqrtDen2PSD[i]=h[i]*(sqrtInPSD[i]);
- sqrtEta2[i]=my_max(sqrtDen2PSD[i]/sqrtNoisePSD[i], (float)NR_ETA_TH);
- filter[i]=sqrtEta2[i]/(1+sqrtEta2[i]);
- sqrtDen3PSD[i]=(float)(filter[i]*sqrt(in[i]));
- }
- }
- }
- void Wiener::DesignSpecsub(int t, int flagVADNest, const float *in, const float *sqrtInPSD, float *sqrtNoisePSD, float *sqrtDen3PSD, float *filter)
- {
- int i;
- float lambdaNSE;
- float s, n;
- float globalFac;
- /* set the forgetting factor */
- if(t<NR_NB_FRAME_THRESHOLD_NSE)
- lambdaNSE=1-1/(float)(m_nbFrameX);
- else
- lambdaNSE=(float)NR_LAMBDA_NSE;
-
- if(t>=2) m_nbFrameX++;
-
- if(flagVADNest==0 && t<NR_NB_FRAME_THRESHOLD_NSE){ /* this is the result of the previous frame */
- for(i=0;i<m_specLength;i++){
- sqrtNoisePSD[i]=(float)sqrt(my_max(lambdaNSE*NR_SQ(sqrtNoisePSD[i])
- +(1-lambdaNSE)*(in[i]-NR_SQ(sqrtDen3PSD[i])), NR_EPS*NR_EPS));
- }
- }
- else{
- /* No change. Pnoise(t)^0.5 = Pnoise(tn)^0.5 */
- }
-
- if(t<2) return;
- /* compute filter with over-subtraction */
- s=0, n=0;
- for(i=0;i<m_specLength;i++){
- s+=NR_SQ(sqrtInPSD[i]); n+=NR_SQ(sqrtNoisePSD[i]);
- }
- s/=m_specLength; n/=m_specLength;
- globalFac=my_max(n/s,(float)NR_SS_TH);
- for(i=0;i<m_specLength;i++){
- float instSNR = NR_SQ(sqrtNoisePSD[i])/(NR_SQ(sqrtNoisePSD[i])+NR_SQ(sqrtInPSD[i]));
- float instSN = NR_SQ(sqrtNoisePSD[i])/NR_SQ(sqrtInPSD[i]+(float)1e-20);
- filter[i] = (float)sqrt(my_max(1-(1+m_oversubFactor[i]*instSNR)*instSN, (float)NR_SS_TH*NR_SS_TH*instSN));
- sqrtDen3PSD[i]=(float)(filter[i]*sqrt(in[i]));
- }
- return;
- }
- void Wiener::VADNest(int t, const float *s)
- {
- int i;
- float lambdaLTE, lambdaLTEhigherE=(float)0.99;
- float frameEn=0, mean=0;
- int M=2*m_shiftSize; /* changed to get smooth estimate */
- if(t<NR_NB_FRAME_THRESHOLD_LTE)
- lambdaLTE=1-1/(float)(m_nbFrameVADNest+1);
- else
- lambdaLTE=(float)NR_LAMBDA_LTE;
- for(i=0;i<M;i++) {
- frameEn += s[i]*s[i];
- mean += s[i];
- }
- frameEn = frameEn - mean*mean/M; /* owkwon: Use the AC energy only */
- frameEn = frameEn*80/(float)M; /* owkwon: normalize to a value for 80 samples */
- if(t>=2 && frameEn>0) { /* Start updating after only valid non-zero data */
- m_nbFrameVADNest++;
- }
- frameEn = (float)(0.5+16/log(2)*log((64+frameEn)/64)); /* 16/log(2) ~ 23.08 */
- if(frameEn<NR_ENERGY_FLOOR) frameEn=NR_ENERGY_FLOOR; /* to avoid the worst case of all-zero data */
- if(m_nbFrameVADNest==1){
- m_meanEn=frameEn; /* first non-zero frame */
- }
- if(frameEn-m_meanEn<NR_SNR_THRESHOLD_NO_UPD_LTE && (frameEn-m_meanEn < NR_SNR_THRESHOLD_UPD_LTE || t < NR_MIN_FRAME)){
- if(frameEn<m_meanEn || t<NR_MIN_FRAME)
- m_meanEn = m_meanEn + (1-lambdaLTE)*(frameEn-m_meanEn);
- else
- m_meanEn = m_meanEn + (1-lambdaLTEhigherE)*(frameEn-m_meanEn);
-
- if(m_meanEn<NR_ENERGY_FLOOR) m_meanEn=NR_ENERGY_FLOOR;
- }
- if(m_nbFrameVADNest>0 && frameEn-m_meanEn>=NR_SNR_THRESHOLD_NO_UPD_LTE){ /* owkwon: */
- m_flagVADNest=1;
- m_nbSpeechFrame++;
- }
- else if(frameEn-m_meanEn > NR_SNR_THRESHOLD_VAD && t>=NR_MIN_FRAME){ /* owkwon: changed the inquality sign to '>' */
- m_flagVADNest=1;
- m_nbSpeechFrame++;
- }
- else{
- if((m_nbSpeechFrame < NR_MIN_SPEECH_FRAME_HANGOVER) && (m_nbSpeechFrame >= 2)){ /* owkwon: added the condition '&& (m_nbSpeechFrame >= 2)' */
- m_hangOver=NR_HANGOVER;
- }
- m_nbSpeechFrame=0;
- if(m_hangOver != 0){
- m_hangOver=m_hangOver-1;
- m_flagVADNest=1;
- }
- else{
- m_flagVADNest=0;
- }
- }
- }
- void Wiener::ApplyFilter(float *re, float *im, float *h, float *out)
- {
- int i;
- re[0] *= h[0]; im[0] = 0;
- for(i=1;i<m_fftSize/2;i++){
- re[i] *= h[i]; im[i] *= h[i];
- re[m_fftSize-i] = re[i]; im[m_fftSize-i] = -im[i];
- }
- re[m_fftSize/2] = 0;
-
- int m = (int) (log10 (m_fftSize) / log10 (2));
- PRFFT_NEW(re,im,m,m_fftSize,-1);
- for(i=0;i<m_winSize;i++){
- out[i]=re[i];
- }
- }
- #ifdef _DEBUG
- int Wiener::SaveInput(const char *fname, int offsetX)
- {
- int i, sampleN;
- int endX=my_max(0,m_inputEndX-offsetX*m_shiftSize);
- FILE *fp=fopen(fname,"wb");
- if(fp==0){
- assert(0);
- }
- sampleN=my_min(NR_BUF_SIZE,endX);
- for(i=endX-sampleN;i<endX;i++){
- fwrite((char*)(&m_inputSpeech[i%NR_BUF_SIZE]), sizeof(short), 1, fp);
- }
- fclose(fp);
- return 1;
- }
- int Wiener::SaveDenoised(const char *fname, int offsetX)
- {
- int i, sampleN;
- FILE *fp=fopen(fname,"wb");
- if(fp==0){
- assert(0);
- }
- int endX=m_inputEndX-offsetX*m_shiftSize;
- sampleN=my_min(NR_BUF_SIZE,endX);
- for(i=endX-sampleN;i<endX;i++){
- short s=m_denSpeech[i%NR_BUF_SIZE];
- fwrite((char*)&s, sizeof(short), 1, fp);
- }
- fclose(fp);
- return 1;
- }
- #endif
- void Wiener::MelFilterBank(float *h2, float *h2mel)
- {
- int i,k;
- for(k=0;k<=m_NumChannels+1;k++){
- WfMelFB *melFB=&m_MelFB[k];
- float sum = h2[melFB->m_centerX]; /* The center weight is always 1 */
- for(i=melFB->m_lowX;i<melFB->m_centerX;i++){
- sum += m_MelWeight[i]*h2[i];
- }
- for(i=melFB->m_centerX+1;i<=melFB->m_highX;i++){
- sum += (1-m_MelWeight[i])*h2[i];
- }
- h2mel[k]=sum/melFB->m_sumWeight;
- }
- }
- void Wiener::MelIDCT(float *h2mel, float *hWFmirr)
- {
- int n,k;
- float hWF[NR_NUM_CHANNELS+2];
- for(n=0;n<=NR_NUM_CHANNELS+1;n++){
- float sum=0;
- for(k=0;k<=NR_NUM_CHANNELS+1;k++)
- sum += h2mel[k]*m_melIdctMatrix[k*(NR_NUM_CHANNELS+2)+n];
- hWF[n]=sum;
- }
- for(n=0;n<=NR_NUM_CHANNELS+1;n++)
- hWFmirr[n]=hWF[n];
- for(n=NR_NUM_CHANNELS+2;n<=2*(NR_NUM_CHANNELS+1);n++)
- hWFmirr[n]=hWF[2*(NR_NUM_CHANNELS+1)+1-n];
- }
- void Wiener::ApplyWiener(float *s, float *hWFmirr, float *hWFw, float *out)
- {
- int i,n;
- float hWFcaus[2*(NR_NUM_CHANNELS+1)+1];
- float hWFtrunc[NR_FL];
- int FL2=(NR_FL-1)/2;
- float fac=0;
- for(n=0;n<=NR_NUM_CHANNELS;n++)
- hWFcaus[n]=hWFmirr[n+NR_NUM_CHANNELS+1];
- for(n=NR_NUM_CHANNELS+1;n<=2*(NR_NUM_CHANNELS+1);n++)
- hWFcaus[n]=hWFmirr[n-NR_NUM_CHANNELS-1];
- for(n=0;n<=NR_FL-1;n++)
- hWFtrunc[n]=hWFcaus[n+NR_NUM_CHANNELS+1-FL2];
- for(n=0;n<=NR_FL-1;n++){
- hWFw[n]=m_HanningWin2[n]*hWFtrunc[n];
- fac += m_HanningWin2[n];
- }
- for(n=0;n<m_shiftSize;n++){
- float sum=0;
- for(i=-FL2; i<=FL2; i++) sum += hWFw[i+FL2]*s[n-i];
- out[n]=sum;
- }
- }
- /*---------------------------------------------------------------------------
- Initialize data structure for FFT windows (mel filter bank).
- Computes start (bin[k-1]), center (bin[k]) and end (bin[k+1]) points
- of each mel filter to FFT index.
- Input:
- m_sampleRate Sampling frequency
- m_FFTLength FFT length
- m_NumChannels Number of channels
- Output:
- Initialized weights and mel filters
- *---------------------------------------------------------------------------*/
- void Wiener::InitMelFilterBanks (float startingFrequency, float samplingRate, int fftLength, int numChannels)
- {
- int i, k;
- float freq[NR_NUM_CHANNELS+2];
- int bin[NR_NUM_CHANNELS+2];
- float start_mel, fs_per_2_mel;
- /* Constants for calculation */
- freq[0] = startingFrequency;
- start_mel = (float)(2595.0 * log10 (1.0 + startingFrequency / 700.0));
- bin[0] = Round(fftLength * freq[0] / samplingRate);
- freq[numChannels+1] = (float)(samplingRate / 2.0);
- fs_per_2_mel = (float)(2595.0 * log10 (1.0 + (samplingRate / 2.0) / 700.0));
- bin[numChannels+1] = Round(fftLength * freq[numChannels+1] / samplingRate);
- /* Calculating mel-scaled frequency and the corresponding FFT-bin */
- /* number for the lower edge of the band */
- for (k = 1; k <= numChannels; k++) {
- freq[k] = (float)(700 * (pow (10, (start_mel + (float) k / (numChannels + 1) * (fs_per_2_mel - start_mel)) / 2595.0) - 1.0));
- bin[k] = Round(fftLength * freq[k] / samplingRate);
- }
- /* This part is never used to compute MFCC coefficients */
- /* but initialized for completeness */
- for(i = 0; i<bin[0]; i++){
- m_MelWeight[i]=0;
- }
- m_MelWeight[fftLength/2]=1;
- /* Initialize low, center, high indices to FFT-bin */
- m_MelFB[0].m_lowX=0;
- m_MelFB[0].m_centerX=bin[0];
- m_MelFB[0].m_highX=bin[1]-1;
- for (k = 1; k <= numChannels; k++) {
- m_MelFB[k].m_lowX=bin[k-1]+1;
- m_MelFB[k].m_centerX=bin[k];
- m_MelFB[k].m_highX=bin[k+1]-1;
- }
- m_MelFB[numChannels+1].m_lowX=bin[numChannels]+1;
- m_MelFB[numChannels+1].m_centerX=bin[numChannels+1];
- m_MelFB[numChannels+1].m_highX=bin[numChannels+1];
- for (k = 1; k <= numChannels+1; k++) {
- for(i = bin[k-1]; i<bin[k]; i++){
- m_MelWeight[i]=(i-bin[k-1])/(float)(bin[k]-bin[k-1]);
- }
- }
- m_MelWeight[bin[numChannels+1]]=0;
- m_MelFB[0].m_sumWeight=(bin[1]-bin[0]+1)/(float)2;
- for (k=1;k<=numChannels;k++){
- m_MelFB[k].m_sumWeight=(bin[k+1]-bin[k-1])/(float)2;
- }
- m_MelFB[numChannels+1].m_sumWeight=(bin[numChannels+1]-bin[numChannels]+1)/(float)2;
- return;
- }
- /*---------------------------------------------------------------------------
- Initializes matrix for IDCT computation (IDCT is implemented
- as matrix-vector multiplication). The DCT matrix is of size
- (numChannels+2)-by-(numChannels+2).
- *---------------------------------------------------------------------------*/
- int Wiener::InitMelIDCTMatrix (float *idctMatrix, int numChannels)
- {
- int k, n;
- float center[NR_NUM_CHANNELS+2], df[NR_NUM_CHANNELS+2];
- float fac=m_sampleRate/(float)(2*(m_specLength-1));
- center[0]=0;
- center[NR_NUM_CHANNELS+1]=m_sampleRate/(float)2;
- for(k=1;k<=NR_NUM_CHANNELS;k++){
- int i;
- WfMelFB *melFB=&m_MelFB[k];
- float sum = (float)melFB->m_centerX; /* The center weight is always 1 */
- for(i=melFB->m_lowX;i<melFB->m_centerX;i++) sum += m_MelWeight[i]*i;
- for(i=melFB->m_centerX+1;i<=melFB->m_highX;i++) sum += (1-m_MelWeight[i])*i;
- center[k]=sum*fac/melFB->m_sumWeight; /* owkwon: Use normalized W(k,i) */
- }
- df[0]=(center[1]-center[0])/(float)m_sampleRate;
- for(k=1;k<=NR_NUM_CHANNELS;k++){
- df[k]=(center[k+1]-center[k-1])/(float)m_sampleRate;
- }
- df[NR_NUM_CHANNELS+1]=(center[NR_NUM_CHANNELS+1]-center[NR_NUM_CHANNELS])/(float)m_sampleRate;
- for (k = 0; k < numChannels+2; k++){
- for (n = 0; n < numChannels+2; n++)
- idctMatrix[k*(numChannels+2)+n] = (float) cos ((2*M_PI*n*center[k])/(float)m_sampleRate) * df[k];
- }
- return 1;
- }