FE_endpoint.h
上传用户:italyroyal
上传日期:2013-05-06
资源大小:473k
文件大小:4k
///////////////////////////////////////////////////////////////////////////////
// This is a part of the Feature program.
// Version: 1.0
// Date: February 22, 2003
// Programmer: Oh-Wook Kwon
// Copyright(c) 2003 Oh-Wook Kwon. All rights reserved. owkwon@ucsd.edu
///////////////////////////////////////////////////////////////////////////////
- #ifndef _FE_ENDPOINT_H_
#define _FE_ENDPOINT_H_
- #include "FE_common.h"
/*-----------------------------------*/
/* Definition for endpoint detectors */
/*-----------------------------------*/

/* Longest recording handled, in seconds. */
#define EPD_MAX_RECORD_TIME 10

/* Number of samples in EPD_MAX_RECORD_TIME seconds of 16 kHz mono audio. */
#define EPD_WAV_BUF_SIZE (EPD_MAX_RECORD_TIME * 16000)

/* Frame shift in milliseconds (10 ms). */
#define EPD_FRAME_SHIFT_IN_MS 10

/* Maximum window size. */
#define EPD_MAX_WIN_SIZE 512

/*
 * Maximum duration of a long pause in milliseconds (<= 2000 ms).
 * In non-_DEBUG builds EPD_SPEECH_BUF_SIZE and EPD_FRAME_BUF_SIZE are
 * derived from this value.
 */
#define EPD_MAX_PAUSE_IN_MS 2000
/*-----------------*/
/* Type definition */
/*-----------------*/

/*
 * Local status of the endpoint detector; this is the type of
 * Epd::m_localStatus. Enumerators take the implicit values 0..3 in the
 * order listed.
 * NOTE(review): the meaning of each state is only suggested by its name;
 * confirm against the implementation.
 */
typedef enum {
    EPD_STATUS_WAITING,
    EPD_STATUS_SPEECH,
    EPD_STATUS_PAUSE,
    EPD_STATUS_ENDPOINT
} EpdLocalStatus;
/*
 * Per-frame classification: silence (0) or speech (1).
 * Returned by the private Epd::OneFrame overload and written through the
 * frameKind argument type of the public one.
 */
typedef enum {
    EPD_FK_SILENCE = 0,
    EPD_FK_SPEECH
} EpdFrameKind;
/*----------------*/
/* Data structure */
/*----------------*/

/*
 * Endpoint-detector parameters; held by Epd as m_config.
 * NOTE(review): presumably filled in by Epd::ParmInit, whose body is not
 * visible here. The roles of the *FrameN fields without a comment are not
 * documented in this header; confirm against the implementation.
 */
typedef struct {
    int m_sampleRate;
    int m_shiftSize;           /* Shift size in samples (10 ms) */
    int m_winSize;             /* Window size in samples, same as feature extraction. */
    int m_threshFrameN;
    int m_startFrameN;
    int m_endFrameN;
    int m_startSilenceFrameN;
    int m_endSilenceFrameN;
    int m_longPauseFrameN;     /* to detect utterance end-point */
} EpdParm;
/*
 * Buffer sizes used by class Epd.
 *   EPD_SPEECH_BUF_SIZE - element count of Epd::m_epdSpeech
 *   EPD_FRAME_BUF_SIZE  - element count of Epd::m_isSpeechA and Epd::m_zcrA
 * _DEBUG builds size them for a whole recording; other builds size them
 * from the longest pause only, to save memory. The frame counts are
 * written in terms of EPD_FRAME_SHIFT_IN_MS rather than a literal 10 ms.
 */
#ifdef _DEBUG
#define EPD_SPEECH_BUF_SIZE EPD_WAV_BUF_SIZE
#define EPD_FRAME_BUF_SIZE (EPD_MAX_RECORD_TIME * (1000 / EPD_FRAME_SHIFT_IN_MS))
#else
#define EPD_SPEECH_BUF_SIZE ((EPD_MAX_PAUSE_IN_MS / EPD_FRAME_SHIFT_IN_MS + 1) * EPD_MAX_WIN_SIZE) /* to save memory */
#define EPD_FRAME_BUF_SIZE (EPD_MAX_PAUSE_IN_MS / EPD_FRAME_SHIFT_IN_MS + 1) /* to save memory */
#endif

/* NOTE(review): not referenced anywhere in this header; purpose not visible here. */
#define EPD_BUF_SIZE 7
- class Epd {
- public:
- /* fixed part */
- int m_isActive;
- int m_isAudio;
- EpdParm m_config;
- /* adaptive part */
- long m_uttBeginX; /* start sample point of speech */
- long m_uttEndX; /* end sample point of speech */
- long m_localFrameX; /* local time frame index */
- /* adaptive noise estimation */
- float m_lambdaLTE;
- float m_noiseEn;
- float m_lambdaLTEhigherE;
- int m_nbSpeechFrame;
- int m_nbFrameEpd;
- EpdFrameKind m_flagVAD;
- /* ZCR tracking */
- float m_noiseLevel;
- float m_lambdaZcr;
- float m_meanZcr;
- /* SNR tracking */
- float m_signalEn;
- float m_lambdaSignalE;
- long m_absTimeX;
- float m_lastSnr;
- /* working variables */
- EpdLocalStatus m_localStatus; /* The status of EPD must be hidden to other modules */
- EpdFrameKind m_isSpeechA[EPD_FRAME_BUF_SIZE];
- float m_zcrA[EPD_FRAME_BUF_SIZE]; /* to remove breath noise */
- long m_sampleEndX; /* end sample point to input audio */
- short m_epdSpeech[EPD_SPEECH_BUF_SIZE];
- int m_speechSegN; /* number of detected speech segments */
- /*--------------------*/
- /* Member functions */
- /*--------------------*/
-
- Epd();
- virtual ~Epd();
- int Init(int samplingRate, int isAudio, int isActive, int isDenoised);
- int InitNewUtterance();
- FeReturnCode OneFrame(float *in, float *out, int frameX, int winSize, FeReturnCode inStatus, EpdFrameKind *frameKind);
- int GetOutput(short *sampleA, int maxSampleN);
- void Close();
- void SetMaxPause(int msec);
- #ifdef _DEBUG
- int SaveOutput(const char *fname, int offsetX);
- #endif
- private:
- int ParmInit(EpdParm *epdParm, int samplingRate, int isDenoised);
- int PutSample(float *sampleA, int sampleN);
- int FindBeginPoint(int startX);
- int FindEndPoint(int startX, int reqSilN);
- int SaveAudio(const char *fname, int begX, int endX);
- EpdFrameKind OneFrame(const float *s);
- };
- #endif