wav2ftr1.m
资源名称:speech.rar [点击查看]
上传用户:ay_070428
上传日期:2014-12-04
资源大小:11427k
文件大小:3k
源码类别:
语音合成与识别
开发平台:
Matlab
function parameter = wav2ftr1(Inwav)
% WAV2FTR1 Wave to MFCC feature extraction.
%
%   parameter = WAV2FTR1(Inwav) converts a speech signal into a feature
%   matrix with one column per retained frame.
%
%   Input:
%     Inwav     - either a file name (char) of an audio file to read, or
%                 the samples themselves. A multi-channel matrix is
%                 averaged down to a single channel.
%
%   Output:
%     parameter - 2*(L+1)-by-nFrames matrix (26 rows for L = 12):
%                   rows 1..L        mel-scale cepstrum coefficients
%                   row  L+1         normalised log energy of the frame
%                   rows L+2..2L+2   delta of the rows above
%                 Frames whose mean energy is below `threshold` are dropped.
%
%   Dependencies not visible in this file:
%     setparam      - invoked below; the code relies on frameSize, overlap
%                     and threshold existing afterwards, so it is presumably
%                     a script defining them (confirm against setparam.m).
%     tri_coef.mat  - must contain fstart, fcent and fstop, the bin indices
%                     of the triangular filter bank.

setparam;

% ====== Step 0: obtain the samples.
if ischar(Inwav)
    % wavread was removed from recent MATLAB releases; prefer audioread
    % and fall back to wavread only where audioread does not exist.
    if exist('audioread', 'file')
        Inwav = audioread(Inwav);
    else
        Inwav = wavread(Inwav);
    end
end
if ~isvector(Inwav)
    % buffer() only accepts vectors, so mix all channels down to mono.
    Inwav = mean(Inwav, 2);
end

% ====== Step 1: pre-emphasis, y(n) = x(n) - 0.95*x(n-1).
Inwav = filter([1, -0.95], 1, Inwav);

% ====== Step 2: frame blocking (one frame per column).
Frame = buffer(Inwav, frameSize, overlap);

% Triangular filter bank description. Loading into a struct keeps the
% .mat file from silently overwriting other variables in this workspace.
bank = load('tri_coef.mat');
fstart = bank.fstart;
fcent  = bank.fcent;
fstop  = bank.fstop;

% Per-frame mean energy; frames below the threshold are treated as silence
% and removed from both the energy track and the frame matrix.
normalize_coff = 10;
energy = sum(Frame.^2, 1) / frameSize;
silent = energy < threshold;
energy(silent) = [];
Frame(:, silent) = [];
logEnergy = 10*log10(energy) / normalize_coff;

% Loop-invariant settings.
P = 20;                         % number of triangular filters (log spectral magnitudes)
L = 12;                         % cepstrum order
window = hamming(frameSize);    % Step 3 window, identical for every frame

frameCount = size(Frame, 2);
cepstra = zeros(L, frameCount);
for k = 1:frameCount
    % ====== Step 3: Hamming window.
    windowed = window .* Frame(:, k);
    % ====== Step 4: magnitude spectrum via FFT.
    magnitude = abs(fft(windowed));
    % ====== Step 5: triangular band-pass filter bank (log band energies).
    tbfCoef = triBandFilter(magnitude, P, fstart, fcent, fstop);
    % ====== Step 6: cosine transform to L mel-scale cepstrum coefficients.
    cepstrum = mel_cepstrum2(L, P, tbfCoef);
    cepstra(:, k) = cepstrum(:);
end
parameter = [cepstra; logEnergy];

% ====== Step 7: append delta cepstrum and delta log energy, which doubles
% the number of rows.
delta_window = 2;
parameter = getDeltaFeature(delta_window, parameter);
%========================Subfunction================================
% ====== Triangular Band Filter
function tbfCoef = triBandFilter(fftFrame,P,fstart,fcent,fstop)
% TRIBANDFILTER Log energy of a spectrum seen through P triangular filters.
%
%   Inputs:
%     fftFrame - magnitude spectrum of one frame (row or column vector).
%     P        - number of triangular filters.
%     fstart, fcent, fstop - for filter i, the 1-based spectrum indices
%                where the triangle starts (weight 0), peaks (weight 1)
%                and ends (weight 0).
%
%   Output:
%     tbfCoef  - 1-by-P row vector, log of the squared weighted sum of the
%                spectrum in each band.
%
%   Degenerate triangles are tolerated: when fcent(i) == fstart(i) the
%   single rising-edge bin gets weight 1 instead of 0/0 = NaN.

spectrum = fftFrame(:);         % work on a column regardless of input shape
tbfCoef = zeros(1, P);
for band = 1:P
    lo  = fstart(band);
    mid = fcent(band);
    hi  = fstop(band);

    % Rising edge: 0 at lo up to 1 at mid.
    if mid > lo
        rising = ((lo:mid) - lo) / (mid - lo);
    else
        rising = 1;             % avoid 0/0 when the edge has zero width
    end
    % Falling edge: just below 1 after mid down to 0 at hi (empty if hi == mid).
    falling = 1 - ((mid+1:hi) - mid) / (hi - mid);

    weights = [rising, falling];
    tbfCoef(band) = sum(spectrum(lo:hi) .* weights');
end
% Floor at realmin so an empty band yields a large negative finite value
% rather than -Inf, which would become NaN in the later cosine transform.
tbfCoef = log(max(tbfCoef.^2, realmin));
% ====== Mel-scale cepstrum
function cepstrum = mel_cepstrum2(L,P,tbfCoef)
% MEL_CEPSTRUM2 Cosine transform of log filter-bank outputs.
%
%   Inputs:
%     L       - number of cepstrum coefficients to produce (typically 12).
%     P       - number of filter-bank outputs.
%     tbfCoef - the P log band energies (row or column vector).
%
%   Output:
%     cepstrum - 1-by-L row vector with
%                cepstrum(m) = sum_{k=1..P} tbfCoef(k) * cos(m*(k-0.5)*pi/P)
%                An empty 1-by-0 vector is returned when L is 0.

logBand = tbfCoef(:);           % accept either orientation
bandPos = (1:P)' - 0.5;         % k - 0.5 for k = 1..P
cepstrum = zeros(1, L);
for order = 1:L
    basis = cos((pi/P) * order * bandPos);
    cepstrum(order) = sum(basis .* logBand);
end
% ====== Delta cepstrum and delta log energy
function parameter = getDeltaFeature(delta_window,parameter)
% GETDELTAFEATURE Append regression-based delta features.
%
%   Inputs:
%     delta_window - number of frames looked at on each side (positive
%                    integer; 0 makes the denominator zero).
%     parameter    - rows-by-cols feature matrix, one frame per column.
%
%   Output:
%     parameter    - (2*rows)-by-cols matrix: the input on top, its deltas
%                    underneath, where for frame t
%                      delta(t) = sum_{j=1..W} j*(c(t+j) - c(t-j))
%                                 / (2 * sum_{j=1..W} j^2)
%
%   Frames beyond either end of the matrix are taken as zero.
%
%   The previous implementation negated the backward term twice, so it
%   computed sum j*(c(t+j) + c(t-j)), a weighted average rather than a slope.

rows = size(parameter, 1);
cols = size(parameter, 2);

% Zero-pad delta_window frames on both sides so every frame has neighbours.
padded = [zeros(rows, delta_window) parameter zeros(rows, delta_window)];
denominator = 2 * sum((1:delta_window).^2);

centre = (1:cols) + delta_window;   % columns of the real frames inside padded
delta = zeros(rows, cols);
for lag = 1:delta_window
    delta = delta + lag * (padded(:, centre + lag) - padded(:, centre - lag));
end
delta = delta / denominator;

parameter = [parameter; delta];