endpoint.m
上传用户:ay_070428
上传日期:2014-12-04
资源大小:11427k
文件大小:5k
源码类别:

语音合成与识别

开发平台:

Matlab

  function out = endpoint(y, fs, plotOpt, epdParam)
  % ENDPOINT End point detection using log energy and zero crossing rate.
  %
  %   out = endpoint(y, fs, plotOpt, epdParam)
  %
  %   Inputs:
  %     y        - audio signal; its mean is removed before analysis.
  %     fs       - sampling rate (default 8000).
  %     plotOpt  - nonzero to plot the wave form, log energy and ZCR with the
  %                detected end points and then play each detected clip
  %                (default 0).
  %     epdParam - struct with fields frameSize, overlap, deltaEnergyLevel1,
  %                deltaEnergyLevel2 and zcrRatio (defaults: 256, 86, -30,
  %                -20, 1).
  %
  %   Output:
  %     out - struct array with fields 'begin' and 'end' holding the
  %           sample-point indices of each detected segment, or [] when no
  %           segment is found.
  %
  %   Called without arguments, the function runs its self demo.
  %
  %   Framing and zero crossing rate come from the external helpers
  %   buffer2mex and zcratemex, which must be on the path.

  if nargin==0, selfdemo; return; end
  if nargin<4
    epdParam.frameSize = 256;
    epdParam.overlap = 86;
    epdParam.deltaEnergyLevel1 = -30;
    epdParam.deltaEnergyLevel2 = -20;
    epdParam.zcrRatio = 1;
  end
  if nargin<3, plotOpt=0; end
  if nargin<2, fs=8000; end

  % Nothing to detect in an empty signal.
  if isempty(y)
    out = [];
    return;
  end

  frameSize = epdParam.frameSize;
  overlap = epdParam.overlap;
  deltaEnergyLevel1 = epdParam.deltaEnergyLevel1;
  deltaEnergyLevel2 = epdParam.deltaEnergyLevel2;
  zcrRatio = epdParam.zcrRatio;
  hop = frameSize-overlap;        % Frame advance in samples

  % ====== Zero adjusted
  y = y-mean(y);

  % ====== Take frames (one frame per column)
  framedY = buffer2mex(y, frameSize, overlap);
  frameNum = size(framedY, 2);    % Number of frames

  % ====== Compute log energy
  meanFrame = mean(framedY.^2);
  meanFrame(meanFrame==0) = eps;  % Avoid log10(0) on all-zero frames
  energy = 10*log10(meanFrame);
  time = (1:frameNum)*hop/fs;
  energyLevel1 = max(energy)+deltaEnergyLevel1;   % Lower level
  energyLevel2 = max(energy)+deltaEnergyLevel2;   % Upper level

  % ====== Compute zero crossing rate
  zcr = zcratemex(framedY);
  zcrThreshold = max(zcr)*zcrRatio;

  % ====== Find initial end points according to energy level2 (upper level)
  % 'seg' holds frame-based segments. It is not called 'sound' so that the
  % built-in SOUND function is not shadowed.
  voicedIndex = find(energy>=energyLevel2);
  seg = struct('begin', {}, 'end', {});
  if ~isempty(voicedIndex)
    k = 1;
    seg(k).begin = voicedIndex(1);
    % Start at i=1 so that a gap right after the first voiced frame also
    % starts a new segment.
    for i=1:length(voicedIndex)-1
      if voicedIndex(i+1)-voicedIndex(i)>1
        seg(k).end = voicedIndex(i);
        seg(k+1).begin = voicedIndex(i+1);
        k = k+1;
      end
    end
    seg(k).end = voicedIndex(end);
  end

  % ====== Delete short sound clips (end-begin below 5 frames)
  tooShort = false(1, length(seg));
  for i=1:length(seg)
    tooShort(i) = (seg(i).end-seg(i).begin)<5;
  end
  seg(tooShort) = [];

  % ====== Expand end points to energy level1 (lower level)
  for i=1:length(seg)
    head = seg(i).begin;
    while (head-1)>=1 && energy(head-1)>energyLevel1
      head = head-1;
    end
    seg(i).begin = head;
    tail = seg(i).end;
    while (tail+1)<=length(energy) && energy(tail+1)>energyLevel1
      tail = tail+1;
    end
    seg(i).end = tail;
  end

  % ====== Expand end points to include high zcr region (leading side only)
  for i=1:length(seg)
    head = seg(i).begin;
    while (head-1)>=1 && zcr(head-1)>zcrThreshold
      head = head-1;
    end
    seg(i).begin = head;
  end

  % ====== Delete repeated sound segments
  % After expansion, neighbouring segments may have grown into the same
  % range; keep only the last of each run of identical neighbours.
  repeated = false(1, length(seg));
  for i=1:length(seg)-1
    repeated(i) = seg(i).begin==seg(i+1).begin && seg(i).end==seg(i+1).end;
  end
  seg(repeated) = [];

  % ====== Transform to sample-point-based index
  out = [];
  for i=1:length(seg)
    out(i).begin = (seg(i).begin-1)*hop+1;
    % Last sample of the end frame, clamped to the signal length in case
    % the final frame extends past the signal (depends on how buffer2mex
    % pads -- TODO confirm).
    out(i).end = min(length(y), seg(i).end*hop+overlap);
  end

  if plotOpt
    subplot(3,1,1);
    plot((1:length(y))/fs, y);
    axis([-inf inf -1 1]);
    ylabel('Amplitude');
    title('Wave form');

    subplot(3,1,2);
    plot(time, energy);
    line([min(time), max(time)], energyLevel1*[1 1], 'color', 'c');
    line([min(time), max(time)], energyLevel2*[1 1], 'color', 'c');
    axis tight
    ylabel('Log energy (dB)');
    title('Log energy');

    subplot(3,1,3);
    plot(time, zcr);
    line([min(time), max(time)], zcrThreshold*[1 1], 'color', 'c');
    axis([-inf inf 0 inf]);
    ylabel('ZCR');
    title('Zero crossing rate');

    % Plot end points on all three axes (red = begin, green = end)
    yBounds = {[-1 1], [min(energy) max(energy)], [0 max(zcr)]};
    for p=1:3
      subplot(3,1,p);
      for i=1:length(seg)
        line(seg(i).begin*hop/fs*[1,1], yBounds{p}, 'color', 'r');
        line(  seg(i).end*hop/fs*[1,1], yBounds{p}, 'color', 'g');
      end
    end

    % Play the segmented sound
    fprintf('%g sound clips are collected.\n', length(seg));
    for i=1:length(seg)
      % Play exactly the sample range that is returned to the caller.
      thisY = y(out(i).begin:out(i).end);
      fprintf('Hit return to hear the cutted sound %g:', i);
      pause;
      fprintf('\n');
      if exist('wavplay', 'file')
        wavplay(thisY, fs, 'sync');
      else
        % wavplay is not available in newer MATLAB releases.
        playblocking(audioplayer(thisY, fs));
      end
    end
    fprintf('\n');
  end
  % ====== Self demo
  function selfdemo
  % SELFDEMO Run the end point detector on '8a.wav' with plotting enabled.
  %   Reads the wave file with wavreadc and calls the main function of this
  %   file (via mfilename) with the demo parameters below.
  epdParam.frameSize = 256;
  epdParam.overlap = 86;
  epdParam.deltaEnergyLevel1 = -28;
  epdParam.deltaEnergyLevel2 = -14;
  epdParam.zcrRatio = 0.5;
  plotOpt = 1;
  wavefile = '8a.wav';
  % The sampling rate is taken from the file itself.
  [y, fs] = wavreadc(wavefile);
  feval(mfilename, y, fs, plotOpt, epdParam);