endpoint_bb.m
上传用户:ay_070428
上传日期:2014-12-04
资源大小:11427k
文件大小:5k
源码类别:

语音合成与识别

开发平台:

Matlab

  function out = endpoint(y, fs, plotOpt, epdParam)
  % ENDPOINT End point detection using frame log energy and zero-crossing rate.
  %
  %   out = endpoint(y, fs, plotOpt, epdParam)
  %
  %   Inputs
  %     y        - signal vector (its mean is removed before analysis).
  %     fs       - sample rate used for the time axis and playback
  %                (default 8000).
  %     plotOpt  - nonzero to plot waveform / log energy / ZCR with the
  %                detected end points and to play each segment back
  %                interactively (default 0).
  %     epdParam - struct with fields
  %                  frameSize          frame length in samples (default 256)
  %                  overlap            frame overlap in samples (default 86)
  %                  deltaEnergyLevel1  lower energy threshold, in dB relative
  %                                     to the maximum frame energy (default -28)
  %                  deltaEnergyLevel2  upper energy threshold, in dB relative
  %                                     to the maximum frame energy (default -14)
  %                  zcrRatio           ZCR threshold as a fraction of the
  %                                     maximum ZCR (default 0.5)
  %
  %   Output
  %     out - struct array with fields 'begin' and 'end' holding sample
  %           indices (frame index times hop size; 'end' is clipped to
  %           length(y)). An empty struct array is returned when no segment
  %           is found.
  %
  %   Passing [] for fs, plotOpt or epdParam selects the default.
  %   Called with no arguments, the function runs selfdemo.

  if nargin==0, selfdemo; return; end
  if nargin<4 || isempty(epdParam)
      epdParam = struct( ...
          'frameSize', 256, ...
          'overlap', 86, ...
          'deltaEnergyLevel1', -28, ...
          'deltaEnergyLevel2', -14, ...
          'zcrRatio', 0.5);
  end
  if nargin<3 || isempty(plotOpt), plotOpt = 0; end
  if nargin<2 || isempty(fs), fs = 8000; end

  frameSize = epdParam.frameSize;
  overlap = epdParam.overlap;
  hop = frameSize - overlap;          % frame advance in samples
  minFrameSpan = 5;                   % segments spanning fewer frames are dropped

  % Always define the output so callers get an empty struct array instead
  % of an "output argument not assigned" error when nothing is detected.
  out = struct('begin', {}, 'end', {});

  % ====== Zero adjusted
  y = y - mean(y);

  % ====== Take frames (one frame per column)
  framedY = buffer(y, frameSize, overlap);
  frameNum = size(framedY, 2);

  % ====== Compute log energy (dB) per frame
  energy = 10*log10(mean(framedY.^2, 1));
  time = (1:frameNum)*hop/fs;
  energyLevel1 = max(energy) + epdParam.deltaEnergyLevel1;   % lower level
  energyLevel2 = max(energy) + epdParam.deltaEnergyLevel2;   % upper level

  % ====== Compute zero crossing rate
  % zcrate is defined outside this file; it is indexed per frame below,
  % so it presumably returns one value per column of framedY.
  zcr = zcrate(framedY);
  zcrThreshold = max(zcr)*epdParam.zcrRatio;

  % ====== Find initial end points according to energy level2 (upper level)
  voicedIndex = find(energy >= energyLevel2);
  if isempty(voicedIndex)
      % Nothing to segment (e.g. empty input): return the empty result.
      return;
  end
  % A new segment starts wherever consecutive voiced frame indices are not
  % adjacent. Every gap is examined, including the one between the first
  % and second voiced frames.
  gapPos = find(diff(voicedIndex) > 1);
  segBegin = voicedIndex([1, gapPos+1]);
  segEnd = voicedIndex([gapPos, length(voicedIndex)]);

  % ====== Delete short sound clips
  keep = (segEnd - segBegin) >= minFrameSpan;
  segBegin = segBegin(keep);
  segEnd = segEnd(keep);
  segNum = numel(segBegin);

  % ====== Expand end points to energy level1 (lower level)
  for i = 1:segNum
      head = segBegin(i);
      while head > 1 && energy(head-1) > energyLevel1
          head = head - 1;
      end
      segBegin(i) = head;
      tail = segEnd(i);
      while tail < frameNum && energy(tail+1) > energyLevel1
          tail = tail + 1;
      end
      segEnd(i) = tail;
  end

  % ====== Expand the beginning to include a preceding high-ZCR region
  for i = 1:segNum
      head = segBegin(i);
      while head > 1 && zcr(head-1) > zcrThreshold
          head = head - 1;
      end
      segBegin(i) = head;
  end

  % ====== Delete repeated sound segments
  % After expansion, neighbouring segments can end up identical; keep only
  % the last of each run of identical neighbours.
  if segNum > 1
      dup = [segBegin(1:end-1) == segBegin(2:end) & ...
             segEnd(1:end-1) == segEnd(2:end), false];
      segBegin(dup) = [];
      segEnd(dup) = [];
      segNum = numel(segBegin);
  end

  % ====== Transform to sample-point-based index
  for i = 1:segNum
      out(i).begin = segBegin(i)*hop;
      out(i).end = min(length(y), segEnd(i)*hop);
  end

  if plotOpt
      subplot(3,1,1);
      plot((1:length(y))/fs, y);
      axis([-inf inf -1 1]);
      ylabel('Amplitude');
      title('Wave form');

      subplot(3,1,2);
      plot(time, energy);
      line([min(time), max(time)], energyLevel1*[1 1], 'color', 'c');
      line([min(time), max(time)], energyLevel2*[1 1], 'color', 'c');
      axis tight
      ylabel('Log energy (dB)');
      title('Log energy');

      subplot(3,1,3);
      plot(time, zcr);
      line([min(time), max(time)], zcrThreshold*[1 1], 'color', 'c');
      axis([-inf inf 0 inf]);
      ylabel('ZCR');
      title('Zero crossing rate');

      % Plot end points on each of the three axes
      % (red = segment begin, green = segment end).
      yBounds = {[-1 1], [min(energy) max(energy)], [0 max(zcr)]};
      for p = 1:3
          subplot(3,1,p);
          for i = 1:segNum
              line(segBegin(i)*hop/fs*[1,1], yBounds{p}, 'color', 'r');
              line(segEnd(i)*hop/fs*[1,1], yBounds{p}, 'color', 'g');
          end
      end

      % Play the segmented sound
      fprintf('%g sound clips are collected.\n', segNum);
      for i = 1:segNum
          thisY = y(out(i).begin:out(i).end);
          fprintf('Hit return to hear the cutted sound %g:', i);
          pause;
          fprintf('\n');
          if exist('wavplay', 'file')
              wavplay(thisY, fs, 'sync');
          else
              % wavplay is not available in newer MATLAB releases;
              % playblocking gives the same synchronous playback.
              playblocking(audioplayer(thisY, fs));
          end
      end
      fprintf('\n');
  end
  % ====== Self demo
  function selfdemo
  % SELFDEMO Demo driver: obtain a signal and run this file's main function
  %   on it with plotting enabled.
  %
  %   The signal comes from recsound('test.wav', 3, 8000); recsound is
  %   defined outside this file (presumably it records from the audio
  %   input - confirm against its implementation). The main function is
  %   invoked through feval(mfilename, ...), i.e. by this file's name.

  sampleRate = 8000;
  clipSeconds = 3;
  showPlots = 1;

  % Same detection settings the main function uses as its defaults.
  params = struct( ...
      'frameSize', 256, ...
      'overlap', 86, ...
      'deltaEnergyLevel1', -28, ...
      'deltaEnergyLevel2', -14, ...
      'zcrRatio', 0.5);

  recording = recsound('test.wav', clipSeconds, sampleRate);
  out = feval(mfilename, recording, sampleRate, showPlots, params); %#ok<NASGU>