midi

开发平台：
Visual C++

ZCR_Noteseprate.m：源码内容
							function notes_steps = ZCR_Noteseprate()
% ZCR_NOTESEPRATE  Split a WAV recording into notes and return pitch steps.
%
%   notes_steps = ZCR_Noteseprate() asks the user to pick a WAV file, finds
%   the low zero-crossing-rate (voiced) regions, derives a tempo unit from
%   the shortest note spacing, estimates one dominant frequency per tempo
%   unit, and converts consecutive frequencies into semitone steps.
%
%   Returns:
%     notes_steps - row vector; element k is the rounded interval, in
%                   semitones, between note k+1 and note k. Empty when the
%                   dialog is cancelled or no voiced section is found.
%
%   Side effects: clears the command window, opens a figure showing the
%   normalised zero-crossing rate with the detected section boundaries,
%   and prints the resulting steps.
%
%   Requires awgn and zeroCrossRate, neither of which is defined here.
%   The code treats every zero-crossing-rate value as one 10 ms frame.

notes_steps = zeros(1, 0);

% Open the file
clc;
[filename, folder] = uigetfile('*.wav', 'please select a wave file');
if isequal(filename, 0)
    return;                       % the user cancelled the dialog
end
filename = fullfile(folder, filename);
if ismember(exist('audioread'), [2 3 5 6])
    [y, fs] = audioread(filename);    % wavread is gone from newer releases
else
    [y, fs] = wavread(filename);
end

% Tunable parameters
sn = 30;            % signal-to-noise ratio handed to awgn before the ZCR step
factor = 1;         % decision threshold = mean(zcr) * factor
frame_ms = 10;      % duration represented by one ZCR value, in milliseconds
min_frames = 10;    % sections shorter than this many frames (100 ms) are outliers

% Remove the DC offset of the audio
y = y - sum(y) / length(y);

% Add white Gaussian noise
yn = awgn(y, sn);

% Feature extraction: zero-crossing rate (forced to a row vector so that
% the concatenations below are valid whatever orientation is returned)
zcr = zeroCrossRate(yn, fs, frame_ms, frame_ms);
zcr = zcr(:).';

% Endpoint detection of the voiced regions: locate threshold crossings
threshold = sum(zcr) / length(zcr) * factor;
data = zcr - threshold;
temp = [data 0] .* [0 data];      % negative where consecutive frames straddle the threshold
B = find(temp < 0);
if isempty(B)
    warning('ZCR_Noteseprate:noCrossing', ...
            'The zero-crossing rate never crosses the threshold.');
    return;
end
if data(B(1)) > data(B(1) - 1)
    % The first crossing rises into a high-ZCR region (unvoiced/silence),
    % so it cannot open a voiced section: discard it.
    B = B(2:end);
end
if mod(length(B), 2) == 1
    % A voiced section is still open at the end of the recording; close it
    % just past the last frame so starts and ends always pair up.
    B(end + 1) = length(zcr) + 1;
end
startpoints = B(1:2:end);
endpoints = B(2:2:end);

% Outlier removal: ignore voiced sections shorter than 100 ms
keep = (endpoints - 1 - startpoints) >= min_frames;
startpoints = startpoints(keep);
endpoints = endpoints(keep) - 1;  % last frame that is still below the threshold
if isempty(startpoints)
    warning('ZCR_Noteseprate:noSections', ...
            'No voiced section of at least 100 ms was found.');
    return;
end

%---------------
figure;
d = 0.01;
x = d:d:d*length(zcr);
plot(x, zcr / max(zcr));
hold on; plot(x, ones(1, length(zcr)) * threshold / max(zcr)); title('过零率 & 端点切分结果');
hold on; stem(d * startpoints, ones(1, length(startpoints)), 'r');
hold on; stem(d * endpoints, ones(1, length(endpoints)));
%---------------

% Tempo unit (in frames): the shortest onset-to-onset distance, or the
% length of the last section if that is shorter.
tempo = min([diff(startpoints), endpoints(end) - startpoints(end)]);

% Within every tempo unit, estimate one absolute pitch (Hz) from the peak
% of the magnitude spectrum. (Autocorrelation pitch detection and a
% spectral-centroid variant were tried in earlier revisions and dropped.)
samples_in_Frame = fs * frame_ms / 1000;
% Segment k runs from onset k to the next onset; the last one runs to the
% end of the last voiced section.
segment_ends = [startpoints(2:end), endpoints(end)];
notes_frequence = [];
for i = 1:length(startpoints)
    span = segment_ends(i) - startpoints(i);
    N = max(1, round(span / tempo));   % number of notes in this segment
    D = span / N;                      % frames per note (may be fractional)
    for j = 1:N
        % Offsets are computed from the segment start each time; rounding
        % keeps the indices integral even when fs*frame_ms/1000 is not.
        begin_sample = round((startpoints(i) + (j - 1) * D) * samples_in_Frame);
        end_sample = round((startpoints(i) + j * D) * samples_in_Frame) - 1;
        begin_sample = max(begin_sample, 1);
        end_sample = min(end_sample, length(y));
        if end_sample - begin_sample + 1 < 4
            continue;                  % too short to contain a non-DC bin
        end
        fmax = znsPeakFrequency(y(begin_sample:end_sample), fs);
        notes_frequence = [notes_frequence fmax];
    end
end

% Convert to relative pitch (semitone steps between consecutive notes)
if length(notes_frequence) >= 2
    notes_steps = round(12 * diff(log(notes_frequence)) / log(2));
end
disp('输入音频的相对音高结果：')
disp(notes_steps);

function f = znsPeakFrequency(segment, fs)
% ZNSPEAKFREQUENCY  Frequency (Hz) of the strongest non-DC spectral bin.
%   segment - vector of audio samples (at least 4 samples)
%   fs      - sampling rate in Hz
%   Only the first half of the magnitude spectrum is searched. The DC bin
%   is skipped so a residual offset can never be reported as 0 Hz, which
%   would break the logarithm used by the caller.
spectral = abs(fft(segment));
spectral = spectral(1:floor(length(spectral) / 2));
[peak, k] = max(spectral(2:end));  % k counts bins above DC, i.e. the 0-based bin number
f = k * fs / length(segment);