% letter_compare.m
% Uploaded by: zslfgd
% Upload date: 2010-04-06
% Resource size: 115k
% File size: 5k
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%                                                           %%
%%     Prof. Sclaroff's CS585 Image and Video Processing     %%
%%                        Project ONE                        %%
%%         C H A R A C T E R   R E C O G N I T I O N         %%
%%                                                           %%
%%                     by Stanislav Rost                     %%
%%                       ID: 31764117                        %%
%%                                                           %%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function result = letter_compare(ocr_letters, document_letter)
% LETTER_COMPARE  Match a letter from the document against four vowels.
%
%   result = letter_compare(ocr_letters, document_letter)
%
%   Parameters:
%     ocr_letters     - array (1..4) of structures with fields Mean and
%                       Covariance. Subscripts mean 1=O 2=E 3=A 4=U.
%                       Mean is used as a 1x5 row vector and Covariance as
%                       a 5x5 matrix over the feature vector
%                       [moment1 moment2 moment3 compactness partRatio].
%     document_letter - structure with fields Image, EulerNumber, Area
%                       and BoundingBox.
%
%   Returns:
%     result - index (1..4) of the vowel which the letter from the
%              paragraph matches, or 0 if no vowel matches.
%
%   Assumptions:
%     - the letter from the document/paragraph is properly oriented
%     - presumably document_letter.Image is cropped to the bounding box,
%       so that BoundingBox(4) is its height in rows -- TODO confirm
%
%   Requires invmoments (not defined in this file) and the Image
%   Processing Toolbox functions bwperim, bwarea, im2bw and imfeature.

% ---- Features of the document letter ------------------------------------
perImg = bwperim(document_letter.Image);
perArea = bwarea(perImg);
compactness = perArea^2 / document_letter.Area;
moments = invmoments(document_letter.Image);

% Top/bottom area ratio, split at half of BoundingBox(4)
midpoint = floor(document_letter.BoundingBox(4) / 2);
topPart = document_letter.Image(1:midpoint, :);
bottomPart = document_letter.Image((midpoint+1):end, :);
partRatio = bwarea(topPart) / bwarea(bottomPart);

% Final vector for the document image which will be used for comparison
vector = [moments(1) moments(2) moments(3) compactness partRatio];

% ---- Distance to each of the four letters --------------------------------
numLetters = 4;
closeness = zeros(1, numLetters);
distance = zeros(1, numLetters);
for i = 1:numLetters
    deviation = vector - ocr_letters(i).Mean;
    % Closeness is the Mahalanobis distance without the log term.
    % deviation / C solves the linear system instead of forming inv(C).
    closeness(i) = (deviation / ocr_letters(i).Covariance) * deviation';
    % Distance adds the log-determinant term, which equalizes the distance
    % space so that the four letters can be compared with one another.
    distance(i) = closeness(i) + log(det(ocr_letters(i).Covariance));
end

% Vowel with the smallest distance to the document letter. min returns the
% first index on ties, the same choice a left-to-right scan would make.
[minimumDistance, closestMatch] = min(distance);

% ---- Decide whether the closest match is noise ---------------------------
matchCovariance = ocr_letters(closestMatch).Covariance;
% Per-feature standard deviation (named stdDev so that the built-in std
% function is not shadowed)
stdDev = sqrt(diag(matchCovariance))';
matchCloseness = closeness(closestMatch);
% Allowed range: 6.5 times the Mahalanobis form evaluated at one standard
% deviation in every feature
allowedRange = 6.5 * (stdDev / matchCovariance) * stdDev';

if (matchCloseness <= allowedRange)
    % ###################### SECONDARY CHECKS ################
    switch closestMatch
        case {1, 2}
            % It thinks it is an O or an E: Euler number must be 0
            if (document_letter.EulerNumber ~= 0)
                closestMatch = 0;
            end

        case 3
            % It thinks it is an A: accepted Euler numbers are 0 and -1.
            % NOTE(review): the original comment said "0 or 1"; the code
            % is kept as it was -- confirm which one was intended.
            if (document_letter.EulerNumber ~= 0) & ...
               (document_letter.EulerNumber ~= -1)
                closestMatch = 0;
            end
            % Ahh, but it might be an S! S's have about equal distribution
            % of weight about their midpoint, so reject when the top half
            % weighs nearly as much as (or more than) the bottom half.
            if (partRatio > 0.87)
                closestMatch = 0;
            end

        case 4
            % It thinks it is a U: Euler number must be 1
            if (document_letter.EulerNumber ~= 1)
                closestMatch = 0;
            else
                % Also check the Euler numbers of a top and a bottom slice.
                % NOTE(review): the bottom slice is the single row
                % midpoint+1, not (midpoint+1):end as used for partRatio
                % above. The expected value of 2 below appears to rely on
                % that single row; kept as-is -- confirm before changing.
                % NOTE(review): imfeature was superseded by regionprops in
                % later Image Processing Toolbox releases -- confirm it is
                % available in the target environment.
                topSlice = im2bw(document_letter.Image(1:midpoint, :), 0.5);
                bottomSlice = im2bw(document_letter.Image(midpoint+1, :), 0.5);
                topFeature = imfeature(double(topSlice), 'EulerNumber');
                bottomFeature = imfeature(double(bottomSlice), 'EulerNumber');
                if isempty(topFeature) | isempty(bottomFeature)
                    % No region found in a slice: cannot be a U
                    closestMatch = 0;
                elseif (topFeature(1).EulerNumber ~= 2) | ...
                       (bottomFeature(1).EulerNumber ~= 2)
                    % Can't be a U according to the slice Euler checks
                    closestMatch = 0;
                end
            end
    end
    result = closestMatch;
else
    % Closeness is outside the allowed interval: treat as noise
    result = 0;
end