danwenben.m
上传用户:dongbao
上传日期:2022-08-09
资源大小:1k
文件大小:2k
源码类别:

matlab例程

开发平台:

Matlab

  % DANWENBEN  Single-document text preprocessing script.
  %
  % Reads whitespace-separated tokens from lpin.txt, lower-cases them, removes
  % every token that appears in stopword.txt, stems the survivors with
  % porterStemmer (must be on the MATLAB path), and writes:
  %   lpout.txt - one stem per line, in document order
  %   word.txt  - distinct stems, in order of first appearance
  %   count.txt - occurrence count for the stem on the same line of word.txt
  %
  % Workspace results (struct array a with fields word/count):
  %   a(1).word  - lower-cased input tokens
  %   a(2).word  - stems; a(2).count holds the total at each first occurrence
  %                and 0 at every repeated occurrence
  %   a(3).word  - distinct stems; a(3).count their totals
  %
  % Output files are overwritten on every run, so results never mix with
  % those of a previous run.

  % --- Read the tokens and lower-case them ----------------------------------
  text = textread('lpin.txt', '%s');            % tokens of the document
  stopword = textread('stopword.txt', '%s');    % tokens of the stop-word list
  a = struct('word', [], 'count', []);          % word: tokens, count: occurrences
  a(1).word = lower(text);

  % --- Remove stop words (exact string match) -------------------------------
  isStop = ismember(a(1).word, stopword);
  result = reshape(a(1).word(~isStop), 1, []);

  % --- Stem every remaining token -------------------------------------------
  s = cell(1, numel(result));
  for i = 1:numel(result)
      s{i} = porterStemmer(result{i});
  end

  % --- Write the preprocessed tokens to lpout.txt ---------------------------
  % Open once in 'w' mode: the file is truncated instead of appended to.
  fid = fopen('lpout.txt', 'w');
  if fid == -1
      error('danwenben:fileOpen', 'Cannot open %s for writing.', 'lpout.txt');
  end
  for i = 1:numel(s)
      fprintf(fid, '%s\n', s{i});
  end
  fclose(fid);

  % --- Count occurrences of each distinct stem ------------------------------
  % Work on the in-memory stems; empty strings are dropped because they would
  % not survive as tokens in a whitespace-delimited file either.
  m = s(~cellfun('isempty', s));
  m = m(:);
  a(2).word = m;
  a(2).count = zeros(1, numel(m));
  a(3).word = {};
  a(3).count = [];
  if ~isempty(m)
      % 'stable' keeps the distinct stems in order of first appearance.
      [distinctWords, firstIdx, groupIdx] = unique(m, 'stable');
      totals = accumarray(groupIdx(:), 1);
      a(2).count(firstIdx) = totals;            % total at first occurrence, 0 elsewhere
      a(3).word = reshape(distinctWords, 1, []);
      a(3).count = reshape(totals, 1, []);
  end

  % --- Write distinct stems to word.txt and their counts to count.txt -------
  fid = fopen('word.txt', 'w');
  if fid == -1
      error('danwenben:fileOpen', 'Cannot open %s for writing.', 'word.txt');
  end
  for i = 1:numel(a(3).word)
      fprintf(fid, '%s\n', a(3).word{i});
  end
  fclose(fid);

  fid = fopen('count.txt', 'w');
  if fid == -1
      error('danwenben:fileOpen', 'Cannot open %s for writing.', 'count.txt');
  end
  for j = 1:numel(a(3).count)
      fprintf(fid, '%d\n', a(3).count(j));
  end
  fclose(fid);