KeywordsSort.cs
资源名称:2.rar [点击查看]
上传用户:hshongkong
上传日期:2021-11-20
资源大小:10241k
文件大小:11k
源码类别:

多国语言处理

开发平台:

C#

  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. using System.Text.RegularExpressions;
  5. using CommonLibrary;
  6. namespace ChineseSplitter
  7. {
  /// <summary>
  /// Collects part-of-speech-tagged keywords, accumulates per-keyword statistics
  /// and returns the keywords that rank first by total score.
  /// </summary>
  /// <remarks>
  /// <see cref="myKeyList"/> is kept ordered by <c>KeywordsInfoHashCompare</c>; entries are
  /// located and inserted through <c>Finder.FindPlace</c> / <c>Finder.FindInsertPlace</c>
  /// from CommonLibrary (presumably ordered searches; their implementation is not part
  /// of this file).
  /// </remarks>
  public class KeywordsSort
  {
      // Shared tag-to-score table. It is only read after construction, so a single
      // eagerly created instance serves every KeywordsSort.
      static readonly WordsType myWordsType = new WordsType();

      // Separator between "keyword/type" tokens in the input and in the output.
      const string SPLIT_STRING = "  ";

      // Separator between a keyword and its part-of-speech tag inside one token.
      const char SPLIT_CHAR = '/';

      /// <summary>
      /// Keyword statistics, ordered by <c>KeywordsInfoHashCompare</c>.
      /// </summary>
      public List<KeywordsInfo> myKeyList;

      /// <summary>
      /// Creates an empty keyword collection.
      /// </summary>
      public KeywordsSort()
      {
          this.myKeyList = new List<KeywordsInfo>();
      }

      /// <summary>
      /// Parses a segmented string of <c>keyword/type</c> tokens separated by two spaces
      /// and adds every token with a positive score to the statistics.
      /// </summary>
      /// <param name="inputStr">Segmented text, e.g. <c>"word/n  other/v"</c>.</param>
      /// <exception cref="ArgumentNullException"><paramref name="inputStr"/> is null.</exception>
      public void Append(string inputStr)
      {
          if (inputStr == null)
          {
              throw new ArgumentNullException("inputStr");
          }

          // The separator is a plain literal, so an ordinary string split is sufficient.
          string[] tokens = inputStr.Split(new string[] { SPLIT_STRING }, StringSplitOptions.None);

          // Reusable probe value handed to the finder; only its Keywords field changes.
          KeywordsInfo probe = new KeywordsInfo("");

          // Running offset: sum of the lengths of all keywords seen so far in this call.
          int indexCount = 0;

          foreach (string token in tokens)
          {
              string[] parts = token.Split(SPLIT_CHAR);

              // Strip embedded line breaks from the keyword text.
              string keyword = parts[0].Replace("\r\n", "");
              string keywordType = parts.Length >= 2 ? parts[1] : "";
              float mark = myWordsType.GetMark(keywordType);

              // Skip tokens that carry no keyword text and tags that score zero.
              if (keyword.Length > 0 && mark > 0)
              {
                  probe.Keywords = keyword;
                  int place = Finder.FindPlace<KeywordsInfo>(this.myKeyList, probe, KeywordsInfoHashCompare);
                  if (place == -1)
                  {
                      // Unknown keyword: insert a fresh entry at its ordered position.
                      place = Finder.FindInsertPlace<KeywordsInfo>(this.myKeyList, probe, KeywordsInfoHashCompare);
                      this.myKeyList.Insert(place, new KeywordsInfo(keyword));
                  }

                  // KeywordsInfo is a struct: update a copy, then store it back.
                  KeywordsInfo entry = this.myKeyList[place];
                  entry.Count += 1;
                  entry.WordsMark += mark;
                  entry.PositionList.Add(indexCount);
                  this.myKeyList[place] = entry;
              }

              indexCount += keyword.Length;
          }
      }

      /// <summary>
      /// Returns the first 10 keywords by total score.
      /// </summary>
      /// <returns>Keywords, each followed by two spaces.</returns>
      public string GetTopKeywords()
      {
          return GetTopKeywords(10);
      }

      /// <summary>
      /// Returns the first <paramref name="topCount"/> keywords after ordering by
      /// <see cref="KeywordsInfo.AllMark"/> with <c>Sorter.DimidiateSort</c>.
      /// </summary>
      /// <param name="topCount">Maximum number of keywords; capped at the number of entries.</param>
      /// <returns>Keywords, each followed by two spaces; empty when nothing qualifies.</returns>
      public string GetTopKeywords(int topCount)
      {
          // Rank a copy so myKeyList keeps its lookup order and needs no re-sorting.
          // NOTE(review): which end of the sorted list holds the highest score depends on
          // Sorter.DimidiateSort, which is not visible here; confirm against CommonLibrary.
          List<KeywordsInfo> ranked = new List<KeywordsInfo>(this.myKeyList);
          Sorter.DimidiateSort<KeywordsInfo>(ranked, KeywordsInfoAllMarkCompare);

          if (topCount > ranked.Count)
          {
              topCount = ranked.Count;
          }

          StringBuilder result = new StringBuilder();
          for (int i = 0; i < topCount; i++)
          {
              result.Append(ranked[i].Keywords).Append(SPLIT_STRING);
          }
          return result.ToString();
      }

      /// <summary>
      /// Compares two entries by <see cref="KeywordsInfo.Count"/>.
      /// </summary>
      /// <param name="value1">First entry.</param>
      /// <param name="value2">Second entry.</param>
      /// <returns>1, -1 or 0 when value1 is greater than, less than or equal to value2.</returns>
      private int KeywordsInfoCountCompare(KeywordsInfo value1, KeywordsInfo value2)
      {
          if (value1.Count > value2.Count)
          {
              return 1;
          }
          if (value1.Count < value2.Count)
          {
              return -1;
          }
          return 0;
      }

      /// <summary>
      /// Compares two entries by the hash code of their keyword, falling back to an
      /// ordinal string comparison when the hash codes are equal.
      /// </summary>
      /// <param name="value1">First entry.</param>
      /// <param name="value2">Second entry.</param>
      /// <returns>1, -1 or 0; 0 only when both keywords are the same string.</returns>
      private int KeywordsInfoHashCompare(KeywordsInfo value1, KeywordsInfo value2)
      {
          int byHash = value1.Keywords.GetHashCode().CompareTo(value2.Keywords.GetHashCode());
          if (byHash != 0)
          {
              return byHash;
          }

          // Different keywords can share a hash code; without this tie-break they
          // would be treated as one entry and their statistics merged.
          return Math.Sign(string.CompareOrdinal(value1.Keywords, value2.Keywords));
      }

      /// <summary>
      /// Compares two entries by <see cref="KeywordsInfo.AllMark"/>.
      /// </summary>
      /// <param name="value1">First entry.</param>
      /// <param name="value2">Second entry.</param>
      /// <returns>1, -1 or 0 when value1 is greater than, less than or equal to value2.</returns>
      private int KeywordsInfoAllMarkCompare(KeywordsInfo value1, KeywordsInfo value2)
      {
          float left = value1.AllMark;
          float right = value2.AllMark;
          if (left > right)
          {
              return 1;
          }
          if (left < right)
          {
              return -1;
          }
          return 0;
      }
  }
  /// <summary>
  /// Statistics kept for a single keyword: its text, occurrence count,
  /// accumulated score and the positions at which it was recorded.
  /// </summary>
  public struct KeywordsInfo
  {
      /// <summary>
      /// The keyword text.
      /// </summary>
      public string Keywords;

      /// <summary>
      /// Number of occurrences.
      /// </summary>
      public short Count;

      /// <summary>
      /// Score of the keyword.
      /// </summary>
      public float WordsMark;

      /// <summary>
      /// Positions at which the keyword occurred.
      /// </summary>
      public List<int> PositionList;

      /// <summary>
      /// Creates an entry for the given keyword with zero count, zero score
      /// and an empty position list.
      /// </summary>
      /// <param name="_Keywords">The keyword text.</param>
      public KeywordsInfo(string _Keywords)
      {
          PositionList = new List<int>();
          WordsMark = 0f;
          Count = 0;
          Keywords = _Keywords;
      }

      /// <summary>
      /// Total score: occurrence count multiplied by the keyword score.
      /// </summary>
      public float AllMark
      {
          get
          {
              float occurrences = Count;
              return occurrences * WordsMark;
          }
      }

      /// <summary>
      /// Always returns null.
      /// </summary>
      public byte[] SaveBytes
      {
          get { return null; }
      }
  }
  /// <summary>
  /// Maps part-of-speech tags to the score weight a keyword of that type receives.
  /// </summary>
  public class WordsType
  {
      // Weight of the empty tag and of any tag that is not registered.
      const float DEFAULT_MARK = 0.8f;

      // Every tag registered at half of the default weight.
      static readonly string[] HalfWeightTags =
      {
          "Ag", "a", "ad", "an", "b", "c", "Dg", "d", "e", "f",
          "g", "h", "i", "j", "k", "l", "m", "Ng", "n", "nr",
          "ns", "nt", "nz", "o", "p", "q", "r", "s", "Tg", "t",
          "u", "Vg", "v", "vd", "vn", "x", "y", "z"
      };

      // Tag -> weight lookup, filled once by the constructor.
      readonly Dictionary<string, float> typeList;

      /// <summary>
      /// Builds the tag-to-weight table.
      /// </summary>
      public WordsType()
      {
          float halfMark = (float)(DEFAULT_MARK * 0.50);

          this.typeList = new Dictionary<string, float>();

          // The empty tag (no type given) gets the full default weight.
          this.typeList.Add("", DEFAULT_MARK);

          foreach (string tag in HalfWeightTags)
          {
              this.typeList.Add(tag, halfMark);
          }

          // "w" (punctuation) weighs nothing.
          this.typeList.Add("w", 0f);
      }

      /// <summary>
      /// Returns the weight registered for a part-of-speech tag.
      /// </summary>
      /// <param name="wordsType">Part-of-speech tag; may be null.</param>
      /// <returns>
      /// The registered weight, or the default weight (0.8) when the tag is null
      /// or not registered.
      /// </returns>
      public float GetMark(string wordsType)
      {
          float mark;

          // Dictionary keys cannot be null, so null is answered before the lookup.
          if (wordsType != null && this.typeList.TryGetValue(wordsType, out mark))
          {
              return mark;
          }
          return DEFAULT_MARK;
      }
  }
  270.     /*
  271.     词性表
  272.      
  273.     Ag  形语素 
  274.     a  形容词 
  275.     ad  副形词 
  276.     an  名形词 
  277.     b  区别词 
  278.     c  连词 
  279.     Dg  副语素 
  280.     d  副词 
  281.     e  叹词 
  282.     f  方位词 
  283.     g  语素 
  284.     h  前接成分 
  285.     i  成语 
  286.     j  简称略语 
  287.     k  后接成分 
  288.     l  习用语 
  289.     m  数词 
  290.     Ng  名语素 
  291.     n  名词 
  292.     nr  人名 
  293.     ns  地名 
  294.     nt  机构团体 
  295.     nz  其他专名 
  296.     o  拟声词 
  297.     p  介词 
  298.     q  量词 
  299.     r  代词 
  300.     s  处所词 
  301.     Tg  时语素 
  302.     t  时间词 
  303.     u  助词 
  304.     Vg  动语素 
  305.     v  动词 
  306.     vd  副动词 
  307.     vn  名动词 
  308.     w  标点符号 
  309.     x  非语素字 
  310.     y  语气词 
  311.     z  状态词
  312.      */
  313.     /*
  314.    public struct WordsTypeMark
  315.    {
  316.        public string WordsType;
  317.        public byte Mark;
  318.        public WordsTypeMark(string _WordsType, byte _Mark)
  319.        {
  320.            this.WordsType = _WordsType;
  321.            this.Mark = _Mark;
  322.        }
  323.    }*/
  324. }