ClassXwordClient.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:9k
源码类别:

搜索引擎

开发平台:

C#

  1. using System;
  2. using System.IO;
  3. using System.Collections.Generic;
  4. using System.Collections;
  5. using System.Text;
  6. using System.Net;
  7. using System.Net.Sockets;
  8. using System.Threading;
  9. /*
  10.       '       迅龙中文分类搜索引擎  v0.6
  11.       '
  12.       '        LGPL  许可发行
  13.       '
  14.       '       宁夏大学  张冬 康彩  zd4004@163.com
  15.       ' 
  16.       '        官网 http://blog.163.com/zd4004/
  17.  */
  18. namespace XunLong.xWordNewClient 
  19.   // XunLong.xWordNewClient XwordClassLibraryNew  
  20. {
  21.     /// <summary>
  22.     /// 得到1个分词结果
  23.     /// </summary>
  24.     public class ClassXwordClientNewIt
  25.     {
  26.         public int nowPort ;
  27.         public string hostName;
  28.         TcpClient client;
  29.         NetworkStream ns;
  30.         /// <summary>
  31.         /// 编码 
  32.         /// </summary>
  33.         NewNxuEncoding.CNewNxuEncoding mCode = new NewNxuEncoding.CNewNxuEncoding();
  34.         
  35.         /// <summary>
  36.         /// 分词缓存  缓存曾经分词的数据
  37.         /// </summary>
  38.         private Hashtable Y = new Hashtable();
  39.         public ClassXwordClientNewIt()
  40.         {
  41.             //读取配置
  42.            // XunLong.CongifData.Config.InitConfigData("D:\XunLongRUN\xunlong.kc");
  43.            
  44.             
  45.         }
  46.         ~ClassXwordClientNewIt()
  47.         {
  48.             try
  49.             {
  50.                 ns.Close();
  51.                 client.Close();
  52.             }
  53.             catch
  54.             { 
  55.             }
  56.         
  57.         }
  58.         /// <summary>
  59.         /// 初始化设定值
  60.         /// </summary>
  61.         public void Init_start()
  62.         {
  63.             Console.WriteLine("初始化-分词接口");
  64.                    Init();
  65.         }
  66.         private void Init()
  67.         {
  68.         XXP:
  69.             Console.WriteLine("分词接口工作在 "+ hostName+ " : "+ nowPort.ToString()+"  []");
  70.             try
  71.             {
  72.                 client = new TcpClient(hostName, nowPort);
  73.                 client.ReceiveTimeout = 30000; //超时值为300
  74.                 client.SendTimeout = 5000;    //超时值为100                 
  75.                 ns = client.GetStream();
  76.                 Console.WriteLine("->> RE LINK NEWXWORD");
  77.             }
  78.             catch
  79.             {
  80.                 try
  81.                 {
  82.                     client.Close();
  83.                 }
  84.                 catch
  85.                 { }
  86.                 System.Threading.Thread.Sleep(100);
  87.                 goto XXP;
  88.             }
  89.         }
  90.         /// <summary>
  91.         /// 得到1个分词结果
  92.         /// </summary>
  93.         /// <param name="dat"></param>
  94.         /// <returns></returns>
  95.         public string GetOneXword(string dat)
  96.         {
  97.            // Console.WriteLine(dat);
  98.             if (Y.Contains(dat) == true)
  99.             {
  100.                 return Y[dat].ToString();
  101.             }
  102.             if (dat.Length == 0)
  103.             {
  104.                 return "";
  105.             }
  106.             //判断句子中是否含有中文
  107.             if (WordsIScn(dat) == false)
  108.             {
  109.                 return dat + "/n";
  110.             
  111.             }
  112.             //含有空格的字符序列
  113.             if (dat.Length - dat.Replace(" ", "").Length > dat.Length/8 + 2)
  114.             {
  115.                 goto NEXTTRY2;
  116.             }
  117.             int CC = 0;
  118.             foreach (char oneTT in dat)
  119.             {
  120.                 if (oneTT < (char)0 | oneTT > (char)255 )
  121.                 {
  122.                     CC = CC + 1;
  123.                 }
  124.              
  125.             }
  126.             if (CC <3 | CC <= dat.Length * 0.3)  //其它字符个数很少
  127.             {
  128.                 return dat + "/n";
  129.             }
  130.             
  131.         NEXTTRY2:
  132.             try
  133.             {
  134.                            
  135.                 Encoding gbx = System.Text.Encoding.GetEncoding("gb2312");
  136.                 byte[] byteSend = gbx.GetBytes(dat);
  137.                 try
  138.                 {
  139.                     ns.Write(byteSend, 0, byteSend.Length);
  140.                 }
  141.                 catch
  142.                 {
  143.                     try
  144.                     {
  145.                         ns.Close();
  146.                         client.Close();
  147.                     }
  148.                     catch
  149.                     { }
  150.  
  151.                     Init();
  152.                     return dat + "/n";
  153.                 }
  154.                 byte[] bytes = new byte[4096];
  155.                 int bytesRead = 0;
  156.                 System.Threading.Thread.Sleep(250);
  157.                 try
  158.                 {
  159.                     bytesRead = ns.Read(bytes, 0, bytes.Length);
  160.                     string d = gbx.GetString(bytes, 0, bytesRead);
  161.                     d = d.Trim();
  162.                     //// 合并名称参数
  163.                     string d2 = comNameTag(d);
  164.                    // if (Y.Contains(dat) == false)
  165.                    // {
  166.                    //     Y.Add(dat,d);
  167.                    // }
  168.                     try
  169.                     {
  170.                         Y.Add(dat, d);
  171.                     }
  172.                     catch
  173.                     { }
  174.                     return d;
  175.                 }
  176.                 catch
  177.                 {
  178.                     try
  179.                     {
  180.                         ns.Close();
  181.                         client.Close();
  182.                     }
  183.                     catch
  184.                     { }
  185.                     Init();
  186.                     return dat + "/n";
  187.                 }
  188.             }
  189.             catch
  190.             {
  191.                 try
  192.                 {
  193.                     ns.Close();
  194.                     client.Close();
  195.                 }
  196.                 catch
  197.                 { }
  198.                 Init();
  199.                 return dat + "/n";
  200.             }
  201.         }
  202.         /// <summary>
  203.         /// 合并名称参数
  204.         /// </summary>
  205.         /// <param name="data"></param>
  206.         /// <returns></returns>
  207.         private string comNameTag(string data)
  208.         {
  209.             data = data.Replace("   ", " ");
  210.             data = data.Replace("  ", " ");
  211.             string[] myStr = data.Split(' ');
  212.             for (int i = 1; i < myStr.Length; i++)
  213.             {
  214.                 if ((myStr[i - 1].IndexOf("/nr") > -1) && (myStr[i].IndexOf("/nr") > -1))
  215.                 {
  216.                     string[] my1 = myStr[i - 1].Split('/');
  217.                     string[] my2 = myStr[i].Split('/');
  218.                     myStr[i - 1] = my1[0] + my2[0] + "/nr";
  219.                     myStr[i] = "";
  220.                 }
  221.             }
  222.             string myback = "";
  223.             for (int i = 0; i < myStr.Length; i++)
  224.             {
  225.                 if (myStr[i].Length > 0)
  226.                 {
  227.                     myback = myback + myStr[i] + " ";
  228.                 }
  229.             }
  230.             myback = myback.Trim();
  231.             return myback;
  232.         }
  233.         /// <summary>
  234.         /// 初始化已经分词的结果 可以用来加速
  235.         /// </summary>
  236.         /// <param name="okPath">使用缓存服务器的缓存数据</param>
  237.         public void initOKxWord(string okPath)
  238.         {
  239.             Console.WriteLine(" >> 使用缓存服务器的缓存数据 >>");
  240.             //初始化分词缓存
  241.             Y.Clear();
  242.             StreamReader reader = null;
  243.             try
  244.             {
  245.                 reader = new StreamReader(okPath, System.Text.Encoding.GetEncoding("gb2312"));
  246.                 for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
  247.                 {
  248.                     if (line != null)
  249.                     {
  250.                         if (line.IndexOf('t') > 0)
  251.                         {
  252.                             string[] mxd = line.Split('t');
  253.                             //解码
  254.                             mxd[0] = mCode.CODE2CN(mxd[0]);
  255.                             mxd[1] = mCode.CODE2CN(mxd[1]);
  256.                             if (Y.Contains(mxd[0]) == false)
  257.                             {
  258.                                 Y.Add(mxd[0], mxd[1]);
  259.                             }
  260.                         }
  261.                     }
  262.                 }
  263.                 reader.Close();
  264.             }
  265.             catch (IOException e)
  266.             {
  267.                 Console.WriteLine(e.Message);
  268.             }
  269.             finally
  270.             {
  271.                 if (reader != null)
  272.                     reader.Close();
  273.             }
  274.             Console.WriteLine("-共加载分词缓存数据-> " + Y.Count.ToString() + " 条");
  275.         }
  276.         /// <summary>
  277.         /// 判断句子中是否含有中文
  278.         /// </summary>
  279.         /// <param name="words">字符串</param> 
  280.         private  bool WordsIScn(string words)
  281.         {
  282.             string TmmP;
  283.             for (int i = 0; i < words.Length; i++)
  284.             {
  285.                 TmmP = words.Substring(i, 1);
  286.                 byte[] sarr = System.Text.Encoding.GetEncoding("gb2312").GetBytes(TmmP);
  287.                 if (sarr.Length == 2)
  288.                 {
  289.                     return true;
  290.                 }
  291.             }
  292.             return false;
  293.         }
  294.     }
  295.         
  296.     
  297. }