ClassXwordClient.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:9k
- using System;
- using System.IO;
- using System.Collections.Generic;
- using System.Collections;
- using System.Text;
- using System.Net;
- using System.Net.Sockets;
- using System.Threading;
- /*
- ' 迅龙中文分类搜索引擎 v0.6
- '
- ' LGPL 许可发行
- '
- ' 宁夏大学 张冬 康彩 zd4004@163.com
- '
- ' 官网 http://blog.163.com/zd4004/
- */
namespace XunLong.xWordNewClient
// XunLong.xWordNewClient XwordClassLibraryNew
{
    /// <summary>
    /// Client for a word-segmentation server reached over TCP.
    /// Text is sent GB2312-encoded, the reply is read back as a single
    /// GB2312 string, and successful replies are cached in memory.
    /// </summary>
    /// <remarks>
    /// Set <see cref="hostName"/> and <see cref="nowPort"/>, then call
    /// <see cref="Init_start"/> before the first request. If no connection
    /// exists when <see cref="GetOneXword"/> needs one, that call connects
    /// and returns the unsegmented fallback.
    /// </remarks>
    public class ClassXwordClientNewIt : IDisposable
    {
        /// <summary>Suffix appended to text that is returned without asking the server.</summary>
        private const string FallbackTag = "/n";

        /// <summary>Encoding used on the wire and for the cache file.</summary>
        private const string WireEncodingName = "gb2312";

        /// <summary>Socket receive timeout in milliseconds.</summary>
        private const int ReceiveTimeoutMs = 30000;

        /// <summary>Socket send timeout in milliseconds.</summary>
        private const int SendTimeoutMs = 5000;

        /// <summary>Pause between two connection attempts, in milliseconds.</summary>
        private const int ReconnectDelayMs = 100;

        /// <summary>Pause between sending a request and reading the reply, in milliseconds.</summary>
        private const int ResponseDelayMs = 250;

        /// <summary>Size of the buffer used for the single reply read.</summary>
        private const int ReadBufferSize = 4096;

        /// <summary>
        /// Separator between key and value in a cache file line.
        /// NOTE(review): this is the letter 't', not a tab character. It looks
        /// like a lost escape, but the component that writes the cache file is
        /// not visible here; confirm the file format before changing it.
        /// </summary>
        private const char CacheFieldSeparator = 't';

        /// <summary>TCP port of the segmentation server.</summary>
        public int nowPort;

        /// <summary>Host name of the segmentation server.</summary>
        public string hostName;

        private TcpClient client;
        private NetworkStream ns;

        /// <summary>
        /// Codec used to decode the entries of the cache file.
        /// </summary>
        private NewNxuEncoding.CNewNxuEncoding mCode = new NewNxuEncoding.CNewNxuEncoding();

        /// <summary>
        /// Segmentation cache: input text -> reply previously obtained for it.
        /// </summary>
        private Hashtable segmentCache = new Hashtable();

        /// <summary>
        /// Creates a client. No connection is opened here.
        /// </summary>
        public ClassXwordClientNewIt()
        {
        }

        /// <summary>
        /// Last-resort cleanup of the connection if <see cref="Dispose"/> was never called.
        /// </summary>
        ~ClassXwordClientNewIt()
        {
            CloseConnection();
        }

        /// <summary>
        /// Closes the connection to the server. A later call to
        /// <see cref="GetOneXword"/> will reconnect.
        /// </summary>
        public void Dispose()
        {
            CloseConnection();
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Announces start-up on the console and connects to the server.
        /// Blocks until a connection has been established.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <see cref="hostName"/> is null or <see cref="nowPort"/> is outside the valid port range.
        /// </exception>
        public void Init_start()
        {
            Console.WriteLine("初始化-分词接口");
            Init();
        }

        /// <summary>
        /// Connects to <see cref="hostName"/>:<see cref="nowPort"/>, retrying
        /// every <see cref="ReconnectDelayMs"/> ms until it succeeds.
        /// </summary>
        private void Init()
        {
            // Drop any previous connection so that re-initialising does not leak a socket.
            CloseConnection();

            while (true)
            {
                Console.WriteLine("分词接口工作在 " + hostName + " : " + nowPort.ToString() + " []");
                try
                {
                    client = new TcpClient(hostName, nowPort);
                    client.ReceiveTimeout = ReceiveTimeoutMs;
                    client.SendTimeout = SendTimeoutMs;
                    ns = client.GetStream();
                    Console.WriteLine("->> RE LINK NEWXWORD");
                    return;
                }
                catch (ArgumentException)
                {
                    // A null host name or an out-of-range port can never succeed;
                    // retrying would spin forever, so surface the misconfiguration.
                    throw;
                }
                catch (Exception)
                {
                    // Server not reachable (yet): clean up and try again.
                    CloseConnection();
                    Thread.Sleep(ReconnectDelayMs);
                }
            }
        }

        /// <summary>
        /// Closes the stream and the client independently and forgets both.
        /// Never throws, so it is safe to call from the finalizer.
        /// </summary>
        private void CloseConnection()
        {
            NetworkStream stream = ns;
            TcpClient connection = client;
            ns = null;
            client = null;

            if (stream != null)
            {
                try
                {
                    stream.Close();
                }
                catch (Exception)
                {
                    // Best-effort teardown: the connection is being discarded anyway.
                }
            }

            if (connection != null)
            {
                try
                {
                    connection.Close();
                }
                catch (Exception)
                {
                    // Best-effort teardown: the connection is being discarded anyway.
                }
            }
        }

        /// <summary>
        /// Returns the segmentation result for one piece of text.
        /// </summary>
        /// <param name="dat">Text to segment.</param>
        /// <returns>
        /// The cached or freshly received server reply; an empty string for
        /// null or empty input; or <paramref name="dat"/> followed by "/n"
        /// when the text is not sent to the server or the request fails.
        /// </returns>
        /// <remarks>
        /// After a failed request this method reconnects before returning,
        /// which blocks until the server is reachable again.
        /// </remarks>
        public string GetOneXword(string dat)
        {
            if (dat == null)
            {
                return "";
            }

            object cached = segmentCache[dat];
            if (cached != null)
            {
                return cached.ToString();
            }

            if (dat.Length == 0)
            {
                return "";
            }

            if (!WordsIScn(dat) || !IsWorthSegmenting(dat))
            {
                return dat + FallbackTag;
            }

            string segmented = QueryServer(dat);
            if (segmented == null)
            {
                // Request failed: rebuild the connection for the next caller and
                // hand the text back unsegmented. Failures are never cached.
                CloseConnection();
                Init();
                return dat + FallbackTag;
            }

            // The raw reply is cached and returned; comNameTag is not applied here.
            segmentCache[dat] = segmented;
            return segmented;
        }

        /// <summary>
        /// Decides whether text that contains double-byte characters should be sent to the server.
        /// </summary>
        /// <returns>
        /// true when the text has more than Length/8 + 2 spaces, or when it has
        /// at least 3 characters above U+00FF and they make up more than 30% of the text.
        /// </returns>
        private static bool IsWorthSegmenting(string text)
        {
            int spaceCount = 0;
            int wideCount = 0;
            foreach (char c in text)
            {
                if (c == ' ')
                {
                    spaceCount++;
                }
                else if (c > (char)255)
                {
                    wideCount++;
                }
            }

            if (spaceCount > text.Length / 8 + 2)
            {
                return true;
            }

            return wideCount >= 3 && wideCount > text.Length * 0.3;
        }

        /// <summary>
        /// Sends one request over the current connection and reads the reply.
        /// </summary>
        /// <returns>The trimmed reply, or null when there is no usable connection or the exchange failed.</returns>
        private string QueryServer(string text)
        {
            if (ns == null)
            {
                return null;
            }

            try
            {
                Encoding wire = Encoding.GetEncoding(WireEncodingName);
                byte[] request = wire.GetBytes(text);
                ns.Write(request, 0, request.Length);

                // Give the server time to produce its answer before reading.
                Thread.Sleep(ResponseDelayMs);

                // NOTE(review): a single Read is issued, so a reply larger than the
                // buffer, or one delivered in several TCP segments, is only partially
                // consumed. No message framing is visible here to loop on.
                byte[] buffer = new byte[ReadBufferSize];
                int received = ns.Read(buffer, 0, buffer.Length);
                if (received <= 0)
                {
                    // Zero bytes means the remote side closed the connection; this is
                    // a failure, not an empty segmentation result.
                    return null;
                }

                return wire.GetString(buffer, 0, received).Trim();
            }
            catch (Exception ex)
            {
                // Any failure (time-out, reset, disposed stream, ...) is handled the
                // same way by the caller: reconnect and fall back.
                Console.WriteLine("->> XWORD REQUEST FAILED: " + ex.Message);
                return null;
            }
        }

        /// <summary>
        /// Merges each pair of adjacent tokens that both carry the "/nr" tag
        /// into one token tagged "/nr".
        /// </summary>
        /// <param name="data">Space-separated "word/tag" tokens.</param>
        /// <returns>The tokens joined by single spaces, with merged pairs collapsed.</returns>
        private string comNameTag(string data)
        {
            // Dropping empty entries makes runs of spaces behave like one separator,
            // so extra spaces between two "/nr" tokens no longer prevent the merge.
            string[] tokens = data.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            for (int i = 1; i < tokens.Length; i++)
            {
                bool previousIsName = tokens[i - 1].IndexOf("/nr", StringComparison.Ordinal) > -1;
                bool currentIsName = tokens[i].IndexOf("/nr", StringComparison.Ordinal) > -1;
                if (previousIsName && currentIsName)
                {
                    string left = tokens[i - 1].Split('/')[0];
                    string right = tokens[i].Split('/')[0];
                    tokens[i - 1] = left + right + "/nr";
                    // Emptying the slot removes it from the output and stops it
                    // from being merged again with the following token.
                    tokens[i] = "";
                }
            }

            StringBuilder merged = new StringBuilder();
            foreach (string token in tokens)
            {
                if (token.Length == 0)
                {
                    continue;
                }

                if (merged.Length > 0)
                {
                    merged.Append(' ');
                }
                merged.Append(token);
            }

            return merged.ToString().Trim();
        }

        /// <summary>
        /// Replaces the in-memory cache with the entries of a cache file,
        /// which speeds up later requests for the same text.
        /// </summary>
        /// <param name="okPath">Path of the GB2312-encoded cache file, one "key&lt;separator&gt;value" entry per line.</param>
        /// <remarks>
        /// The cache is cleared first, so an unreadable file leaves it empty.
        /// I/O errors are reported on the console and not rethrown.
        /// </remarks>
        public void initOKxWord(string okPath)
        {
            Console.WriteLine(" >> 使用缓存服务器的缓存数据 >>");

            segmentCache.Clear();
            try
            {
                using (StreamReader reader = new StreamReader(okPath, Encoding.GetEncoding(WireEncodingName)))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        // The separator must exist and must not be the first character.
                        if (line.IndexOf(CacheFieldSeparator) <= 0)
                        {
                            continue;
                        }

                        string[] fields = line.Split(CacheFieldSeparator);
                        string key = mCode.CODE2CN(fields[0]);
                        string value = mCode.CODE2CN(fields[1]);

                        // The first occurrence of a key wins.
                        if (!segmentCache.Contains(key))
                        {
                            segmentCache.Add(key, value);
                        }
                    }
                }
            }
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
            }

            Console.WriteLine("-共加载分词缓存数据-> " + segmentCache.Count.ToString() + " 条");
        }

        /// <summary>
        /// Tells whether the text contains at least one character that encodes
        /// to exactly two bytes in GB2312.
        /// </summary>
        /// <param name="words">Text to inspect.</param>
        /// <returns>true as soon as one such character is found; otherwise false.</returns>
        private bool WordsIScn(string words)
        {
            if (words.Length == 0)
            {
                return false;
            }

            // Look the encoding up once instead of once per character.
            Encoding gb = Encoding.GetEncoding(WireEncodingName);
            char[] single = new char[1];
            for (int i = 0; i < words.Length; i++)
            {
                single[0] = words[i];
                if (gb.GetByteCount(single) == 2)
                {
                    return true;
                }
            }

            return false;
        }
    }
}