搜索引擎

开发平台：
C#

ClassXwordClient.cs：源码内容
							using System;
using System.IO;
using System.Collections.Generic;
using System.Collections;
using System.Text;
using System.Net;
using System.Net.Sockets;
using System.Threading;
/*
      '       迅龙中文分类搜索引擎  v0.6
      '
      '        LGPL  许可发行
      '
      '       宁夏大学  张冬 康彩  zd4004@163.com
      ' 
      '        官网 http://blog.163.com/zd4004/
 */
namespace XunLong.xWordNewClient 
  // XunLong.xWordNewClient XwordClassLibraryNew  
{
    /// <summary>
    /// 得到1个分词结果
    /// </summary>
    public class ClassXwordClientNewIt
    {
        public int nowPort ;
        public string hostName;
        TcpClient client;
        NetworkStream ns;
        /// <summary>
        /// 编码 
        /// </summary>
        NewNxuEncoding.CNewNxuEncoding mCode = new NewNxuEncoding.CNewNxuEncoding();
        
        /// <summary>
        /// 分词缓存  缓存曾经分词的数据
        /// </summary>
        private Hashtable Y = new Hashtable();
        public ClassXwordClientNewIt()
        {
            //读取配置
           // XunLong.CongifData.Config.InitConfigData("D:\XunLongRUN\xunlong.kc");
           
            
        }
        ~ClassXwordClientNewIt()
        {
            try
            {
                ns.Close();
                client.Close();
            }
            catch
            { 
            }
        
        }
        /// <summary>
        /// 初始化设定值
        /// </summary>
        public void Init_start()
        {
            Console.WriteLine("初始化-分词接口");
                   Init();
        }
        private void Init()
        {
        XXP:
            Console.WriteLine("分词接口工作在 "+ hostName+ " : "+ nowPort.ToString()+"  []");
            try
            {
                client = new TcpClient(hostName, nowPort);
                client.ReceiveTimeout = 30000; //超时值为300
                client.SendTimeout = 5000;    //超时值为100                 
                ns = client.GetStream();
                Console.WriteLine("->> RE LINK NEWXWORD");
            }
            catch
            {
                try
                {
                    client.Close();
                }
                catch
                { }
                System.Threading.Thread.Sleep(100);
                goto XXP;
            }
        }
        /// <summary>
        /// 得到1个分词结果
        /// </summary>
        /// <param name="dat"></param>
        /// <returns></returns>
        public string GetOneXword(string dat)
        {
           // Console.WriteLine(dat);
            if (Y.Contains(dat) == true)
            {
                return Y[dat].ToString();
            }
            if (dat.Length == 0)
            {
                return "";
            }
            //判断句子中是否含有中文
            if (WordsIScn(dat) == false)
            {
                return dat + "/n";
            
            }
            //含有空格的字符序列
            if (dat.Length - dat.Replace(" ", "").Length > dat.Length/8 + 2)
            {
                goto NEXTTRY2;
            }
            int CC = 0;
            foreach (char oneTT in dat)
            {
                if (oneTT < (char)0 | oneTT > (char)255 )
                {
                    CC = CC + 1;
                }
             
            }
            if (CC <3 | CC <= dat.Length * 0.3)  //其它字符个数很少
            {
                return dat + "/n";
            }
            
        NEXTTRY2:
            try
            {
                           
                Encoding gbx = System.Text.Encoding.GetEncoding("gb2312");
                byte[] byteSend = gbx.GetBytes(dat);
                try
                {
                    ns.Write(byteSend, 0, byteSend.Length);
                }
                catch
                {
                    try
                    {
                        ns.Close();
                        client.Close();
                    }
                    catch
                    { }
 
                    Init();
                    return dat + "/n";
                }
                byte[] bytes = new byte[4096];
                int bytesRead = 0;
                System.Threading.Thread.Sleep(250);
                try
                {
                    bytesRead = ns.Read(bytes, 0, bytes.Length);
                    string d = gbx.GetString(bytes, 0, bytesRead);
                    d = d.Trim();
                    //// 合并名称参数
                    string d2 = comNameTag(d);
                   // if (Y.Contains(dat) == false)
                   // {
                   //     Y.Add(dat,d);
                   // }
                    try
                    {
                        Y.Add(dat, d);
                    }
                    catch
                    { }
                    return d;
                }
                catch
                {
                    try
                    {
                        ns.Close();
                        client.Close();
                    }
                    catch
                    { }
                    Init();
                    return dat + "/n";
                }
            }
            catch
            {
                try
                {
                    ns.Close();
                    client.Close();
                }
                catch
                { }
                Init();
                return dat + "/n";
            }
        }
        /// <summary>
        /// 合并名称参数
        /// </summary>
        /// <param name="data"></param>
        /// <returns></returns>
        private string comNameTag(string data)
        {
            data = data.Replace("   ", " ");
            data = data.Replace("  ", " ");
            string[] myStr = data.Split(' ');
            for (int i = 1; i < myStr.Length; i++)
            {
                if ((myStr[i - 1].IndexOf("/nr") > -1) && (myStr[i].IndexOf("/nr") > -1))
                {
                    string[] my1 = myStr[i - 1].Split('/');
                    string[] my2 = myStr[i].Split('/');
                    myStr[i - 1] = my1[0] + my2[0] + "/nr";
                    myStr[i] = "";
                }
            }
            string myback = "";
            for (int i = 0; i < myStr.Length; i++)
            {
                if (myStr[i].Length > 0)
                {
                    myback = myback + myStr[i] + " ";
                }
            }
            myback = myback.Trim();
            return myback;
        }
        /// <summary>
        /// 初始化已经分词的结果 可以用来加速
        /// </summary>
        /// <param name="okPath">使用缓存服务器的缓存数据</param>
        public void initOKxWord(string okPath)
        {
            Console.WriteLine(" >> 使用缓存服务器的缓存数据 >>");
            //初始化分词缓存
            Y.Clear();
            StreamReader reader = null;
            try
            {
                reader = new StreamReader(okPath, System.Text.Encoding.GetEncoding("gb2312"));
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    if (line != null)
                    {
                        if (line.IndexOf('t') > 0)
                        {
                            string[] mxd = line.Split('t');
                            //解码
                            mxd[0] = mCode.CODE2CN(mxd[0]);
                            mxd[1] = mCode.CODE2CN(mxd[1]);
                            if (Y.Contains(mxd[0]) == false)
                            {
                                Y.Add(mxd[0], mxd[1]);
                            }
                        }
                    }
                }
                reader.Close();
            }
            catch (IOException e)
            {
                Console.WriteLine(e.Message);
            }
            finally
            {
                if (reader != null)
                    reader.Close();
            }
            Console.WriteLine("-共加载分词缓存数据-> " + Y.Count.ToString() + " 条");
        }
        /// <summary>
        ///	判断句子中是否含有中文
        /// </summary>
        /// <param name="words">字符串</param> 
        private  bool WordsIScn(string words)
        {
            string TmmP;
            for (int i = 0; i < words.Length; i++)
            {
                TmmP = words.Substring(i, 1);
                byte[] sarr = System.Text.Encoding.GetEncoding("gb2312").GetBytes(TmmP);
                if (sarr.Length == 2)
                {
                    return true;
                }
            }
            return false;
        }
    }
        
    
}