ClassCaiYang.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:5k
源码类别:

搜索引擎

开发平台:

C#

  1. using System;
  2. using System.IO;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using System.Collections;
  6. /*
  7.       '       迅龙中文分类搜索引擎  v0.6
  8.       '
  9.       '        LGPL  许可发行
  10.       '
  11.       '       宁夏大学  张冬 康彩  zd4004@163.com
  12.       ' 
  13.       '        官网 http://blog.163.com/zd4004/
  14.  */
  15. namespace XunLong.binAutoModelTeam
  16. {
  17.     class ClassCaiYang
  18.     {
  19.         /// <summary>
  20.         /// 目录列表队列
  21.         /// </summary>
  22.         ArrayList LIST = new ArrayList();
  23.         /// <summary>
  24.         /// 文件系统对象
  25.         /// </summary>
  26.         NetHashTableAPI.ClassNHT db = new NetHashTableAPI.ClassNHT();
  27.         
  28.         /// <summary>
  29.         ///  文件保存路径
  30.         /// </summary>
  31.        public  string savePath;
  32.         /// <summary>
  33.         /// 读取各个模版  记录和存储 第一个采样的url 和 模板名称(由原始模板数据MD5生成)
  34.         /// </summary>
  35.         public void Init(string filePath, string savePathX, string k_c_path)
  36.         {
  37.             db.SetClassNHT(filePath, 3145727, k_c_path);
  38.             LIST = db.SearchOneList("http:");
  39.             Console.WriteLine(" 共有 ==>> " + LIST.Count.ToString()+"  条数据");
  40.             savePath = savePathX;
  41.         }
  42.         public void Run()
  43.         {
  44.             int BN = 0;
  45.             if (System.IO.File.Exists(savePath) == true)
  46.             {
  47.                 System.IO.File.Delete(savePath);
  48.             }
  49.             while (LIST.Count > 0)
  50.             {
  51.                 //取出第一个
  52.                 string a_one = LIST[0].ToString();
  53.                 LIST.Remove(a_one);
  54.                 // key = url  val  =  相似度  >12
  55.                 Hashtable TMP_H = new Hashtable();
  56.                
  57.                 //建模的等级 从高到低               
  58.                     for (int i = 0; i < LIST.Count; i++)
  59.                     {
  60.                         int n1 = XunLong.UrlStringLib.ClassUrlString.Url2Url(a_one, LIST[i].ToString());
  61.                         if (n1 >= 12) 
  62.                         {
  63.                             TMP_H.Add(LIST[i],n1);
  64.                         }
  65.                     }
  66.                     for (int u = 50; u >= 12; u--)
  67.                     {
  68.                         ArrayList TMP = new ArrayList();
  69.                         TMP.Clear();
  70.                         foreach (System.Collections.DictionaryEntry de in TMP_H)
  71.                         {
  72.                             if ((int)de.Value >= u)
  73.                             {
  74.                                 TMP.Add(de.Key);
  75.                             }
  76.                         }
  77.                         if (TMP.Count >=64)  //个数大于36  等级最高的
  78.                         {
  79.                             foreach (string b_a in TMP)
  80.                             {
  81.                                 LIST.Remove(b_a);
  82.                                 TMP_H.Remove(b_a);
  83.                             }
  84.                             //写入一条数据
  85.                             putmOLDMODELSOURCEFileData(savePath, a_one);
  86.                             BN = BN + 1;
  87.                             Console.WriteLine("-建模等级->" + u.ToString());
  88.                             Console.WriteLine("-共识别出可模版化数据-> " + BN.ToString() + " 条");
  89.                             goto x_NEXT;       //只取符合条件 等级最高的
  90.                         }
  91.                     }
  92.                     if (LIST.Count % 10 == 0)
  93.                     {
  94.                         Console.Write(" " + LIST.Count.ToString());
  95.                     }
  96.                 x_NEXT: ;
  97.             }
  98.             Console.WriteLine("识别完毕");
  99.             Console.WriteLine("数据保存在: ");
  100.             Console.WriteLine(savePath);
  101.         }
  102.         /// <summary>
  103.         /// 写入已经使用过的一条数据   xxx-->> 相对于设置列表 olgurl
  104.         /// </summary>
  105.         /// <param name="filename">文件名</param>
  106.         /// <param name="data">数据</param>
  107.         /// <param name="isApp">是否追加模式</param>
  108.         public void putmOLDMODELSOURCEFileData(string okPath, string data)
  109.         {
  110.             StreamWriter writer = null;
  111.             try
  112.             {
  113.                 writer = new StreamWriter(okPath, true, System.Text.Encoding.GetEncoding("gb2312"));
  114.                 //  writer.Write(data);
  115.                 writer.WriteLine(data);
  116.                 writer.Close();
  117.             }
  118.             catch (IOException e)
  119.             {
  120.                 Console.WriteLine(e.Message);
  121.             }
  122.             finally
  123.             {
  124.                 if (writer != null)
  125.                     writer.Close();
  126.             }
  127.         }
  128.     }
  129. }