ClassUserModel.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:17k
源码类别:

搜索引擎

开发平台:

C#

  1. using System;
  2. using System.IO;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using System.Collections;
  6. using System.Text.RegularExpressions;
  7. /*
  8.       '       迅龙中文分类搜索引擎  v0.6
  9.       '
  10.       '        LGPL  许可发行
  11.       '
  12.       '       宁夏大学  张冬 康彩  zd4004@163.com
  13.       ' 
  14.       '        官网 http://blog.163.com/zd4004/
  15.  */
  16. namespace XunLong.ModelUserClassLibrary
  17. {
  18.     /// <summary>
  19.     /// 模板使用库
  20.     /// </summary>
  21.     public class ClassUserModel
  22.     {
  23.         /// <summary>
  24.         /// Set to true by TestModeL and to false by init; while true, getTagAndData records the extracted fields in modelOneList.
  25.         /// </summary>
  26.         private bool isShowmodelOneList =false;
  27.         /// <summary>
  28.         /// Extracted field values keyed by their zero-based index; filled by getTagAndData only while isShowmodelOneList is true.
  29.         /// </summary>
  30.         public  Hashtable modelOneList = new Hashtable();
  31.         /// <summary>
  32.         /// The loaded template records (kcSearch values), as stored by init or TestModeL.
  33.         /// </summary>
  34.         public  ArrayList n = new ArrayList();
  /// <summary>
  /// Loads every template found in a directory, replacing whatever was loaded before.
  /// </summary>
  /// <remarks>
  /// Each "*.a" file in the directory marks one template. The files sharing its base name
  /// with the extensions .a, .b, .c, .d, .e, .t, .h and .s are read into the members of the
  /// same name on a kcSearch record. The records are ordered by <c>sortIt</c> and stored in
  /// <c>n</c>; <c>modelOneList</c> is emptied and field recording is switched off.
  /// </remarks>
  /// <param name="Xdir">Path of the directory that holds the template files.</param>
  /// <returns>
  /// The number of "*.a" files processed. A record equal to one already collected is not
  /// stored a second time, but it is still counted.
  /// </returns>
  public int init(string Xdir)
  {
      DirectoryInfo dir = new DirectoryInfo(Xdir);

      isShowmodelOneList = false;
      modelOneList.Clear();
      n.Clear();

      ArrayList loaded = new ArrayList();
      int templateCount = 0;

      // The ".a" file is the anchor of a template; the other parts are located through it.
      foreach (FileInfo f in dir.GetFiles("*.a"))
      {
          // Full path without the trailing ".a".
          string name = f.FullName.Substring(0, f.FullName.Length - 2);

          // A fresh record per template: entries already stored in the list can never
          // end up sharing state with the one being filled now.
          XunLong.PublicClassLibrary.kcSearch new_it = new XunLong.PublicClassLibrary.kcSearch();
          new_it.a = getFileData(name + ".a");
          new_it.b = getFileData(name + ".b");
          new_it.c = getFileData(name + ".c");
          new_it.d = getFileData(name + ".d");
          new_it.e = getFileData(name + ".e");
          new_it.t = getFileData(name + ".t");
          new_it.h = getFileData(name + ".h");
          new_it.s = getFileData(name + ".s");
          new_it.isSORTIT = false;
          // Number of '*' wildcards in the match pattern; sortIt orders by this value.
          new_it.isXnum = CharNum(new_it.d);

          templateCount++;
          Console.WriteLine("加载模板 " + name);

          if (!loaded.Contains(new_it))
          {
              loaded.Add(new_it);
          }
      }

      n = sortIt(loaded);
      Console.WriteLine("模板加载完成  总计: " + templateCount.ToString() + " 个模板");
      return templateCount;
  }
  /// <summary>
  /// Returns the given template records ordered by descending <c>isXnum</c>.
  /// </summary>
  /// <param name="ui">List of kcSearch records; the list itself is left untouched.</param>
  /// <returns>A new list holding the same records, largest <c>isXnum</c> first.</returns>
  private ArrayList sortIt(ArrayList ui)
  {
      int count = ui.Count;
      XunLong.PublicClassLibrary.kcSearch[] items = new XunLong.PublicClassLibrary.kcSearch[count];
      for (int k = 0; k < count; k++)
      {
          items[k] = (XunLong.PublicClassLibrary.kcSearch)ui[k];
      }

      // Exchange sort: after pass 'pos', slot 'pos' holds the largest remaining isXnum.
      for (int pos = 0; pos < count; pos++)
      {
          for (int probe = pos + 1; probe < count; probe++)
          {
              if (items[pos].isXnum < items[probe].isXnum)
              {
                  XunLong.PublicClassLibrary.kcSearch held = items[pos];
                  items[pos] = items[probe];
                  items[probe] = held;
              }
          }
      }

      return new ArrayList(items);
  }
  /// <summary>
  /// Counts the '*' characters in a string.
  /// </summary>
  /// <param name="dat">Text to inspect; must not be null.</param>
  /// <returns>How many times '*' occurs in <paramref name="dat"/>.</returns>
  private int CharNum(string dat)
  {
      int stars = 0;
      foreach (char ch in dat)
      {
          if (ch == '*')
          {
              stars++;
          }
      }
      return stars;
  }
  /// <summary>
  /// Replaces all loaded templates with a single test template and switches on the
  /// recording of extracted fields in <c>modelOneList</c>.
  /// </summary>
  /// <param name="a">Data template text.</param>
  /// <param name="b">Category (clustering) template text.</param>
  /// <param name="c">Related-items template text.</param>
  /// <param name="d">Match pattern; '*' marks each field to extract.</param>
  /// <param name="e">Stored in the record's <c>e</c> member; not read anywhere in this class.</param>
  /// <param name="t">Title template text.</param>
  /// <param name="h">HTML block template text.</param>
  /// <param name="s">Summary template text.</param>
  public void TestModeL(string a, string b,string c,string d,string e,string t,string h,string s)
  {
      XunLong.PublicClassLibrary.kcSearch probe = new XunLong.PublicClassLibrary.kcSearch();
      probe.a = a;
      probe.b = b;
      probe.c = c;
      probe.d = d;
      probe.e = e;
      probe.t = t;
      probe.h = h;
      probe.s = s;

      // From now on getTagAndData also fills modelOneList.
      isShowmodelOneList = true;
      modelOneList.Clear();

      n.Clear();
      n.Add(probe);
  }
  /// <summary>
  /// Reads a whole file as gb2312-encoded text.
  /// </summary>
  /// <param name="filename">Path of the file to read.</param>
  /// <returns>
  /// The file content, or an empty string when an <see cref="IOException"/> occurs
  /// (for example when the file does not exist); the exception message is then written
  /// to the console.
  /// </returns>
  private string getFileData(string filename)
  {
      try
      {
          // 'using' closes the reader exactly once on every path.
          using (StreamReader reader = new StreamReader(filename, System.Text.Encoding.GetEncoding("gb2312")))
          {
              return reader.ReadToEnd();
          }
      }
      catch (IOException e)
      {
          // Best effort: a missing or unreadable template part counts as empty.
          Console.WriteLine(e.Message);
          return "";
      }
  }
  /// <summary>
  /// Matches a page against the loaded templates and fills in the first one that fits.
  /// </summary>
  /// <remarks>
  /// Templates are tried in the order they are stored in <c>n</c>; a template whose <c>d</c>
  /// pattern is null or shorter than two characters is skipped. The pattern, prefixed with one
  /// blank, is split on '*': the literal pieces must occur in the page in that order, and the
  /// text between two consecutive pieces becomes one extracted field (cleaned by <c>GetTXT</c>).
  /// Field number i then replaces every "&lt;TAGDATA INDEX=i/&gt;" placeholder in the
  /// template's a, b, c, t, h and s texts; for b, c and t the value first goes through
  /// <c>CCxmlTag</c>. When field recording is on (see <c>TestModeL</c>) the fields are also
  /// stored in <c>modelOneList</c>. The stored templates are never modified.
  /// </remarks>
  /// <param name="data">Page text to match; null is treated as "nothing matches".</param>
  /// <returns>
  /// The filled-in record with <c>isOK</c> set to true, or the empty record returned by
  /// <c>GetNullXZDHtml</c> when no template matches.
  /// </returns>
  public XunLong.PublicClassLibrary.kcSearch getTagAndData(string data)
  {
      if (data != null)
      {
          foreach (XunLong.PublicClassLibrary.kcSearch candidate in n)
          {
              string pattern = candidate.d;
              if (pattern == null || pattern.Length < 2)
              {
                  continue;
              }

              // The blank is added to a local copy only, so the stored pattern stays
              // the same no matter how many pages are matched against it.
              List<string> fields = new List<string>();
              if (TryExtractFields(" " + pattern, data, fields))
              {
                  return FillTemplate(candidate, fields);
              }
          }
      }

      return GetNullXZDHtml(data);
  }

  /// <summary>
  /// Walks the '*'-separated pieces of a pattern through the page and collects the text
  /// found between consecutive pieces.
  /// </summary>
  /// <param name="pattern">Match pattern, already prefixed with the leading blank.</param>
  /// <param name="page">Page text to search.</param>
  /// <param name="fields">Receives the cleaned field values in pattern order.</param>
  /// <returns>True when every piece was found in order; false otherwise.</returns>
  private bool TryExtractFields(string pattern, string page, List<string> fields)
  {
      string[] pieces = pattern.Split('*');
      string remaining = page;

      for (int j = 1; j < pieces.Length; j++)
      {
          // An empty closing piece (trailing or doubled '*') yields no field.
          if (pieces[j].Length == 0)
          {
              continue;
          }

          string opening = pieces[j - 1];
          int openAt = remaining.IndexOf(opening);
          if (openAt == -1)
          {
              return false;
          }

          // Search for the closing piece strictly after the opening one, so the two can
          // never overlap and the field length below can never be negative. An empty
          // opening piece is found at position 0, so the field then starts at the
          // beginning of the remaining text.
          int valueStart = openAt + opening.Length;
          int closeAt = remaining.IndexOf(pieces[j], valueStart);
          if (closeAt == -1)
          {
              return false;
          }

          fields.Add(GetTXT(remaining.Substring(valueStart, closeAt - valueStart)));

          // Continue from the closing piece: it is the opening piece of the next field.
          remaining = remaining.Substring(closeAt);
      }

      return true;
  }

  /// <summary>
  /// Substitutes the extracted fields into a template's texts and builds the result record.
  /// </summary>
  /// <param name="template">Template whose a, b, c, t, h and s texts are filled in.</param>
  /// <param name="fields">Extracted field values; the list index is the placeholder index.</param>
  /// <returns>A record carrying the filled-in texts with <c>isOK</c> set to true.</returns>
  private XunLong.PublicClassLibrary.kcSearch FillTemplate(XunLong.PublicClassLibrary.kcSearch template, List<string> fields)
  {
      string axa = template.a;
      string axb = template.b;
      string axc = template.c;
      string axt = template.t;
      string axh = template.h;
      string axs = template.s;

      if (isShowmodelOneList)
      {
          modelOneList.Clear();
      }

      for (int index = 0; index < fields.Count; index++)
      {
          string placeholder = "<TAGDATA INDEX=" + index.ToString() + "/>";
          string raw = fields[index];
          string tagSafe = CCxmlTag(raw);

          axa = axa.Replace(placeholder, raw);      // data template
          axb = axb.Replace(placeholder, tagSafe);  // category (clustering) template
          axc = axc.Replace(placeholder, tagSafe);  // related-items template
          axt = axt.Replace(placeholder, tagSafe);  // title template
          axh = axh.Replace(placeholder, raw);      // HTML block
          axs = axs.Replace(placeholder, raw);      // summary

          if (isShowmodelOneList)
          {
              modelOneList.Add(index, raw);
          }
      }

      XunLong.PublicClassLibrary.kcSearch result = new XunLong.PublicClassLibrary.kcSearch();
      result.a = axa;
      result.b = axb;
      result.c = axc;
      result.t = axt;
      result.h = axh;
      result.s = axs;
      result.isOK = true;
      return result;
  }
  /// <summary>
  /// Builds the result returned when no template matches: a record whose a, b, c, t, h
  /// and s texts are all empty and whose <c>isOK</c> flag is false.
  /// </summary>
  /// <param name="datas">Page text; currently not used.</param>
  /// <returns>The empty, not-OK record.</returns>
  private XunLong.PublicClassLibrary.kcSearch GetNullXZDHtml(string datas)
  {
      XunLong.PublicClassLibrary.kcSearch blank = new XunLong.PublicClassLibrary.kcSearch();
      blank.a = "";
      blank.b = "";
      blank.c = "";
      blank.t = "";
      blank.h = "";
      blank.s = "";
      blank.isOK = false;
      return blank;
  }
  /// <summary>
  /// Removes every "&lt;...&gt;" tag from the text and turns each "&amp;nbsp;" entity into a blank.
  /// </summary>
  /// <param name="html">Markup to clean.</param>
  /// <returns>The text with tags removed and "&amp;nbsp;" replaced by spaces.</returns>
  private string ParseHtml(string html)
  {
      return Regex.Replace(html, "<[^>]*>", "").Replace("&nbsp;", " ");
  }
  /// <summary>
  /// Extracts the document title, i.e. the text enclosed in &lt;ZDKC0&gt;...&lt;/ZDKC0&gt;.
  /// </summary>
  /// <param name="html">Document text to search.</param>
  /// <returns>
  /// The enclosed text, or the placeholder "此文挡标题未知" when the markers are not found.
  /// The match is greedy and does not span line breaks.
  /// </returns>
  private string GetTitle(string html)
  {
      Match found = Regex.Match(html, "<ZDKC0>(.*)</ZDKC0>");
      if (!found.Success)
      {
          return "此文挡标题未知";
      }
      return found.Groups[1].Value;
  }
  /// <summary>
  /// Extracts the document body, i.e. the text enclosed in &lt;ZDbody&gt;...&lt;/ZDbody&gt;.
  /// </summary>
  /// <param name="html">Document text to search.</param>
  /// <returns>
  /// The enclosed text, or the placeholder "此文挡内容未知" when the markers are not found.
  /// The match is greedy and does not span line breaks.
  /// </returns>
  private string GetBODY(string html)
  {
      Match found = Regex.Match(html, "<ZDbody>(.*)</ZDbody>");
      if (!found.Success)
      {
          return "此文挡内容未知";
      }
      return found.Groups[1].Value;
  }
  /// <summary>
  /// Makes a value safe to place inside XML-style markup: '&lt;' becomes '〈', '&gt;' becomes
  /// '〉', and surrounding white space is trimmed. No other character is altered.
  /// </summary>
  /// <param name="data">Value to convert; must not be null.</param>
  /// <returns>The converted, trimmed value.</returns>
  private string CCxmlTag(string data)
  {
      return data.Replace("<", "〈").Replace(">", "〉").Trim();
  }
  /// <summary>
  /// Runs a fragment through <c>ParseHTML</c> and tidies the resulting text.
  /// </summary>
  /// <remarks>
  /// The fragment is handed to a <c>ParseHTML</c> instance and read back one character at a
  /// time; zero characters are dropped (presumably they stand for consumed markup; confirm
  /// against ParseHTML). From the collected text the method then removes backspace, form
  /// feed, tab and vertical tab characters and "&amp;nbsp;" entities, shortens runs of
  /// three blanks to two, trims the ends, and shortens triple carriage returns or line
  /// feeds to double ones.
  /// </remarks>
  /// <param name="data">Fragment to convert; null yields an empty string.</param>
  /// <returns>The tidied text.</returns>
  public string GetTXT(string data)
  {
      if (data == null)
      {
          return string.Empty;
      }

      ParseHTML parse = new ParseHTML();
      parse.Source = data;

      StringBuilder collected = new StringBuilder(data.Length);
      // The step counter bounds the loop even if the parser never reports end of input.
      int steps = 0;
      while ((!parse.Eof()) && (steps <= data.Length))
      {
          char ch = parse.Parse();
          if (ch != 0)
          {
              collected.Append(ch);
          }
          steps++;
      }

      string text = collected.ToString();

      // These literals must be escape sequences. Without the backslash they would delete
      // the ordinary letters b, f, t and v from the text.
      text = text.Replace("\b", "");
      text = text.Replace("\f", "");
      text = text.Replace("&nbsp;", "");
      text = text.Replace("\t", "");
      text = text.Replace("\v", "");
      text = text.Replace("   ", "  ");
      text = text.Trim();

      // Drop surplus empty lines.
      text = text.Replace("\r\r\r", "\r\r");
      text = text.Replace("\n\n\n", "\n\n");
      return text;
  }
  465.     }
  466. }