ClassUserModel.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:17k
- using System;
- using System.IO;
- using System.Collections.Generic;
- using System.Text;
- using System.Collections;
- using System.Text.RegularExpressions;
- /*
- ' 迅龙中文分类搜索引擎 v0.6
- '
- ' LGPL 许可发行
- '
- ' 宁夏大学 张冬 康彩 zd4004@163.com
- '
- ' 官网 http://blog.163.com/zd4004/
- */
- namespace XunLong.ModelUserClassLibrary
- {
- /// <summary>
- /// 模板使用库
- /// </summary>
- public class ClassUserModel
- {
- /// <summary>
- /// When true, getTagAndData also records each captured value in modelOneList (set by TestModeL, reset by init).
- /// </summary>
- private bool isShowmodelOneList =false;
- /// <summary>
- /// Captured values of the latest successful match, keyed by capture index; only filled while isShowmodelOneList is true.
- /// </summary>
- public Hashtable modelOneList = new Hashtable();
- /// <summary>
- /// The loaded templates (kcSearch items), as stored by init or TestModeL.
- /// </summary>
- public ArrayList n = new ArrayList();
/// <summary>
/// Loads every template found in a directory, replacing any templates loaded before.
/// </summary>
/// <remarks>
/// Only "*.a" files are enumerated. For each one, the sibling files with the same base
/// name and the extensions .b, .c, .d, .e, .t, .h and .s are read as well. The resulting
/// list is ordered by the number of '*' characters in the .d content (most first).
/// Exceptions raised while enumerating the directory are not caught here.
/// </remarks>
/// <param name="Xdir">Directory that contains the template files.</param>
/// <returns>
/// Number of "*.a" files processed. Templates skipped as duplicates are still counted.
/// </returns>
public int init(string Xdir)
{
    DirectoryInfo dir = new DirectoryInfo(Xdir);

    // Loading from disk switches off the capture recording enabled by TestModeL.
    isShowmodelOneList = false;
    modelOneList.Clear();
    n.Clear();

    ArrayList loaded = new ArrayList();
    int count = 0;

    foreach (FileInfo f in dir.GetFiles("*.a"))
    {
        // Drop the two-character ".a" suffix to obtain the base path shared by all parts.
        string fullName = f.FullName;
        string name = fullName.Substring(0, fullName.Length - 2);

        // A fresh item per template: reusing one instance across iterations would make
        // every stored entry alias the same object if kcSearch is a reference type.
        XunLong.PublicClassLibrary.kcSearch item = new XunLong.PublicClassLibrary.kcSearch();
        item.a = getFileData(name + ".a");
        item.b = getFileData(name + ".b");
        item.c = getFileData(name + ".c");
        item.d = getFileData(name + ".d");
        item.e = getFileData(name + ".e");
        item.t = getFileData(name + ".t");
        item.h = getFileData(name + ".h");
        item.s = getFileData(name + ".s");
        item.isSORTIT = false;
        item.isXnum = CharNum(item.d);

        count = count + 1;
        Console.WriteLine("加载模板 " + name);

        // Skip templates that compare equal to one already loaded.
        if (!loaded.Contains(item))
        {
            loaded.Add(item);
        }
    }

    n = sortIt(loaded);
    Console.WriteLine("模板加载完成 总计: " + count.ToString() + " 个模板");
    return count;
}
/// <summary>
/// Builds a new list with the given templates ordered by descending isXnum
/// (the '*' count that init stores for each template).
/// </summary>
/// <param name="ui">List of kcSearch items; it is read but not modified.</param>
/// <returns>A new ArrayList holding the same items in sorted order.</returns>
private ArrayList sortIt(ArrayList ui)
{
    int total = ui.Count;
    XunLong.PublicClassLibrary.kcSearch[] items = new XunLong.PublicClassLibrary.kcSearch[total];

    int slot = 0;
    foreach (object entry in ui)
    {
        items[slot] = (XunLong.PublicClassLibrary.kcSearch)entry;
        slot++;
    }

    // In-place exchange sort: whenever a later item has a larger isXnum than the
    // item at the current position, the two trade places.
    for (int left = 0; left < total; left++)
    {
        for (int right = left + 1; right < total; right++)
        {
            if (items[right].isXnum > items[left].isXnum)
            {
                XunLong.PublicClassLibrary.kcSearch held = items[left];
                items[left] = items[right];
                items[right] = held;
            }
        }
    }

    ArrayList ordered = new ArrayList();
    foreach (XunLong.PublicClassLibrary.kcSearch item in items)
    {
        ordered.Add(item);
    }
    return ordered;
}
/// <summary>
/// Counts the '*' characters in a string.
/// </summary>
/// <param name="dat">Text to inspect; must not be null.</param>
/// <returns>How many '*' characters the text contains.</returns>
private int CharNum(string dat)
{
    int stars = 0;
    foreach (char ch in dat)
    {
        if (ch == '*')
        {
            stars++;
        }
    }
    return stars;
}
/// <summary>
/// Replaces all loaded templates with a single template built from the given parts and
/// switches on recording of captured values into modelOneList.
/// </summary>
/// <param name="a">Data template.</param>
/// <param name="b">Clustering template.</param>
/// <param name="c">Related-items template.</param>
/// <param name="d">Match pattern; getTagAndData splits it on '*'.</param>
/// <param name="e">Stored on the template; not read by the code in this class.</param>
/// <param name="t">Title template.</param>
/// <param name="h">HTML block template.</param>
/// <param name="s">Summary template.</param>
public void TestModeL(string a, string b,string c,string d,string e,string t,string h,string s)
{
    n.Clear();
    modelOneList.Clear();
    isShowmodelOneList = true;

    XunLong.PublicClassLibrary.kcSearch single = new XunLong.PublicClassLibrary.kcSearch
    {
        a = a,
        b = b,
        c = c,
        d = d,
        e = e,
        t = t,
        h = h,
        s = s
    };

    n.Add(single);
}
/// <summary>
/// Reads a whole file as text using the "gb2312" encoding.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>
/// The file content, or an empty string when an IOException occurs
/// (its message is written to the console).
/// </returns>
private string getFileData(string filename)
{
    try
    {
        // The using statement closes the reader on every exit path.
        using (StreamReader source = new StreamReader(filename, System.Text.Encoding.GetEncoding("gb2312")))
        {
            return source.ReadToEnd();
        }
    }
    catch (IOException e)
    {
        Console.WriteLine(e.Message);
        return "";
    }
}
/// <summary>
/// Tries each loaded template in order and, for the first one whose pattern matches the
/// text, returns that template's output fields with the captured values filled in.
/// </summary>
/// <param name="data">Text to match against the templates.</param>
/// <returns>
/// On a match: a kcSearch with a, b, c, t, h and s filled from the template and isOK set
/// to true. Otherwise the result of GetNullXZDHtml (isOK false, empty fields).
/// </returns>
public XunLong.PublicClassLibrary.kcSearch getTagAndData(string data)
{
    if (data == null)
    {
        // Nothing to match against; treat it as "no template matched".
        return GetNullXZDHtml(data);
    }

    foreach (XunLong.PublicClassLibrary.kcSearch template in n)
    {
        // Patterns shorter than two characters are never used for matching.
        if (template.d == null || template.d.Length < 2)
        {
            continue;
        }

        // The pattern is matched with one leading space prepended. A local copy is used
        // so the stored template is never altered between calls.
        string pattern = " " + template.d;

        List<string> captured = new List<string>();
        if (!TryCaptureFields(pattern, data, captured))
        {
            continue;
        }

        string axa = template.a;
        string axb = template.b;
        string axc = template.c;
        string axt = template.t;
        string axh = template.h;
        string axs = template.s;

        if (isShowmodelOneList)
        {
            modelOneList.Clear();
        }

        // Placeholders are substituted one capture at a time, in capture order.
        for (int index = 0; index < captured.Count; index++)
        {
            string placeholder = "<TAGDATA INDEX=" + index.ToString() + "/>";
            string raw = captured[index];
            string escaped = CCxmlTag(raw);

            axa = axa.Replace(placeholder, raw);      // data template
            axb = axb.Replace(placeholder, escaped);  // clustering template
            axc = axc.Replace(placeholder, escaped);  // related-items template
            axt = axt.Replace(placeholder, escaped);  // title template
            axh = axh.Replace(placeholder, raw);      // HTML block
            axs = axs.Replace(placeholder, raw);      // summary

            if (isShowmodelOneList)
            {
                modelOneList.Add(index, raw);
            }
        }

        XunLong.PublicClassLibrary.kcSearch result = new XunLong.PublicClassLibrary.kcSearch();
        result.a = axa;
        result.b = axb;
        result.c = axc;
        result.t = axt;
        result.h = axh;
        result.s = axs;
        result.isOK = true;
        return result;
    }

    // No template matched.
    return GetNullXZDHtml(data);
}

/// <summary>
/// Matches a '*'-separated pattern against the text and collects the text found at each
/// '*' position (passed through GetTXT) into <paramref name="captured"/>.
/// </summary>
/// <param name="pattern">Pattern whose literal segments are separated by '*'.</param>
/// <param name="data">Text to search.</param>
/// <param name="captured">Receives one entry per captured value, in order.</param>
/// <returns>True when every non-empty segment was found in order; otherwise false.</returns>
private bool TryCaptureFields(string pattern, string data, List<string> captured)
{
    string[] segments = pattern.Split('*');
    string remaining = data;

    for (int j = 1; j < segments.Length; j++)
    {
        // An empty closing segment (e.g. a trailing '*') captures nothing.
        if (segments[j].Length == 0)
        {
            continue;
        }

        string before = segments[j - 1];
        int beforeAt = remaining.IndexOf(before);
        if (beforeAt == -1)
        {
            // Checked before the second search so a negative start index is never used.
            return false;
        }

        // Look for the closing segment only after the end of the opening one, so the
        // captured length can never be negative.
        int valueStart = beforeAt + before.Length;
        int afterAt = remaining.IndexOf(segments[j], valueStart);
        if (afterAt == -1)
        {
            return false;
        }

        captured.Add(GetTXT(remaining.Substring(valueStart, afterAt - valueStart)));

        // Continue from the closing segment; it is the opening segment of the next capture.
        remaining = remaining.Substring(afterAt);
    }

    return true;
}
/// <summary>
/// Builds the result returned when no template matches: every text field is empty and
/// isOK is false.
/// </summary>
/// <param name="datas">Text that failed to match; currently not used.</param>
/// <returns>A kcSearch with a, b, c, t, h and s set to "" and isOK set to false.</returns>
private XunLong.PublicClassLibrary.kcSearch GetNullXZDHtml(string datas)
{
    return new XunLong.PublicClassLibrary.kcSearch
    {
        isOK = false,
        a = "",
        b = "",
        c = "",
        t = "",
        h = "",
        s = ""
    };
}
/// <summary>
/// Removes every HTML tag from the text and turns non-breaking spaces into ordinary spaces.
/// </summary>
/// <param name="html">HTML text to clean; must not be null.</param>
/// <returns>The text without tags.</returns>
private string ParseHtml(string html)
{
    string withoutTags = Regex.Replace(html, "<[^>]*>", "");

    // Replacing a space with a space did nothing; the replacement target is taken to be
    // the non-breaking space, handled both as the entity and as the U+00A0 character.
    // NOTE(review): confirm against the upstream source that "&nbsp;" was the intended target.
    return withoutTags.Replace("&nbsp;", " ").Replace("\u00A0", " ");
}
/// <summary>
/// Extracts the document title, i.e. the text between the ZDKC0 opening and closing markers.
/// </summary>
/// <param name="html">Document text; must not be null.</param>
/// <returns>The marked text, or a fixed "title unknown" message when the markers are absent.</returns>
private string GetTitle(string html)
{
    Match m = Regex.Match(html, "<ZDKC0>(.*)</ZDKC0>");

    // Groups.Count reflects the pattern, not the outcome, so it is 2 even for a failed
    // match; Success is the reliable test.
    if (m.Success)
    {
        return m.Groups[1].Value;
    }
    return "此文挡标题未知";
}
/// <summary>
/// Extracts the document body, i.e. the text between the ZDbody opening and closing markers.
/// </summary>
/// <param name="html">Document text; must not be null.</param>
/// <returns>The marked text, or a fixed "content unknown" message when the markers are absent.</returns>
private string GetBODY(string html)
{
    // NOTE(review): '.' does not match a newline with default options, so a body that
    // spans several lines is not matched; confirm whether Singleline was intended.
    Match m = Regex.Match(html, "<ZDbody>(.*)</ZDbody>");

    // Groups.Count reflects the pattern, not the outcome, so it is 2 even for a failed
    // match; Success is the reliable test.
    if (m.Success)
    {
        return m.Groups[1].Value;
    }
    return "此文挡内容未知";
}
/// <summary>
/// Makes a value safe to embed in the XML-like templates: the angle brackets are swapped
/// for the look-alike characters 〈 and 〉, and surrounding whitespace is trimmed.
/// </summary>
/// <param name="data">Value to convert; must not be null.</param>
/// <returns>The converted, trimmed value.</returns>
private string CCxmlTag(string data)
{
    return data.Replace("<", "〈").Replace(">", "〉").Trim();
}
/// <summary>
/// Runs the text through ParseHTML character by character, then removes control
/// characters and spaces and collapses runs of blank lines.
/// </summary>
/// <param name="data">Text to convert; must not be null.</param>
/// <returns>The cleaned text.</returns>
public string GetTXT(string data)
{
    ParseHTML parse = new ParseHTML();
    parse.Source = data;

    StringBuilder text = new StringBuilder();
    int steps = 0;

    // The step cap (at most data.Length + 1 calls) stops the loop even if Eof() never
    // turns true.
    while ((!parse.Eof()) && (steps <= data.Length))
    {
        char ch = parse.Parse();
        if (ch != 0)
        {
            text.Append(ch);
        }
        steps = steps + 1;
    }

    string result = text.ToString();

    // These replacements target control characters. Without the escape backslashes they
    // deleted the plain letters b, f, t and v from the text.
    result = result.Replace("\b", "");
    result = result.Replace("\f", "");
    // NOTE(review): as written this removes every space, which makes the space-to-space
    // replacement below a no-op; both literals may have lost their original content
    // (possibly an entity or a wide space) - confirm against the upstream source.
    result = result.Replace(" ", "");
    result = result.Replace("\t", "");
    result = result.Replace("\v", "");
    result = result.Replace(" ", " ");
    result = result.Trim();

    // Collapse three consecutive line breaks into two.
    result = result.Replace("\r\r\r", "\r\r");
    result = result.Replace("\n\n\n", "\n\n");
    return result;
}
- }
- }