FormHTMLMODEL.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:20k
源码类别:

搜索引擎

开发平台:

C#

  1. using System;
  2. using System.Collections.Generic;
  3. using System.ComponentModel;
  4. using System.Data;
  5. using System.Drawing;
  6. using System.Text;
  7. using System.Windows.Forms;
  8. using System.Collections;
  9. using System.IO;
  10. using System.Text;
  11. using System.Collections;
  12. using System.Threading;
  13. using System.Net;
  14. /*
  15.       '       迅龙中文分类搜索引擎  v0.6
  16.       '
  17.       '        LGPL  许可发行
  18.       '
  19.       '       宁夏大学  张冬 康彩  zd4004@163.com
  20.       ' 
  21.       '        官网 http://blog.163.com/zd4004/
  22.  */
  23. namespace XunLong.HTMLMODEL.TEST
  24. {
  25.     public partial class FormHTMLMODEL : Form
  26.     {
  /// <summary>
  /// File-system object; this form calls SetClassNHT and Value on it.
  /// </summary>
  private NetHashTableAPI.ClassNHT db = new NetHashTableAPI.ClassNHT();

  /// <summary>
  /// HTML helper; this form calls HTML2CLEAR and Str2Str on it.
  /// </summary>
  private XunLong.HtmlClassLibrary.ClassHTML myHTML2CLEAR = new XunLong.HtmlClassLibrary.ClassHTML();

  /// <summary>
  /// Public path field, initialised to the empty string.
  /// </summary>
  public string PathIT = "";

  /// <summary>
  /// Builds the form, shows <paramref name="DPATH"/> in textBox5 and hands it to
  /// Config.InitConfigData.
  /// </summary>
  /// <param name="DPATH">Value displayed in textBox5 and passed to Config.InitConfigData.</param>
  public FormHTMLMODEL(string DPATH)
  {
      this.InitializeComponent();
      this.textBox5.Text = DPATH;
      XunLong.CongifData.Config.InitConfigData(DPATH);
  }
  /// <summary>
  /// Downloads the address typed in textBox4 (GB2312 as the fallback charset), passes the
  /// result through HTML2CLEAR and shows it in textBox9. Does nothing when the box is blank.
  /// </summary>
  private void button2_Click(object sender, EventArgs e)
  {
      string address = textBox4.Text;
      if (address.Trim().Length != 0)
      {
          string rawHtml = GetOneHTML(address, "gb2312");
          textBox9.Text = myHTML2CLEAR.HTML2CLEAR(rawHtml, address);
          MessageBox.Show("数据读取完成!");
      }
  }
  /// <summary>
  /// Timeout, in milliseconds, applied to every web request issued by GetOneHTML.
  /// </summary>
  private const int PageRequestTimeoutMs = 20000;

  /// <summary>
  /// Downloads the page at <paramref name="murl"/> and returns its text with every line
  /// terminated by "\r\n".
  /// </summary>
  /// <remarks>
  /// The charset is taken from the response Content-Type header (utf-8 first, then gb2312).
  /// When the header names neither, <paramref name="codeType"/> decides: "gb2312" selects
  /// GB2312, anything else UTF-8. If the decoded text then mentions the other charset, the
  /// page is downloaded a second time with that charset. Text decoded as UTF-8 is passed
  /// through myHTML2CLEAR.Str2Str. Responses whose Content-Type does not start with "text/"
  /// produce an empty string.
  /// </remarks>
  /// <param name="murl">Address of the page to download.</param>
  /// <param name="codeType">Fallback charset name; compared case-insensitively with "gb2312".</param>
  /// <returns>The page text, or "" when the download fails or the response is not text.</returns>
  public string GetOneHTML(string murl, string codeType)
  {
      HttpWebRequest request = (HttpWebRequest)WebRequest.Create(murl);
      codeType = codeType.ToLower();
      request.Timeout = PageRequestTimeoutMs;
      try
      {
          string buffer = "";
          // Charset used for the first decode; it drives the re-download checks below.
          string nowCodeSet = "";

          // "using" releases the connection on every path, including non-text
          // responses and exceptions.
          using (WebResponse response = request.GetResponse())
          {
              string contentType = response.ContentType.ToLower();
              if (contentType.StartsWith("text/"))
              {
                  if (contentType.IndexOf("utf-8") > -1)
                  {
                      nowCodeSet = "utf-8";
                  }
                  else if (contentType.IndexOf("gb2312") > -1)
                  {
                      nowCodeSet = "gb2312";
                  }
                  else if (codeType == "gb2312")
                  {
                      // Header names no known charset: fall back to the caller's choice.
                      nowCodeSet = "gb2312";
                  }
                  else
                  {
                      nowCodeSet = "utf-8";
                  }
                  buffer = ReadResponseLines(response, nowCodeSet);
              }
          }

          // The page text itself names the other charset: fetch again with it.
          // nowCodeSet is not updated, so at most one re-download happens.
          if (nowCodeSet == "utf-8" && buffer.ToLower().IndexOf("gb2312") > -1)
          {
              buffer = DownloadLines(murl, "gb2312");
          }
          else if (nowCodeSet == "gb2312" && buffer.ToLower().IndexOf("utf-8") > -1)
          {
              buffer = DownloadLines(murl, "utf-8");
          }

          if (buffer.Length > 0)
          {
              Console.WriteLine("GUrlData : --> " + murl);
          }
          return buffer;
      }
      catch (Exception)
      {
          // Best effort: any failure is reported on the console and yields "".
          request.Abort();
          Console.WriteLine("Err : --> " + murl);
          return "";
      }
  }

  /// <summary>
  /// Issues a fresh request for <paramref name="murl"/> and decodes the body with the given charset.
  /// </summary>
  private string DownloadLines(string murl, string codeSet)
  {
      HttpWebRequest retry = (HttpWebRequest)WebRequest.Create(murl);
      retry.Timeout = PageRequestTimeoutMs;
      using (WebResponse response = retry.GetResponse())
      {
          return ReadResponseLines(response, codeSet);
      }
  }

  /// <summary>
  /// Reads the response body line by line with the given charset ("utf-8" or GB2312 otherwise),
  /// joining lines with "\r\n"; UTF-8 text is additionally passed through Str2Str.
  /// </summary>
  private string ReadResponseLines(WebResponse response, string codeSet)
  {
      bool isUtf8 = codeSet == "utf-8";
      System.Text.Encoding encoding = isUtf8
          ? System.Text.Encoding.UTF8
          : System.Text.Encoding.GetEncoding("GB2312");

      System.Text.StringBuilder text = new System.Text.StringBuilder();
      using (Stream stream = response.GetResponseStream())
      using (StreamReader reader = new StreamReader(stream, encoding))
      {
          string line;
          while ((line = reader.ReadLine()) != null)
          {
              text.Append(line).Append("\r\n");
          }
      }

      string decoded = text.ToString();
      return isUtf8 ? myHTML2CLEAR.Str2Str(decoded) : decoded;
  }
  /// <summary>
  /// Reads the whole of <paramref name="filename"/> as GB2312 text.
  /// </summary>
  /// <param name="filename">Path of the file to read.</param>
  /// <returns>
  /// The file contents, or "" when an IOException occurs (its message is written to the console).
  /// </returns>
  private string getFileData(string filename)
  {
      try
      {
          System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
          using (StreamReader source = new StreamReader(filename, gb2312))
          {
              return source.ReadToEnd();
          }
      }
      catch (IOException ioError)
      {
          Console.WriteLine(ioError.Message);
          return "";
      }
  }
  /// <summary>
  /// Writes <paramref name="data"/> to <paramref name="filename"/> as GB2312 text,
  /// replacing any existing content. An IOException is reported on the console only.
  /// </summary>
  /// <param name="filename">Path of the file to write.</param>
  /// <param name="data">Text to store.</param>
  private void putFileData(string filename, string data)
  {
      try
      {
          System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("gb2312");
          using (StreamWriter target = new StreamWriter(filename, false, gb2312))
          {
              target.Write(data);
          }
      }
      catch (IOException ioError)
      {
          Console.WriteLine(ioError.Message);
      }
  }
  /// <summary>
  /// Lets the user pick one template file and loads the whole template set
  /// (.a .b .c .d .e .t .h .s sharing the same base path) into the editor boxes.
  /// The base path is stored in the form caption, where the save and delete handlers read it.
  /// </summary>
  private void button1_Click(object sender, EventArgs e)
  {
      openFileDialog1.Title = "选择一个模板文件 ";
      openFileDialog1.Filter = "(*.a)|*.a";

      // Honour the dialog result: after a cancel, FileName may still hold an earlier selection.
      if (openFileDialog1.ShowDialog() != DialogResult.OK)
      {
          return;
      }

      string a = openFileDialog1.FileName;
      if (!System.IO.File.Exists(a))
      {
          return;
      }

      // Validate the real extension rather than searching for ".a" anywhere in the path,
      // which a dotted folder name could satisfy.
      string extension = System.IO.Path.GetExtension(a);
      string[] accepted = { ".a", ".b", ".c", ".d", ".e", ".t" };
      bool known = false;
      foreach (string candidate in accepted)
      {
          if (string.Equals(extension, candidate, StringComparison.OrdinalIgnoreCase))
          {
              known = true;
              break;
          }
      }
      if (!known)
      {
          MessageBox.Show("文件格式错误!");
          return;
      }

      // Base path = selected path without its extension.
      string x = a.Substring(0, a.Length - extension.Length);
      this.Text = x;
      textBox_a.Text = getFileData(x + ".a");
      textBox_b.Text = getFileData(x + ".b");
      textBox_c.Text = getFileData(x + ".c");
      textBox_d.Text = getFileData(x + ".d");
      textBox_e.Text = getFileData(x + ".e");
      textBox_t.Text = getFileData(x + ".t");
      textBox_h.Text = getFileData(x + ".h");
      textBox_s.Text = getFileData(x + ".s");
      button4.Enabled = true;
  }
  /// <summary>
  /// Click handler with an empty body.
  /// </summary>
  private void button3_Click(object sender, EventArgs e)
  {
      // No action.
  }
  /// <summary>
  /// Saves the template: writes each editor box to "&lt;form caption&gt;&lt;suffix&gt;"
  /// and then reports the base path.
  /// </summary>
  /// <param name="sender">Event source (unused).</param>
  /// <param name="e">Event data (unused).</param>
  private void button4_Click(object sender, EventArgs e)
  {
      // Per the original author's notes: t = title, a = data, b = obtained from the
      // clustering template, c = obtained from the related template.
      string basePath = this.Text;
      string[] suffixes = { ".a", ".b", ".c", ".d", ".e", ".t", ".h", ".s" };
      Control[] editors =
      {
          textBox_a, textBox_b, textBox_c, textBox_d,
          textBox_e, textBox_t, textBox_h, textBox_s
      };

      for (int i = 0; i < suffixes.Length; i++)
      {
          putFileData(basePath + suffixes[i], editors[i].Text);
      }

      MessageBox.Show("模板保存成功!  地址: " + basePath);
  }
  /// <summary>
  /// Copies the text of the item selected in listBox2 into textBox13.
  /// </summary>
  private void listBox2_SelectedIndexChanged(object sender, EventArgs e)
  {
      string selected = this.listBox2.Text;
      this.textBox13.Text = selected;
  }
  /// <summary>
  /// Copies the text of the item selected in listBox1 into textBox13.
  /// </summary>
  private void listBox1_SelectedIndexChanged(object sender, EventArgs e)
  {
      string selected = this.listBox1.Text;
      this.textBox13.Text = selected;
  }
  /// <summary>
  /// Calls db.SetClassNHT with the contents of textBox14, the constant 3145727 and the
  /// contents of textBox5, then disables the button and reports completion.
  /// </summary>
  private void button5_Click(object sender, EventArgs e)
  {
      string spiderDataText = textBox14.Text;
      string startupPathText = textBox5.Text;
      db.SetClassNHT(spiderDataText, 3145727, startupPathText);

      // A former listing of the stored entries into listBox3 is disabled.
      button5.Enabled = false;
      MessageBox.Show("完成!");
  }
  321.         /*
  322.         private void listBox3_SelectedIndexChanged(object sender, EventArgs e)
  323.         {
  324.             if (listBox3.Text == null)
  325.             {
  326.                 return;
  327.             }
  328.             textBox4.Text = listBox3.Text;
  329.             textBox9.Text = XunLong.HtmlClassLibrary.ClassHTML.HTML2CLEAR( db.Value(listBox3.Text));
  330.         }
  331.         */
  /// <summary>
  /// On load, shows Config.SpiderData in textBox14 and the contents of the file named by
  /// Config.main_s_type in textBox1.
  /// </summary>
  private void FormHTMLMODEL_Load(object sender, EventArgs e)
  {
      string spiderData = XunLong.CongifData.Config.SpiderData;
      textBox14.Text = spiderData;

      string typeFile = XunLong.CongifData.Config.main_s_type;
      textBox1.Text = getFileData(typeFile);

      // Filling comboBox1 from the *.xlt files under Config.xlType is disabled.
  }
  /// <summary>
  /// Fills the eight template editor boxes, enables the save button and sets the form
  /// caption to "&lt;path&gt;\&lt;MD5 of d&gt;". The save and delete handlers read the caption
  /// as the template's base path.
  /// </summary>
  /// <param name="a">Text for textBox_a.</param>
  /// <param name="b">Text for textBox_b.</param>
  /// <param name="c">Text for textBox_c.</param>
  /// <param name="d">Text for textBox_d; its MD5 becomes the template file name.</param>
  /// <param name="e">Text for textBox_e.</param>
  /// <param name="t">Text for textBox_t.</param>
  /// <param name="h">Text for textBox_h.</param>
  /// <param name="s">Text for textBox_s.</param>
  /// <param name="path">Directory part of the base path.</param>
  public void InitOne(string a, string b, string c, string d, string e, string t, string h, string s, string path)
  {
      textBox_a.Text = a;
      textBox_b.Text = b;
      textBox_c.Text = c;
      textBox_d.Text = d;
      textBox_e.Text = e;
      textBox_t.Text = t;
      textBox_h.Text = h;
      textBox_s.Text = s;
      button4.Enabled = true;

      // Escaped backslash: a bare "\" leaves the string literal unterminated.
      this.Text = path + "\\" + getMD5name(d);
  }
  /// <summary>
  /// Returns the MD5 digest of <paramref name="data"/> (UTF-8 bytes) as 32 upper-case
  /// hexadecimal characters.
  /// </summary>
  /// <remarks>
  /// Produces the same string as the obsolete
  /// FormsAuthentication.HashPasswordForStoringInConfigFile(data, "md5") without requiring
  /// System.Web. The digest is used as a file name, not for security.
  /// </remarks>
  /// <param name="data">Text to hash; must not be null.</param>
  /// <returns>Upper-case hexadecimal MD5 digest.</returns>
  private string getMD5name(string data)
  {
      byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes(data);
      using (System.Security.Cryptography.MD5 md5 = System.Security.Cryptography.MD5.Create())
      {
          byte[] digest = md5.ComputeHash(utf8Bytes);
          // BitConverter yields "D4-1D-8C-..."; dropping the dashes gives the plain hex form.
          return BitConverter.ToString(digest).Replace("-", "");
      }
  }
  /// <summary>
  /// Selection-changed handler with no active code.
  /// </summary>
  private void comboBox1_SelectedIndexChanged(object sender, EventArgs e)
  {
      // Disabled: the commented-out former body appears to have loaded
      // "<Config.xlType>\<selected text>.xlt" into textBox1.
  }
  /// <summary>
  /// Click handler with an empty body.
  /// </summary>
  private void tabPage4_Click(object sender, EventArgs e)
  {
      // No action.
  }
  /// <summary>
  /// Opens the first entry of the sample list (textBox_e, one address per line) in webBrowser1.
  /// Shows a message when the list has no line break, is 20 characters or shorter,
  /// or contains no non-empty line.
  /// </summary>
  private void button6_Click(object sender, EventArgs e)
  {
      string a = textBox_e.Text;
      if (a.IndexOf('\n') > 0 && a.Length > 20)
      {
          // Dropping empty pieces makes entries[0] the first real line for "\r\n" and "\n"
          // line endings alike, with no fixed-index guess that could run past the array.
          char[] lineBreaks = { '\n', '\r' };
          string[] entries = a.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
          if (entries.Length > 0)
          {
              webBrowser1.Navigate(entries[0]);
              return;
          }
      }

      MessageBox.Show("采样列表为空!");
  }
  /// <summary>
  /// Runs the template currently in the editor boxes against the page text in textBox9 and
  /// displays the result: modelOneList entries in listBox2 (key, tab, value) and the
  /// t / a / b / c / h / s fields of the returned object in their text boxes.
  /// </summary>
  private void button3_Click_1(object sender, EventArgs e)
  {
      // Create the filter object and load the template under test into it.
      XunLong.ModelUserClassLibrary.ClassUserModel m = new XunLong.ModelUserClassLibrary.ClassUserModel();
      m.TestModeL(textBox_a.Text, textBox_b.Text, textBox_c.Text, textBox_d.Text,
                  textBox_e.Text, textBox_t.Text, textBox_h.Text, textBox_s.Text);

      XunLong.PublicClassLibrary.kcSearch k = m.getTagAndData(textBox9.Text);
      Hashtable p = m.modelOneList;

      listBox1.Items.Clear();
      listBox2.Items.Clear();

      foreach (System.Collections.DictionaryEntry de in p)
      {
          // Tab-separated key/value; the separator is '\t', not the letter 't'.
          listBox2.Items.Add(de.Key.ToString() + '\t' + de.Value.ToString());
      }

      textBox12.Text = k.t;
      textBox8.Text = k.a;
      textBox7.Text = k.b;
      textBox6.Text = k.c;
      textBox16.Text = k.h;
      textBox3.Text = k.s;
  }
  /// <summary>
  /// Looks up the key typed in textBox2 via db.Value, passes the result through HTML2CLEAR
  /// and shows it in textBox9. Does nothing when the box is blank.
  /// </summary>
  private void button7_Click(object sender, EventArgs e)
  {
      string key = textBox2.Text;
      if (key.Trim().Length != 0)
      {
          textBox9.Text = myHTML2CLEAR.HTML2CLEAR(db.Value(key), key);
          MessageBox.Show("数据读取完成!");
      }
  }
  /// <summary>
  /// Copies the first entry of the sample list (textBox_e, one address per line) into
  /// textBox4 and textBox2. Shows a message when the list has no line break, is 20
  /// characters or shorter, or contains no non-empty line.
  /// </summary>
  private void button8_Click(object sender, EventArgs e)
  {
      string a = textBox_e.Text;
      if (a.IndexOf('\n') > 0 && a.Length > 20)
      {
          // Dropping empty pieces makes entries[0] the first real line for "\r\n" and "\n"
          // line endings alike, with no fixed-index guess that could run past the array.
          char[] lineBreaks = { '\n', '\r' };
          string[] entries = a.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries);
          if (entries.Length > 0)
          {
              textBox4.Text = entries[0];
              textBox2.Text = entries[0];
              return;
          }
      }

      MessageBox.Show("采样列表为空!");
  }
  /// <summary>
  /// Deletes the eight template files that share the form caption as base path, reports
  /// the path and closes the form. Any exception results in an error message instead.
  /// </summary>
  private void button9_Click(object sender, EventArgs e)
  {
      try
      {
          string basePath = this.Text;
          string[] suffixes = { ".a", ".b", ".c", ".d", ".e", ".t", ".h", ".s" };
          foreach (string suffix in suffixes)
          {
              System.IO.File.Delete(basePath + suffix);
          }

          MessageBox.Show("删除成功!  地址: " + basePath);
          this.FindForm().Close();
      }
      catch
      {
          MessageBox.Show("删除出错!");
      }
  }
  /// <summary>
  /// Writes the contents of textBox1 to the file named by Config.main_s_type.
  /// </summary>
  private void button10_Click(object sender, EventArgs e)
  {
      string typeFile = XunLong.CongifData.Config.main_s_type;
      putFileData(typeFile, textBox1.Text);
  }
  515.     }
  516. }