ClassUrlDB.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:26k
源码类别:

搜索引擎

开发平台:

C#

  1. using System;
  2. using System.IO;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using System.Collections;
  6. /*
  7.       '       迅龙中文分类搜索引擎  v0.6
  8.       '
  9.       '        LGPL  许可发行
  10.       '
  11.       '       宁夏大学  张冬 康彩  zd4004@163.com
  12.       ' 
  13.       '        官网 http://blog.163.com/zd4004/
  14.  */
  15. namespace XunLong.UrlDBClassLibrary
  16. {
  17.     /// <summary>
  18.     /// URL 数据库  负责URL 信息的本地存储和读取   输出的URL  一定要是设定好的源站点的
  19.     /// </summary>
  20.    public  class ClassUrlDB
  21.     {
  /// <summary>
  /// Recently issued URLs. <c>Now2oldUrl</c> writes into this array and wraps
  /// around when it is full; <c>itComAll</c> compares a candidate against every entry.
  /// </summary>
  private readonly string[] oldUrlIt = new string[24];

  /// <summary>
  /// Index of the next slot of <c>oldUrlIt</c> to be written.
  /// </summary>
  private int oldUrlNum = 0;

  /// <summary>
  /// Random source used to pick a queue position when a URL is handed out.
  /// </summary>
  private readonly System.Random myRandom = new Random();

  /// <summary>
  /// Source (seed) URLs. Filled by <c>StartFirstUrls</c> and re-queued by <c>ReSetSource</c>.
  /// </summary>
  public ArrayList SourceURLs = new ArrayList();

  /// <summary>
  /// Path of the URL cache file read by <c>StartUrls</c> and written by <c>SaveUrlsCache</c>.
  /// </summary>
  public string urlsCacheFile = "";

  /// <summary>
  /// Path of the source-URL file read by <c>StartFirstUrls</c>.
  /// </summary>
  public string urlsSourceFile = "";

  /// <summary>
  /// URLs that were crawled but turned out not to be needed (per the original author's note).
  /// No method visible in this class reads or writes it; it is initialised to an empty
  /// list so that external users do not hit a null reference.
  /// </summary>
  public ArrayList NoNeedDataURL = new ArrayList();

  /// <summary>
  /// Pending URL queue: <c>putOneUrl</c> appends to it, <c>getOneUrl</c>/<c>getOneUrl2</c> remove from it.
  /// </summary>
  private readonly ArrayList urls = new ArrayList();

  /// <summary>
  /// Intended as a working batch taken from <c>urls</c> (per the original author's note).
  /// NOTE(review): not referenced by any method visible in this class.
  /// </summary>
  private readonly ArrayList URLS_NOW = new ArrayList();

  /// <summary>
  /// MD5 names of URLs that were already handed out or are otherwise known,
  /// used to avoid queueing the same URL again.
  /// </summary>
  private readonly ArrayList urlsIOLD = new ArrayList();

  /// <summary>
  /// Re-push counters used by <c>putOneUrl2</c>: key = URL, value = number of times it was pushed back.
  /// </summary>
  private readonly Hashtable errUrl = new Hashtable();
  /// <summary>
  /// Creates a URL database with an empty re-push counter table.
  /// The file paths (<c>urlsCacheFile</c>, <c>urlsSourceFile</c>) are left for the caller to set.
  /// </summary>
  public ClassUrlDB()
  {
      // Configuration loading is disabled; the original call was:
      // XunLong.CongifData.Config.InitConfigData("D:\XunLongRUN\xunlong.kc");

      // Make sure no retry counters are carried into a new instance.
      errUrl.Clear();
  }
  62.        /*
  63.         /// <summary>
  64.         /// 系统内增加一个源URL    
  65.         /// </summary>
  66.         /// <param name="UrlInit"></param>
  67.         public void initc3(string UrlInit)
  68.         {
  69.             ArrayList tmp = new ArrayList();
  70.             tmp.Clear();
  71.             //读出原来的 源URL 文件 
  72.             StreamReader reader = null;
  73.             try
  74.             {
  75.                 reader = new StreamReader(urlsSourceFile);
  76.                 for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
  77.                 {
  78.                     if (line.Length > 0)
  79.                     {
  80.                        // if (tmp.Contains(line) == false)
  81.                        // {
  82.                        //     tmp.Add(line);
  83.                        // }
  84.                         try
  85.                         {
  86.                             tmp.Add(line);
  87.                         }
  88.                         catch
  89.                         { }
  90.                     }
  91.                 }
  92.                 reader.Close();
  93.             }
  94.             catch (IOException e)
  95.             {
  96.                 Console.WriteLine(e.Message);
  97.             }
  98.             finally
  99.             {
  100.                 if (reader != null)
  101.                     reader.Close();
  102.             }
  103.             //添加进去 
  104.           //  if (tmp.Contains(UrlInit) == false)
  105.           //  {
  106.           //      tmp.Add(UrlInit);
  107.           //  }
  108.           //  else
  109.           //  {
  110.           //      Console.WriteLine("已经存在 >> " + UrlInit);
  111.           //      return;
  112.           //  }
  113.             try
  114.             {
  115.                 tmp.Add(UrlInit);
  116.             }
  117.             catch
  118.             {
  119.                 Console.WriteLine("已经存在 >> " + UrlInit);
  120.                 return;
  121.             }
  122.             //写入文件
  123.             StreamWriter writer = null;
  124.             try
  125.             {
  126.                 writer = new StreamWriter(urlsSourceFile);
  127.                 foreach (string a in tmp)
  128.                 {
  129.                     writer.WriteLine(a);
  130.                 }
  131.                 writer.Close();
  132.                 Console.WriteLine("写入成功 >> " + UrlInit);
  133.                 return;
  134.             }
  135.             catch (IOException e)
  136.             {
  137.                 Console.WriteLine(e.Message);
  138.             }
  139.             finally
  140.             {
  141.                 if (writer != null)
  142.                     writer.Close();
  143.             }
  144.             //写入 URL缓存文件
  145.         }
  146.        */
  /// <summary>
  /// Empties the URL cache file on disk. When the file exists it is deleted and
  /// re-created empty; when it does not exist nothing is created.
  /// The in-memory queue is not modified by this method.
  /// </summary>
  public void ClearUrls()
  {
      if (System.IO.File.Exists(urlsCacheFile))
      {
          System.IO.File.Delete(urlsCacheFile);

          // CreateText returns an open StreamWriter. Dispose it immediately so the
          // file handle is released and later reads/writes of the cache can open it.
          using (System.IO.File.CreateText(urlsCacheFile))
          {
          }
      }

      // Reported unconditionally, exactly as before.
      Console.WriteLine("Url缓存清除成功 >>  !");
  }
  /// <summary>
  /// Loads pending URLs from the cache file (<c>urlsCacheFile</c>) and appends them to
  /// the queue. Empty lines and lines starting with "//" are skipped. Entries are not
  /// de-duplicated. Progress is written to the console on the 1st, 2001st, 4001st, ...
  /// loaded entry. I/O failures are reported on the console and do not propagate.
  /// </summary>
  public void StartUrls()
  {
      Console.WriteLine("-> 0");

      if (string.IsNullOrEmpty(urlsCacheFile))
      {
          // An empty path makes the StreamReader constructor throw ArgumentException,
          // which the IOException handler below would not catch.
          Console.WriteLine("URL cache file path is not set; nothing loaded.");
      }
      else
      {
          int loaded = 0;
          try
          {
              using (StreamReader reader = new StreamReader(urlsCacheFile))
              {
                  for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                  {
                      // Skip blanks and commented-out entries.
                      if (line.Length == 0 || line.IndexOf("//") == 0)
                      {
                          continue;
                      }

                      urls.Add(line);
                      loaded = loaded + 1;
                      if (loaded % 2000 == 1)
                      {
                          Console.Write(" ->" + loaded.ToString());
                      }
                  }
              }
          }
          catch (IOException e)
          {
              Console.WriteLine(e.Message);
          }
      }

      Console.WriteLine(" 读取缓存完成 URL条目数量 >>  " + urls.Count.ToString());
  }
  /// <summary>
  /// Re-initialises the database: clears the queue and the source list, loads the
  /// source-URL file (<c>urlsSourceFile</c>) into both, then appends the cached URLs
  /// through <c>StartUrls</c>. A source line is accepted when it is not empty, is not
  /// already queued, does not start with "//" and contains "http://".
  /// I/O failures while reading the source file are reported on the console.
  /// </summary>
  public void StartFirstUrls()
  {
      urls.Clear();
      SourceURLs.Clear();

      try
      {
          using (StreamReader sourceReader = new StreamReader(urlsSourceFile))
          {
              string entry;
              while ((entry = sourceReader.ReadLine()) != null)
              {
                  if (entry.Length == 0)
                  {
                      continue;
                  }

                  bool alreadyQueued = urls.Contains(entry);
                  bool commentedOut = entry.IndexOf("//") == 0;
                  bool hasHttpScheme = entry.IndexOf("http://") > -1;

                  if (!alreadyQueued && !commentedOut && hasHttpScheme)
                  {
                      urls.Add(entry);
                      SourceURLs.Add(entry);
                  }
              }
          }
      }
      catch (IOException e)
      {
          Console.WriteLine(e.Message);
      }

      // Then append whatever is stored in the cache file.
      StartUrls();
      Console.WriteLine(" 读取缓存完成 >>  共有 "  + urls.Count.ToString() +" 条URL" );
  }
  /// <summary>
  /// Writes every queued URL to the cache file (<c>urlsCacheFile</c>), one per line,
  /// replacing the file's previous content. The number of written entries is reported
  /// on success; I/O failures are reported on the console and do not propagate.
  /// </summary>
  public void SaveUrlsCache()
  {
      try
      {
          using (StreamWriter cacheWriter = new StreamWriter(urlsCacheFile))
          {
              foreach (string queuedUrl in urls)
              {
                  cacheWriter.WriteLine(queuedUrl);
              }
          }

          // Only reached when the writer was flushed and closed without error.
          Console.WriteLine("写入成功 >> "+urls.Count.ToString());
      }
      catch (IOException e)
      {
          Console.WriteLine(e.Message);
      }
  }
  /// <summary>Shortest URL length accepted by <c>putOneUrl</c>.</summary>
  private const int MinUrlLength = 7;

  /// <summary>Longest URL length accepted by <c>putOneUrl</c>.</summary>
  private const int MaxUrlLength = 180;

  /// <summary>
  /// Lower-case fragments that disqualify a URL when they occur anywhere in it:
  /// print views, in-page anchors, script/mail pseudo-links, embedded blanks and
  /// a set of non-HTML file extensions.
  /// NOTE(review): matching is by substring, so e.g. ".doc" also rejects a host
  /// such as "www.docs.example" - kept as in the original filter.
  /// </summary>
  private static readonly string[] RejectedUrlFragments = new string[]
  {
      "/print", "#", "javascript:", " ", "mailto:",
      ".css", ".zip", ".rar", ".doc", ".pdf", ".ppt", ".xsl",
      ".jpg", ".png", ".gif", ".rmvb", ".rm", ".dat"
  };

  /// <summary>
  /// Queues one URL if it passes all filters; otherwise does nothing.
  /// The URL is stripped of tab/CR/LF characters, trimmed and stored in lower case.
  /// It is rejected when it is null, shorter than 7 or longer than 180 characters,
  /// contains markup remnants ("&lt;/" or "/&gt;"), is already queued, was already
  /// handed out (by MD5 name), contains a rejected fragment, or lacks "http://".
  /// </summary>
  /// <param name="oneUrl">Address to queue.</param>
  public void putOneUrl(string oneUrl)
  {
      if (oneUrl == null)
      {
          return;
      }

      // Remove control characters (not the letters t/r/n, which would destroy
      // "http://" and cause every URL to be rejected).
      oneUrl = oneUrl.Replace("\t", "").Replace("\r", "").Replace("\n", "").Trim();

      // Too long or too short: either one disqualifies the URL.
      if (oneUrl.Length > MaxUrlLength || oneUrl.Length < MinUrlLength)
      {
          return;
      }

      // Leftovers of HTML tags mean the link was extracted incorrectly.
      if (oneUrl.IndexOf("</", StringComparison.Ordinal) > -1
          || oneUrl.IndexOf("/>", StringComparison.Ordinal) > -1)
      {
          return;
      }

      // The queue stores lower-case URLs, so test both the incoming and the
      // stored form against the queue and the already-processed hashes.
      string lowered = oneUrl.ToLowerInvariant();

      if (urls.Contains(oneUrl) || urls.Contains(lowered))
      {
          return;
      }

      if (urlsIOLD.Contains(getMD5name(oneUrl)))
      {
          return;
      }
      if (lowered != oneUrl && urlsIOLD.Contains(getMD5name(lowered)))
      {
          return;
      }

      foreach (string fragment in RejectedUrlFragments)
      {
          if (lowered.IndexOf(fragment, StringComparison.Ordinal) > -1)
          {
              return;
          }
      }

      // Only http links are crawled.
      if (lowered.IndexOf("http://", StringComparison.Ordinal) == -1)
      {
          return;
      }

      urls.Add(lowered);
  }
  /// <summary>
  /// Re-queues a URL whose download failed. The URL's MD5 name is removed from the
  /// already-processed list so that <c>putOneUrl</c> accepts it again. Each URL may be
  /// pushed back at most three times; further requests are ignored.
  /// Null or empty input is ignored.
  /// </summary>
  /// <param name="oneUrl">Address to push back into the queue.</param>
  public void putOneUrl2(string oneUrl)
  {
      // Upper bound documented on the retry table: three re-pushes, then give up.
      const int maxRequeues = 3;

      if (string.IsNullOrEmpty(oneUrl))
      {
          return;
      }

      int requeues = 0;
      if (errUrl.Contains(oneUrl))
      {
          requeues = (int)errUrl[oneUrl];
      }

      if (requeues >= maxRequeues)
      {
          // Already pushed back the maximum number of times.
          return;
      }

      // Indexer assignment adds or overwrites, so it cannot fail on an existing key.
      errUrl[oneUrl] = requeues + 1;

      // Forget that the URL was handed out; Remove is a no-op when it is absent.
      urlsIOLD.Remove(getMD5name(oneUrl));

      putOneUrl(oneUrl);
  }
  /// <summary>
  /// Hands out one queued URL, preferring one whose host differs from the recently
  /// issued URLs. A random queue position is probed; a pick is rejected when
  /// <c>itComAll</c> reports a match with a recent URL or when its MD5 name is already
  /// in the processed list. After 30 rejected picks the first non-empty queue entry is
  /// taken instead. The returned URL is removed from the queue, recorded as processed
  /// and remembered as recently issued.
  /// </summary>
  /// <returns>The URL, or "" when the queue is empty or an unexpected error occurs.</returns>
  public string getOneUrl2()
  {
      // Number of random picks rejected so far.
      int rejectedPicks = 0;

      while (true)
      {
          try
          {
              int available = urls.Count;
              if (available == 0)
              {
                  Console.WriteLine("URL库为空!");
                  return "";
              }

              string chosen = "";
              int pickIndex = (int)(myRandom.NextDouble() * (double)available);
              int scanLimit = urls.Count;

              if (rejectedPicks < 30)
              {
                  bool pickAgain = false;
                  try
                  {
                      chosen = urls[pickIndex].ToString();
                      string chosenHash = getMD5name(chosen);
                      if (itComAll(chosen) | urlsIOLD.Contains(chosenHash))
                      {
                          rejectedPicks = rejectedPicks + 1;
                          pickAgain = true;
                      }
                  }
                  catch
                  {
                      // Probing failed: wait briefly and draw again. This path does
                      // not count as a rejected pick.
                      System.Threading.Thread.Sleep(1);
                      pickAgain = true;
                  }

                  if (pickAgain)
                  {
                      continue;
                  }
              }

              // Fallback: take the first non-empty entry, without the host and
              // processed-list checks.
              if (chosen == "")
              {
                  for (int i = 0; i < scanLimit; i++)
                  {
                      try
                      {
                          chosen = urls[i].ToString();
                          if (chosen.Length > 0)
                          {
                              break;
                          }
                      }
                      catch
                      {
                      }
                  }
              }

              if (chosen == "")
              {
                  return "";
              }

              try
              {
                  try
                  {
                      urls.Remove(chosen);
                  }
                  catch
                  {
                  }

                  // Record the URL as processed (by MD5 name), once.
                  string issuedHash = getMD5name(chosen);
                  if (!urlsIOLD.Contains(issuedHash))
                  {
                      urlsIOLD.Add(issuedHash);
                  }
              }
              catch
              {
              }

              Now2oldUrl(chosen);
              return chosen;
          }
          catch
          {
              return "";
          }
      }
  }
  /// <summary>
  /// Hands out one queued URL chosen at a random position. The URL is removed from
  /// the queue and its MD5 name is appended to the processed list.
  /// </summary>
  /// <returns>The URL, or "" when the queue is empty, the entry is empty, or any step fails.</returns>
  public string getOneUrl()
  {
      try
      {
          int available = urls.Count;
          if (available == 0)
          {
              Console.WriteLine("URL库为空!");
              return "";
          }

          int pickIndex = (int)(myRandom.NextDouble() * (double)available);
          string chosen = urls[pickIndex].ToString();
          if (chosen == "")
          {
              return "";
          }

          // Remove by value, then remember the URL as processed.
          urls.Remove(chosen);
          urlsIOLD.Add(getMD5name(chosen));
          return chosen;
      }
      catch
      {
          // Every failure along the way yields the same "nothing available" result.
          return "";
      }
  }
  /// <summary>
  /// Tells whether two URLs share the same third '/'-separated segment, compared in
  /// lower case. For a URL that begins with "http://" that segment is the host part.
  /// </summary>
  /// <param name="nowUrl">Candidate URL.</param>
  /// <param name="oldTmp">Previously issued URL to compare with.</param>
  /// <returns>
  /// true when both URLs contain "http://" and their third segments are equal;
  /// false otherwise, including when either argument is null.
  /// </returns>
  private bool itCom(string nowUrl, string oldTmp)
  {
      if (nowUrl == null || oldTmp == null)
      {
          return false;
      }

      string previous = oldTmp.ToLower();
      string current = nowUrl.ToLower();

      bool bothHaveScheme = previous.IndexOf("http://") != -1 && current.IndexOf("http://") != -1;
      if (!bothHaveScheme)
      {
          return false;
      }

      // "http://host/path" splits into "http:", "", "host", "path", ...
      string currentSegment = current.Split('/')[2];
      string previousSegment = previous.Split('/')[2];
      return currentSegment == previousSegment;
  }
  /// <summary>
  /// Tells whether the candidate matches (per <c>itCom</c>) any entry of the
  /// recently-issued URL buffer. Unused (null) slots never match.
  /// </summary>
  /// <param name="nowUrl">Candidate URL.</param>
  /// <returns>true when at least one recent URL matches; otherwise false.</returns>
  private bool itComAll(string nowUrl)
  {
      foreach (string recentUrl in oldUrlIt)
      {
          if (itCom(nowUrl, recentUrl))
          {
              return true;
          }
      }

      return false;
  }
  /// <summary>
  /// Stores a just-issued URL in the recently-issued buffer, overwriting the oldest
  /// slots once the buffer has been filled.
  /// </summary>
  /// <param name="nowData">URL that was just handed out.</param>
  private void Now2oldUrl(string nowData)
  {
      // Wrap to the first slot when the write index has run past the end.
      int slot = oldUrlNum;
      if (slot >= oldUrlIt.Length)
      {
          slot = 0;
      }

      oldUrlIt[slot] = nowData;
      oldUrlNum = slot + 1;
  }
  /// <summary>
  /// Records a batch of URLs as already obtained so that they are not issued again.
  /// The MD5 name of every element is appended to the processed list (no duplicate
  /// check) and the batch size is reported on the console.
  /// </summary>
  /// <param name="a">List of URL strings.</param>
  public void AddOldurlsIOLD(ArrayList a )
  {
      foreach (string knownUrl in a)
      {
          string knownHash = getMD5name(knownUrl);
          try
          {
              urlsIOLD.Add(knownHash);
          }
          catch
          {
              // Best effort: a failed insert is ignored.
          }
      }

      Console.WriteLine("压入已取得数据的 "+a.Count.ToString() +" 条URL");
  }
  /// <summary>
  /// Computes the MD5 name of a URL: the MD5 digest of its UTF-8 bytes rendered as
  /// 32 upper-case hexadecimal characters. Implemented with
  /// <c>System.Security.Cryptography.MD5</c> instead of the obsolete
  /// <c>FormsAuthentication.HashPasswordForStoringInConfigFile</c>, so the class no
  /// longer needs System.Web. The hash is used only as a lookup key, not for security.
  /// </summary>
  /// <param name="url">URL to hash; must not be null.</param>
  /// <returns>Upper-case hexadecimal MD5 digest.</returns>
  /// <exception cref="System.ArgumentNullException"><paramref name="url"/> is null.</exception>
  private string getMD5name(string url)
  {
      using (System.Security.Cryptography.MD5 md5 = System.Security.Cryptography.MD5.Create())
      {
          byte[] digest = md5.ComputeHash(System.Text.Encoding.UTF8.GetBytes(url));

          // BitConverter yields "AA-BB-..." in upper case; drop the separators.
          return BitConverter.ToString(digest).Replace("-", "");
      }
  }
  /// <summary>
  /// Loads a file of already obtained URLs (one per line, read as gb2312) and appends
  /// the MD5 name of every non-empty line to the processed list. The resulting list
  /// size is reported on the console. According to the original author's note this is
  /// meant for direct-indexing mode only, not for normal operation.
  /// I/O failures are reported on the console and do not propagate.
  /// </summary>
  /// <param name="had_url_path">Path of the file to read.</param>
  public void Load_Had_Url(string had_url_path)
  {
      try
      {
          using (StreamReader hadReader = new StreamReader(had_url_path, System.Text.Encoding.GetEncoding("gb2312")))
          {
              string row;
              while ((row = hadReader.ReadLine()) != null)
              {
                  if (row.Length == 0)
                  {
                      continue;
                  }

                  string rowHash = getMD5name(row);
                  try
                  {
                      urlsIOLD.Add(rowHash);
                  }
                  catch
                  {
                      // Best effort: a failed insert is ignored.
                  }
              }
          }
      }
      catch (IOException e)
      {
          Console.WriteLine(e.Message);
      }

      Console.WriteLine("已取得数据的 " + urlsIOLD.Count.ToString() + " 条");
  }
  /// <summary>
  /// Records one URL as already obtained: its MD5 name is appended to the processed
  /// list and the URL itself is appended as a line (gb2312) to the given file.
  /// According to the original author's note this is meant for direct-indexing mode
  /// only. I/O failures are reported on the console; any other failure after hashing
  /// is silently ignored.
  /// </summary>
  /// <param name="path_o">Path of the file the URL is appended to.</param>
  /// <param name="had_url">URL to record.</param>
  public void add_Had_Url(string path_o, string had_url)
  {
      string hadHash = getMD5name(had_url);
      try
      {
          urlsIOLD.Add(hadHash);

          try
          {
              // Second constructor argument "true" opens the file in append mode.
              using (StreamWriter appender = new StreamWriter(path_o, true, System.Text.Encoding.GetEncoding("gb2312")))
              {
                  appender.WriteLine(had_url);
              }
          }
          catch (IOException e)
          {
              Console.WriteLine(e.Message);
          }
      }
      catch
      {
          // Best effort: anything other than an I/O error is ignored.
      }
  }
  /// <summary>
  /// Makes every source URL eligible for crawling again: its MD5 name is removed from
  /// the processed list and the URL is put back into the queue unless it is already
  /// there. A console message is written when done.
  /// </summary>
  public void ReSetSource()
  {
      foreach (string sourceUrl in SourceURLs)
      {
          string sourceHash = getMD5name(sourceUrl);
          try
          {
              // Remove does nothing when the hash is not in the list.
              urlsIOLD.Remove(sourceHash);

              if (!urls.Contains(sourceUrl))
              {
                  urls.Add(sourceUrl);
              }
          }
          catch
          {
              // Best effort: a failure for one source URL does not stop the others.
          }
      }

      Console.WriteLine("源地址重新加载——监控关键性页面");
  }
  842.     }
  843. }