ClassFilesDataBase.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:19k
源码类别:

搜索引擎

开发平台:

C#

  1. using System;
  2. using System.IO;
  3. using System.Collections.Generic;
  4. using System.Text;
  5. using System.Collections;
  6. /*
  7.       '       迅龙中文分类搜索引擎  v0.6
  8.       '
  9.       '        LGPL  许可发行
  10.       '
  11.       '       宁夏大学  张冬 康彩  zd4004@163.com
  12.       ' 
  13.       '        官网 http://blog.163.com/zd4004/
  14.  */
  15. namespace NetHashTableAPI
  16. {
  17.     /// <summary>
  18.     /// 文本数据库 以12M 为1个数据单位  满12M 建立一个索引文件 数据扩展名为 .TDB  索引扩展名为 .IXD
  19.     /// </summary>
  20.    public  class ClassFilesDataBase
  21.     {
  /// <summary>
  /// One index record: describes where the stored bytes of a key live.
  /// </summary>
  struct IndexST
  {
      /// <summary>
      /// Offset of the record's first byte inside its data file or buffer.
      /// </summary>
      public int start;

      /// <summary>
      /// Originally labelled "end position". The code in this file that fills
      /// and reads it stores the compressed length here and treats
      /// start + end as the exclusive upper bound.
      /// </summary>
      public int end;

      /// <summary>
      /// Data file name without extension. The literal "NOW" marks a record
      /// that is still in the in-memory write buffer.
      /// </summary>
      public string file;

      /// <summary>
      /// MD5 of the stored value; empty when the index line carried none.
      /// </summary>
      public string MD5;
  }
  /// <summary>
  /// Write position inside the write buffer: index of the next free byte.
  /// </summary>
  private int nowP = 0;

  /// <summary>
  /// Size in bytes of one data unit (the write buffer size).
  /// </summary>
  public int nMV = 0;

  /// <summary>
  /// Index lines that belong to the data unit currently being filled.
  /// </summary>
  private ArrayList mIndex = new ArrayList();

  /// <summary>
  /// Read cache holding the contents of up to 20 data files, one per row.
  /// </summary>
  private byte[,] mDISK = new byte[20, 1];

  /// <summary>
  /// Name of the data file held in each read-cache row.
  /// </summary>
  private string[] mDISKINT = new string[20];

  /// <summary>
  /// Read-cache row that the next loaded file will be placed in.
  /// </summary>
  private int pDisk = 0;

  /// <summary>
  /// Codec used to compress values before storing and decompress them on read.
  /// </summary>
  private NewNxuEncoding.CNewNxuEncoding nCode = new NewNxuEncoding.CNewNxuEncoding();

  /// <summary>
  /// Write buffer for the data unit currently being filled.
  /// </summary>
  private byte[] mCache = new byte[100];

  /// <summary>
  /// Buffer set aside for reads.
  /// </summary>
  private byte[] mCacheREAD = new byte[100];

  /// <summary>
  /// Every known key mapped to its IndexST record; used to locate a value.
  /// </summary>
  private Hashtable IndexALL = new Hashtable();

  /// <summary>
  /// Directory that holds the data and index files.
  /// </summary>
  private string dbdir = "";

  /// <summary>
  /// MD5 hashes of stored values; a hit here triggers the duplicate lookup
  /// when a value is written.
  /// </summary>
  private ArrayList IndexALLMD5 = new ArrayList();
  /// <summary>
  /// Loads every "*.IXD" index file of a directory into IndexALL and records
  /// the MD5 of each loaded entry in IndexALLMD5.
  /// </summary>
  /// <remarks>
  /// An index line has the form: start TAB length TAB key [TAB md5].
  /// Lines that are not tab separated, do not contain "http://" after the
  /// first character, have fewer than three fields, or carry non-numeric
  /// offsets are skipped. A key that is already indexed is left untouched.
  /// </remarks>
  /// <param name="dircc">Directory to scan for index files.</param>
  private void initIndex(string dircc)
  {
      // Reset the hash list once, before the scan. Clearing it per file (as the
      // code did before) kept only the hashes of the last index file.
      IndexALLMD5.Clear();

      char[] lineBreaks = { '\r', '\n' };
      DirectoryInfo dir = new DirectoryInfo(dircc);

      foreach (FileInfo f in dir.GetFiles("*.IXD"))
      {
          // Records refer to their data file by the name without extension.
          string baseName = Path.GetFileNameWithoutExtension(f.Name);
          string content = getFileData(f.FullName);

          foreach (string line in content.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries))
          {
              if (line.IndexOf('\t') <= 0 || line.IndexOf("http://", StringComparison.Ordinal) <= 0)
              {
                  continue;
              }

              string[] fields = line.Split('\t');
              if (fields.Length < 3)
              {
                  continue; // malformed line: no key field
              }

              int start;
              int length;
              if (!Int32.TryParse(fields[0], out start) || !Int32.TryParse(fields[1], out length))
              {
                  continue; // malformed line: offsets are not numbers
              }

              string key = fields[2];
              if (IndexALL.Contains(key))
              {
                  continue; // first occurrence of a key wins
              }

              IndexST entry = new IndexST();
              entry.file = baseName;
              entry.start = start;
              entry.end = length;
              if (fields.Length == 3)
              {
                  entry.MD5 = "";
              }
              else
              {
                  entry.MD5 = fields[3];
                  IndexALLMD5.Add(fields[3]);
              }

              IndexALL.Add(key, entry);
          }
      }
  }
  /// <summary>
  /// Initialises the store: sets the data unit size, allocates the buffers,
  /// clears the read cache and loads the index files of the directory.
  /// </summary>
  /// <param name="dir">Directory that holds the data and index files.</param>
  /// <param name="nM">
  /// Size in bytes of one data unit. The read cache allocates one row of this
  /// size per cache slot.
  /// </param>
  /// <exception cref="ArgumentOutOfRangeException">
  /// <paramref name="nM"/> is zero or negative.
  /// </exception>
  public void SetClassFilesDataBase(string dir, int nM)
  {
      if (nM <= 0)
      {
          throw new ArgumentOutOfRangeException("nM", "The data unit size must be positive.");
      }

      int slots = mDISKINT.Length;

      mCache = new byte[nM];
      mCacheREAD = new byte[nM];
      mDISK = new byte[slots, nM];

      // Empty the read cache.
      pDisk = 0;
      for (int x = 0; x < slots; x++)
      {
          mDISKINT[x] = "";
      }

      // Remember the unit size.
      nMV = nM;

      // The write buffer was just replaced, so the write position must start
      // again at zero; a stale position would index into the new buffer.
      nowP = 0;
      mIndex.Clear();

      // Remember the directory and load the existing index files.
      dbdir = dir;
      initIndex(dir);
  }
  /// <summary>
  /// Stores a value under a key.
  /// </summary>
  /// <remarks>
  /// A value whose MD5 matches an already indexed value is not stored again:
  /// the new key gets an index record pointing at the existing bytes. Two
  /// different values with the same MD5 would therefore share one record.
  /// Otherwise the value is compressed and appended to the write buffer; the
  /// buffer is flushed with SaveIt first when the value would not fit.
  /// </remarks>
  /// <param name="key">Key to store the value under.</param>
  /// <param name="val">Value to store.</param>
  /// <returns>
  /// true when the key is indexed after the call (including the case where it
  /// already was); false when key or val is null or empty, or when the
  /// compressed value does not fit in the write buffer.
  /// </returns>
  public bool add(string key, string val)
  {
      if (key == null || val == null)
      {
          return false;
      }

      // Already stored: nothing to do.
      if (IndexALL.Contains(key))
      {
          return true;
      }

      if (key.Length == 0 || val.Length == 0)
      {
          return false;
      }

      string OneMd5 = getMD5name(val);

      // Same MD5 as an existing value: point the new key at the same bytes.
      if (IndexALLMD5.Contains(OneMd5))
      {
          bool found = false;
          IndexST shared = new IndexST();
          foreach (DictionaryEntry entry in IndexALL)
          {
              IndexST candidate = (IndexST)entry.Value;
              if (candidate.MD5 == OneMd5)
              {
                  shared = candidate;
                  found = true;
                  break;
              }
          }

          if (found)
          {
              // Index line: start TAB length TAB key TAB md5
              string sharedLine = shared.start.ToString() + "\t" + shared.end.ToString() + "\t" + key + "\t" + OneMd5;

              // The table is modified only after the enumeration has ended.
              IndexALL.Add(key, shared);

              if (shared.file == "NOW")
              {
                  // Data is still buffered: the line goes out with the next flush.
                  if (mIndex.Contains(sharedLine) == false)
                  {
                      mIndex.Add(sharedLine);
                  }
              }
              else
              {
                  // Data is on disk: append the line to that file's index.
                  AddPutFileData(Path.Combine(dbdir, shared.file + ".IXD"), sharedLine);
              }

              return true;
          }
      }

      byte[] dataSV = nCode.dbsCompress(val);

      // Flush when the value would reach the unit size. An empty buffer is
      // never flushed: that would only produce an empty data file.
      if (nowP > 0 && nowP + dataSV.Length >= nMV)
      {
          SaveIt();
          nowP = 0;
          mIndex.Clear();
      }

      // A value that cannot fit even into the available buffer is rejected
      // instead of overrunning the buffer.
      if (nowP + dataSV.Length > mCache.Length)
      {
          return false;
      }

      int startOffset = nowP;
      Buffer.BlockCopy(dataSV, 0, mCache, nowP, dataSV.Length);
      nowP = nowP + dataSV.Length;

      // Index line: start TAB length TAB key TAB md5
      string indexLine = startOffset.ToString() + "\t" + dataSV.Length.ToString() + "\t" + key + "\t" + OneMd5;

      IndexST cTmp = new IndexST();
      cTmp.start = startOffset;
      cTmp.end = dataSV.Length;
      cTmp.file = "NOW";
      cTmp.MD5 = OneMd5;

      // Make the record readable and queue its line for the next flush.
      IndexALL.Add(key, cTmp);
      if (mIndex.Contains(indexLine) == false)
      {
          mIndex.Add(indexLine);
      }

      // Register the hash so that later identical values are de-duplicated
      // against this record; without this the "NOW" branch above is never taken.
      if (IndexALLMD5.Contains(OneMd5) == false)
      {
          IndexALLMD5.Add(OneMd5);
      }

      return true;
  }
  /// <summary>
  /// Returns the value stored under a key.
  /// </summary>
  /// <remarks>
  /// The bytes are taken from the write buffer when the record has not been
  /// flushed yet, from the read cache when its data file is cached, and from
  /// disk otherwise; a file read from disk is then placed in the read cache.
  /// </remarks>
  /// <param name="KeyVal">Key to look up.</param>
  /// <returns>
  /// The decompressed value, or the literal "cnull" when the key is null or
  /// not indexed.
  /// </returns>
  public string Value(string KeyVal)
  {
      if (KeyVal == null || IndexALL.Contains(KeyVal) == false)
      {
          return "cnull"; // no data
      }

      IndexST cTmp = (IndexST)IndexALL[KeyVal];

      // The buffer handed to the decompressor is one byte longer than the
      // record, exactly as before.
      // NOTE(review): the codec is not visible here; confirm whether the extra
      // trailing zero byte is required.
      byte[] record = new byte[cTmp.end + 1];

      // 1. Not flushed yet: read straight from the write buffer.
      if (cTmp.file == "NOW")
      {
          for (int i = 0; i < cTmp.end; i++)
          {
              record[i] = mCache[cTmp.start + i];
          }
          return nCode.dbsDeCompress(record);
      }

      // 2. Data file already in the read cache.
      for (int x = 0; x < mDISKINT.Length; x++)
      {
          if (mDISKINT[x] == cTmp.file)
          {
              for (int i = 0; i < cTmp.end; i++)
              {
                  record[i] = mDISK[x, cTmp.start + i];
              }
              return nCode.dbsDeCompress(record);
          }
      }

      // 3. Load the data file from disk; the handle is released even on failure.
      byte[] m = File.ReadAllBytes(Path.Combine(dbdir, cTmp.file + ".TDB"));

      for (int i = 0; i < cTmp.end; i++)
      {
          record[i] = m[cTmp.start + i];
      }

      // Cache the file only when it fits into one cache row.
      if (m.Length <= mDISK.GetLength(1))
      {
          if (pDisk >= mDISKINT.Length)
          {
              pDisk = 0; // wrap around
          }

          for (int i = 0; i < m.Length; i++)
          {
              mDISK[pDisk, i] = m[i];
          }

          // Label the row only after it is completely filled.
          mDISKINT[pDisk] = cTmp.file;
          pDisk = pDisk + 1;
      }

      return nCode.dbsDeCompress(record);
  }
  /// <summary>
  /// Builds a new name (without extension) for a data/index file pair.
  /// </summary>
  /// <remarks>
  /// The name has the form date@hour_minute_second_millisecond@unique.
  /// The date uses the fixed pattern yyyy-MM-dd because a culture dependent
  /// short date can contain '/', which is not valid in a file name. The
  /// unique part is a GUID, so two calls within the same millisecond still
  /// differ.
  /// </remarks>
  /// <returns>A name that contains no invalid file-name characters.</returns>
  private string GetOneName()
  {
      // Read the clock once so that all parts describe the same instant.
      DateTime now = DateTime.Now;
      System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

      return now.ToString("yyyy-MM-dd", invariant)
          + "@" + now.Hour.ToString(invariant)
          + "_" + now.Minute.ToString(invariant)
          + "_" + now.Second.ToString(invariant)
          + "_" + now.Millisecond.ToString(invariant)
          + "@" + Guid.NewGuid().ToString("N");
  }
  /// <summary>
  /// Reads a whole text file using the gb2312 encoding.
  /// </summary>
  /// <param name="filename">Path of the file to read.</param>
  /// <returns>
  /// The file content, or an empty string when an IOException occurs (its
  /// message is written to the console).
  /// </returns>
  private string getFileData(string filename)
  {
      try
      {
          using (StreamReader input = new StreamReader(filename, Encoding.GetEncoding("gb2312")))
          {
              return input.ReadToEnd();
          }
      }
      catch (IOException e)
      {
          Console.WriteLine(e.Message);
          return "";
      }
  }
  /// <summary>
  /// Writes text to a file using the gb2312 encoding, replacing any existing
  /// content. An IOException is reported on the console and not rethrown.
  /// </summary>
  /// <param name="filename">Path of the file to write.</param>
  /// <param name="data">Text to write.</param>
  private void putFileData(string filename, string data)
  {
      try
      {
          using (StreamWriter output = new StreamWriter(filename, false, Encoding.GetEncoding("gb2312")))
          {
              output.Write(data);
          }
      }
      catch (IOException e)
      {
          Console.WriteLine(e.Message);
      }
  }
  /// <summary>
  /// Appends one line of text to a file using the gb2312 encoding. An
  /// IOException is reported on the console and not rethrown.
  /// </summary>
  /// <param name="filename">Path of the file to append to.</param>
  /// <param name="data">Line to append; a line terminator is added.</param>
  private void AddPutFileData(string filename, string data)
  {
      try
      {
          using (StreamWriter output = new StreamWriter(filename, true, Encoding.GetEncoding("gb2312")))
          {
              output.WriteLine(data);
          }
      }
      catch (IOException e)
      {
          Console.WriteLine(e.Message);
      }
  }
  /// <summary>
  /// Saves the data that is still in the write buffer; meant to be called
  /// when the store is shut down. Does nothing when the buffer is empty.
  /// </summary>
  public void SaveExitData()
  {
      if (nowP <= 0)
      {
          return; // nothing buffered
      }

      SaveIt();

      // Same reset that add performs after a flush, so that a second call
      // does not write the same data again.
      nowP = 0;
      mIndex.Clear();
  }
  /// <summary>
  /// Returns the indexed keys that contain the given text.
  /// </summary>
  /// <remarks>
  /// The comparison ignores case on both sides. Previously only the query was
  /// lower-cased, so keys containing upper-case letters could never match.
  /// Leading and trailing white space of the query is ignored; an empty query
  /// matches every key.
  /// </remarks>
  /// <param name="xmlTxt">Text to look for inside the keys.</param>
  /// <returns>
  /// The matching keys as strings; an empty list when nothing matches or
  /// xmlTxt is null.
  /// </returns>
  public ArrayList SearchOneList(string xmlTxt)
  {
      ArrayList matches = new ArrayList();
      if (xmlTxt == null)
      {
          return matches;
      }

      string needle = xmlTxt.Trim();

      foreach (DictionaryEntry entry in IndexALL)
      {
          string key = entry.Key.ToString();
          if (key.IndexOf(needle, StringComparison.OrdinalIgnoreCase) >= 0)
          {
              matches.Add(key);
          }
      }

      return matches;
  }
  /// <summary>
  /// Flushes the write buffer: writes the buffered bytes to a new ".TDB" data
  /// file, writes the pending index lines to the matching ".IXD" file, and
  /// points every record that was marked "NOW" at the new file.
  /// </summary>
  /// <remarks>
  /// Does nothing when the buffer is empty. The data file is written before
  /// the index file, so a failed data write leaves no index behind; such a
  /// failure is propagated to the caller instead of being swallowed. After a
  /// successful flush the write position and the pending index lines are reset.
  /// </remarks>
  public void SaveIt()
  {
      if (nowP <= 0)
      {
          return; // nothing buffered
      }

      // 1. Pick a name that is not used by an existing data file.
      string tmpname = GetOneName();
      while (File.Exists(Path.Combine(dbdir, tmpname + ".TDB")))
      {
          tmpname = GetOneName();
      }

      // 2. Write all buffered bytes. The previous code wrote nowP - 1 bytes
      //    and thereby dropped the last byte of the last record.
      using (FileStream fs = new FileStream(Path.Combine(dbdir, tmpname + ".TDB"), FileMode.CreateNew))
      {
          fs.Write(mCache, 0, nowP);
      }

      // 3. Write the index: one "start TAB length TAB key TAB md5" line each.
      StringBuilder indexText = new StringBuilder();
      foreach (string line in mIndex)
      {
          indexText.Append(line).Append("\r\n");
      }
      putFileData(Path.Combine(dbdir, tmpname + ".IXD"), indexText.ToString());

      // 4. Re-point only the records that lived in the buffer. Keys are
      //    snapshotted because the table is modified inside the loop. Records
      //    of other files stay untouched and the MD5 is kept.
      ArrayList keys = new ArrayList(IndexALL.Keys);
      foreach (object key in keys)
      {
          IndexST entry = (IndexST)IndexALL[key];
          if (entry.file == "NOW")
          {
              entry.file = tmpname;
              IndexALL[key] = entry;
          }
      }

      // 5. The buffer content is on disk now; start a fresh unit.
      nowP = 0;
      mIndex.Clear();
  }
  /// <summary>
  /// Computes the MD5 of a text, used as the identity of a stored value.
  /// </summary>
  /// <remarks>
  /// Hashes the UTF-8 bytes of the text and renders them as upper-case hex.
  /// This matches what the obsolete System.Web helper
  /// FormsAuthentication.HashPasswordForStoringInConfigFile(data, "md5") is
  /// documented to return, so hashes already stored in index files should
  /// remain valid. The System.Web dependency is no longer needed.
  /// </remarks>
  /// <param name="data">Text to hash.</param>
  /// <returns>32 upper-case hexadecimal characters.</returns>
  /// <exception cref="ArgumentNullException">data is null.</exception>
  private string getMD5name(string data)
  {
      byte[] digest;
      using (System.Security.Cryptography.MD5 md5 = System.Security.Cryptography.MD5.Create())
      {
          digest = md5.ComputeHash(Encoding.UTF8.GetBytes(data));
      }

      // BitConverter yields "AB-CD-..."; drop the dashes.
      return BitConverter.ToString(digest).Replace("-", "");
  }
  543.     }
  544. }
  545. /*
  546.   FileStream fs = new FileStream(FILE_NAME, FileMode.CreateNew);
  547.         // Create the writer for data.
  548.         BinaryWriter w = new BinaryWriter(fs);
  549.         // Write data to Test.data.
  550.         for (int i = 0; i < 11; i++) 
  551.         {
  552.             w.Write( (int) i);
  553.         }
  554.         w.Close();
  555.         fs.Close();
  556.         // Create the reader for data.
  557.         fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read);
  558.         BinaryReader r = new BinaryReader(fs);
  559.         // Read data from Test.data.
  560.         for (int i = 0; i < 11; i++) 
  561.         {
  562.             Console.WriteLine(r.ReadInt32());
  563.         }
  564.         r.Close();
  565.         fs.Close();
  566.  */