ClassFilesDataBase.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:19k
- using System;
- using System.IO;
- using System.Collections.Generic;
- using System.Text;
- using System.Collections;
- /*
- ' 迅龙中文分类搜索引擎 v0.6
- '
- ' LGPL 许可发行
- '
- ' 宁夏大学 张冬 康彩 zd4004@163.com
- '
- ' 官网 http://blog.163.com/zd4004/
- */
- namespace NetHashTableAPI
- {
- /// <summary>
- /// 文本数据库 以12M 为1个数据单位 满12M 建立一个索引文件 数据扩展名为 .TDB 索引扩展名为 .IXD
- /// </summary>
- public class ClassFilesDataBase
- {
/// <summary>
/// One index record: where a stored value lives and how to recognise it.
/// </summary>
struct IndexST
{
    /// <summary>Offset of the first byte of the record inside its data block.</summary>
    public int start;

    /// <summary>
    /// Second numeric field of an index line. The rest of this class stores the
    /// compressed byte count here and reads it back as a length, not as an end offset.
    /// </summary>
    public int end;

    /// <summary>
    /// Name of the data file without extension, or "NOW" while the record is still
    /// in the write buffer.
    /// </summary>
    public string file;

    /// <summary>MD5 of the stored value; empty when the index line carried none.</summary>
    public string MD5;
}
/// <summary>Write position inside <c>mCache</c>: the offset at which the next record is stored.</summary>
private int nowP;

/// <summary>Size in bytes of one data unit, as passed to <c>SetClassFilesDataBase</c>.</summary>
public int nMV;

/// <summary>Index lines of the records in the write buffer that still have to be written to an index file.</summary>
private ArrayList mIndex = new ArrayList();

/// <summary>Read cache with 20 rows; each row receives the bytes of one loaded data file.</summary>
private byte[,] mDISK = new byte[20, 1];

/// <summary>Name of the data file held in the row of <c>mDISK</c> with the same number.</summary>
private string[] mDISKINT = new string[20];

/// <summary>Row of <c>mDISK</c> that the next loaded data file is written into.</summary>
private int pDisk;

/// <summary>Codec used to compress values before they are stored and to decompress them on read.</summary>
private NewNxuEncoding.CNewNxuEncoding nCode = new NewNxuEncoding.CNewNxuEncoding();

/// <summary>Write buffer that collects compressed records until they are saved to a data file.</summary>
private byte[] mCache = new byte[100];

/// <summary>Buffer reserved for reads; in the code visible here it is only allocated.</summary>
private byte[] mCacheREAD = new byte[100];

/// <summary>Maps every known key to its <c>IndexST</c> record.</summary>
private Hashtable IndexALL = new Hashtable();

/// <summary>Directory that holds the data and index files.</summary>
private string dbdir = "";

/// <summary>MD5 values of known records; <c>add</c> consults it to detect values that are already stored.</summary>
private ArrayList IndexALLMD5 = new ArrayList();
/// <summary>
/// Loads every index file (*.IXD) found in a directory into <c>IndexALL</c> and
/// collects the MD5 values they carry in <c>IndexALLMD5</c>.
/// </summary>
/// <remarks>
/// An index line has the form: start TAB length TAB key [TAB md5]. Lines that do
/// not follow that form are skipped. When a key occurs more than once, the first
/// occurrence wins.
/// </remarks>
/// <param name="dircc">Directory to scan for index files.</param>
private void initIndex(string dircc)
{
    // Reset once, before the scan. Clearing inside the per-file loop kept only the
    // MD5 values of the last index file that was read.
    IndexALLMD5.Clear();

    char[] lineBreaks = { '\r', '\n' };
    DirectoryInfo dir = new DirectoryInfo(dircc);

    foreach (FileInfo indexFile in dir.GetFiles("*.IXD"))
    {
        // The data file shares the index file's base name, so keep it without extension.
        string baseName = Path.GetFileNameWithoutExtension(indexFile.Name);
        string content = getFileData(indexFile.FullName);

        foreach (string line in content.Split(lineBreaks))
        {
            // Only lines that contain a TAB and "http://" somewhere after their
            // first character are treated as index lines.
            if (line.IndexOf('\t') <= 0 || line.IndexOf("http://") <= 0)
            {
                continue;
            }

            string[] fields = line.Split('\t');
            if (fields.Length < 3)
            {
                continue;
            }

            int start;
            int length;
            if (!Int32.TryParse(fields[0], out start) || !Int32.TryParse(fields[1], out length))
            {
                // A damaged line must not abort loading of the remaining index.
                continue;
            }

            string key = fields[2];
            if (IndexALL.Contains(key))
            {
                continue;
            }

            IndexST entry = new IndexST();
            entry.file = baseName;
            entry.start = start;
            entry.end = length;

            if (fields.Length == 3)
            {
                entry.MD5 = "";
            }
            else
            {
                entry.MD5 = fields[3];
                IndexALLMD5.Add(fields[3]);
            }

            IndexALL.Add(key, entry);
        }
    }
}
/// <summary>
/// Configures the store: allocates the buffers for the given unit size, resets the
/// read cache, remembers the directory and loads the index files found in it.
/// </summary>
/// <param name="dir">Directory that holds the data and index files.</param>
/// <param name="nM">Size in bytes of one data unit; used as the length of every buffer.</param>
public void SetClassFilesDataBase(string dir, int nM)
{
    const int cacheSlots = 20;

    // Write buffer, read buffer and one cache row per slot all share the unit size.
    mCache = new byte[nM];
    mCacheREAD = new byte[nM];
    mDISK = new byte[cacheSlots, nM];

    // Forget which data files were cached.
    pDisk = 0;
    int slot = 0;
    while (slot < cacheSlots)
    {
        mDISKINT[slot] = "";
        slot++;
    }

    nMV = nM;
    mIndex.Clear();

    // Remember the directory and read the index files it contains.
    dbdir = dir;
    initIndex(dir);
}
/// <summary>
/// Stores a value under a key.
/// </summary>
/// <remarks>
/// The value is compressed and appended to the write buffer; when the buffer cannot
/// take the record, the buffer is saved to disk first. A value whose MD5 is already
/// known is not stored again: the new key is pointed at the existing record and an
/// extra index line is recorded for it. Index lines have the form
/// start TAB length TAB key TAB md5.
/// </remarks>
/// <param name="key">Key to store the value under.</param>
/// <param name="val">Value to store.</param>
/// <returns>
/// True when the key already existed or the value was stored; false when the key or
/// value is null or empty, or when the compressed value is larger than the write buffer.
/// </returns>
public bool add(string key, string val)
{
    if (key == null || val == null)
    {
        return false;
    }

    // An existing key is left untouched.
    if (IndexALL.Contains(key))
    {
        return true;
    }

    if (key.Length == 0 || val.Length == 0)
    {
        return false;
    }

    string OneMd5 = getMD5name(val);

    // Same MD5: point the new key at the record that already holds the data.
    if (IndexALLMD5.Contains(OneMd5))
    {
        bool found = false;
        IndexST shared = new IndexST();
        foreach (DictionaryEntry known in IndexALL)
        {
            IndexST candidate = (IndexST)known.Value;
            if (candidate.MD5 == OneMd5)
            {
                shared = candidate;
                found = true;
                break;
            }
        }

        if (found)
        {
            string aliasLine = shared.start.ToString() + "\t" + shared.end.ToString() + "\t" + key + "\t" + OneMd5;

            // Added after the scan so the table is not modified while it is enumerated.
            IndexALL.Add(key, shared);

            if (shared.file == "NOW")
            {
                // The record is still buffered: its index line goes out with the next save.
                if (!mIndex.Contains(aliasLine))
                {
                    mIndex.Add(aliasLine);
                }
            }
            else
            {
                // The record is on disk: append the line to the index file that owns it.
                AddPutFileData(Path.Combine(dbdir, shared.file + ".IXD"), aliasLine);
            }

            return true;
        }
    }

    byte[] dataSV = nCode.dbsCompress(val);

    // A record that is larger than the whole buffer can never be stored.
    if (dataSV.Length > mCache.Length)
    {
        return false;
    }

    // Save the buffer when the record does not fit behind the data already in it.
    // An empty buffer is never saved: there would be nothing to write.
    bool unitFull = nowP + dataSV.Length >= nMV || nowP + dataSV.Length > mCache.Length;
    if (nowP > 0 && unitFull)
    {
        SaveIt();
        nowP = 0;
        mIndex.Clear();
    }

    int recordStart = nowP;
    Buffer.BlockCopy(dataSV, 0, mCache, nowP, dataSV.Length);
    nowP = nowP + dataSV.Length;

    string indexLine = recordStart.ToString() + "\t" + dataSV.Length.ToString() + "\t" + key + "\t" + OneMd5;

    IndexST entry = new IndexST();
    entry.start = recordStart;
    entry.end = dataSV.Length;
    entry.file = "NOW";
    entry.MD5 = OneMd5;

    // Make the record readable through the in-memory index.
    IndexALL.Add(key, entry);

    // Remember the MD5 so that a later add of the same value reuses this record.
    if (!IndexALLMD5.Contains(OneMd5))
    {
        IndexALLMD5.Add(OneMd5);
    }

    // Queue the index line for the next save.
    if (!mIndex.Contains(indexLine))
    {
        mIndex.Add(indexLine);
    }

    return true;
}
/// <summary>
/// Returns the value stored under a key.
/// </summary>
/// <remarks>
/// Records that are still in the write buffer are read from the buffer. Records on
/// disk are served from the read cache when their data file is cached; otherwise
/// the data file is loaded and, if it fits into a cache row, cached.
/// </remarks>
/// <param name="KeyVal">Key to look up.</param>
/// <returns>
/// The decompressed value, or the literal "cnull" when the key is null or unknown,
/// or when its index record points outside the available data.
/// </returns>
public string Value(string KeyVal)
{
    if (KeyVal == null || !IndexALL.Contains(KeyVal))
    {
        return "cnull";
    }

    IndexST entry = (IndexST)IndexALL[KeyVal];

    // Not saved yet: the bytes are in the write buffer, below the write position.
    if (entry.file == "NOW")
    {
        return DecodeRecord(mCache, 0, nowP, entry.start, entry.end);
    }

    // Serve from the read cache when the data file is already loaded.
    int rowWidth = mDISK.GetLength(1);
    for (int slot = 0; slot < mDISKINT.Length; slot++)
    {
        if (mDISKINT[slot] == entry.file)
        {
            return DecodeRecord(mDISK, slot * rowWidth, rowWidth, entry.start, entry.end);
        }
    }

    // Cache miss: load the whole data file.
    byte[] fileBytes = File.ReadAllBytes(Path.Combine(dbdir, entry.file + ".TDB"));
    string result = DecodeRecord(fileBytes, 0, fileBytes.Length, entry.start, entry.end);

    // Keep the file in the cache, reusing the rows in turn. A file that is larger
    // than a cache row is simply not cached.
    if (fileBytes.Length <= rowWidth)
    {
        if (pDisk >= mDISKINT.Length)
        {
            pDisk = 0;
        }

        // For a two-dimensional byte array the offset is counted in bytes from the
        // start of the array, so row r begins at r * rowWidth.
        Buffer.BlockCopy(fileBytes, 0, mDISK, pDisk * rowWidth, fileBytes.Length);
        mDISKINT[pDisk] = entry.file;
        pDisk = pDisk + 1;
    }

    return result;
}

// Copies one compressed record out of a byte array and decompresses it; returns
// "cnull" when the record does not lie inside the given region of the array.
private string DecodeRecord(Array source, int regionOffset, int regionLength, int start, int length)
{
    if (start < 0 || length < 0 || start > regionLength || length > regionLength - start)
    {
        return "cnull";
    }

    // The buffer is one byte longer than the record, as it was before this change.
    // NOTE(review): confirm whether dbsDeCompress relies on the trailing zero byte.
    byte[] record = new byte[length + 1];
    Buffer.BlockCopy(source, regionOffset + start, record, 0, length);
    return nCode.dbsDeCompress(record);
}
// Shared generator: a new time-seeded Random per call can return the same value
// when two calls fall into the same clock tick.
private static readonly System.Random nameRandom = new System.Random();

/// <summary>
/// Builds a temporary base name for a new pair of data and index files.
/// </summary>
/// <remarks>
/// The name is made of the current date, the time down to milliseconds and a random
/// number, separated by '@'. Every part is formatted with the invariant culture:
/// the culture-dependent short date can contain '/', which is not allowed in a
/// file name.
/// </remarks>
/// <returns>A name of the form yyyy-MM-dd@H_m_s_ms@random.</returns>
private string GetOneName()
{
    // Read the clock once so that all parts describe the same instant.
    DateTime now = DateTime.Now;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    return now.ToString("yyyy-MM-dd", invariant)
        + "@" + now.Hour.ToString(invariant)
        + "_" + now.Minute.ToString(invariant)
        + "_" + now.Second.ToString(invariant)
        + "_" + now.Millisecond.ToString(invariant)
        + "@" + nameRandom.NextDouble().ToString(invariant);
}
/// <summary>
/// Reads a whole text file that is encoded as gb2312.
/// </summary>
/// <param name="filename">Path of the file to read.</param>
/// <returns>
/// The file content, or an empty string when an <see cref="IOException"/> occurs;
/// in that case the exception message is written to the console.
/// </returns>
private string getFileData(string filename)
{
    StreamReader source = null;
    try
    {
        source = new StreamReader(filename, System.Text.Encoding.GetEncoding("gb2312"));
        return source.ReadToEnd();
    }
    catch (IOException ioError)
    {
        Console.WriteLine(ioError.Message);
        return "";
    }
    finally
    {
        // Runs on every path, so the reader is released whether or not the read worked.
        if (source != null)
        {
            source.Close();
        }
    }
}
/// <summary>
/// Writes text to a file encoded as gb2312, replacing any existing content.
/// </summary>
/// <remarks>
/// An <see cref="IOException"/> is not passed on to the caller; its message is
/// written to the console.
/// </remarks>
/// <param name="filename">Path of the file to write.</param>
/// <param name="data">Text to write.</param>
private void putFileData(string filename, string data)
{
    StreamWriter target = null;
    try
    {
        // Second argument false: overwrite instead of append.
        target = new StreamWriter(filename, false, System.Text.Encoding.GetEncoding("gb2312"));
        target.Write(data);
    }
    catch (IOException ioError)
    {
        Console.WriteLine(ioError.Message);
    }
    finally
    {
        if (target != null)
        {
            target.Close();
        }
    }
}
/// <summary>
/// Appends one line of text to a file encoded as gb2312.
/// </summary>
/// <remarks>
/// An <see cref="IOException"/> is not passed on to the caller; its message is
/// written to the console.
/// </remarks>
/// <param name="filename">Path of the file to append to.</param>
/// <param name="data">Text of the line; a line terminator is added after it.</param>
private void AddPutFileData(string filename, string data)
{
    StreamWriter target = null;
    try
    {
        // Second argument true: keep the existing content and append.
        target = new StreamWriter(filename, true, System.Text.Encoding.GetEncoding("gb2312"));
        target.WriteLine(data);
    }
    catch (IOException ioError)
    {
        Console.WriteLine(ioError.Message);
    }
    finally
    {
        if (target != null)
        {
            target.Close();
        }
    }
}
/// <summary>
/// Saves the records that are still in the write buffer; meant to be called before
/// the store is shut down.
/// </summary>
/// <remarks>
/// Does nothing when the write buffer is empty. After the save, the write position
/// and the pending index lines are reset in the same way <c>add</c> resets them
/// after it has saved a full buffer.
/// </remarks>
public void SaveExitData()
{
    if (nowP <= 0)
    {
        return;
    }

    SaveIt();
    nowP = 0;
    mIndex.Clear();
}
/// <summary>
/// Returns the keys that contain the given text.
/// </summary>
/// <remarks>
/// The text is trimmed and compared without regard to case. Before this change only
/// the search text was lower-cased, so a key containing upper-case letters could
/// not be matched on those letters.
/// </remarks>
/// <param name="xmlTxt">Text to look for inside the keys.</param>
/// <returns>The matching keys; an empty list when nothing matches or the text is null.</returns>
public ArrayList SearchOneList(string xmlTxt)
{
    ArrayList matches = new ArrayList();
    if (xmlTxt == null)
    {
        return matches;
    }

    string needle = xmlTxt.Trim();
    foreach (DictionaryEntry entry in IndexALL)
    {
        string key = entry.Key.ToString();
        if (key.IndexOf(needle, StringComparison.OrdinalIgnoreCase) > -1)
        {
            matches.Add(key);
        }
    }

    return matches;
}
/// <summary>
/// Writes the write buffer to a new data file (.TDB) and the pending index lines to
/// the matching index file (.IXD), then points the in-memory index at the new file.
/// </summary>
/// <remarks>
/// Does nothing when the write buffer is empty. The data file is written first; if
/// that fails, the error message is written to the console and neither the index
/// file nor the in-memory index is touched. After a successful save the write
/// position and the pending index lines are reset.
/// </remarks>
public void SaveIt()
{
    // Nothing buffered: do not create empty files.
    if (nowP <= 0)
    {
        return;
    }

    // Pick a base name that is not in use yet.
    string tmpname = GetOneName();
    while (File.Exists(Path.Combine(dbdir, tmpname + ".TDB")))
    {
        tmpname = GetOneName();
    }

    // Write all nowP buffered bytes; writing nowP - 1 cut off the last byte of the
    // last record.
    try
    {
        using (FileStream dataFile = new FileStream(Path.Combine(dbdir, tmpname + ".TDB"), FileMode.CreateNew))
        {
            dataFile.Write(mCache, 0, nowP);
        }
    }
    catch (IOException ioError)
    {
        Console.WriteLine(ioError.Message);
        return;
    }
    catch (UnauthorizedAccessException accessError)
    {
        Console.WriteLine(accessError.Message);
        return;
    }

    // One index line per record, each terminated by CR LF.
    StringBuilder indexText = new StringBuilder();
    foreach (string line in mIndex)
    {
        indexText.Append(line).Append("\r\n");
    }
    putFileData(Path.Combine(dbdir, tmpname + ".IXD"), indexText.ToString());

    // Point the buffered records at the new file. Enumerating a copy allows the
    // table itself to be updated inside the loop.
    Hashtable snapshot = (Hashtable)IndexALL.Clone();
    foreach (DictionaryEntry item in snapshot)
    {
        IndexST entry = (IndexST)item.Value;
        if (entry.file != "NOW")
        {
            // Records that already live in a data file stay as they are.
            continue;
        }

        // entry is a copy of the stored struct, so start, length and MD5 are kept.
        entry.file = tmpname;
        IndexALL[item.Key] = entry;
    }

    // The buffer content is on disk now; start over with an empty buffer.
    nowP = 0;
    mIndex.Clear();
}
/// <summary>
/// Computes the MD5 of a text and returns it as upper-case hexadecimal digits.
/// </summary>
/// <remarks>
/// Replaces the obsolete FormsAuthentication.HashPasswordForStoringInConfigFile
/// call, which needs System.Web. The text is hashed as UTF-8 and written as
/// upper-case hex without separators so that MD5 values already stored in index
/// files keep matching.
/// </remarks>
/// <param name="data">Text to hash.</param>
/// <returns>32 upper-case hexadecimal digits.</returns>
private string getMD5name(string data)
{
    using (System.Security.Cryptography.MD5 md5 = System.Security.Cryptography.MD5.Create())
    {
        byte[] hash = md5.ComputeHash(System.Text.Encoding.UTF8.GetBytes(data));

        // BitConverter yields "90-01-50-..."; drop the dashes.
        return BitConverter.ToString(hash).Replace("-", "");
    }
}
- }
- }
- /*
- FileStream fs = new FileStream(FILE_NAME, FileMode.CreateNew);
- // Create the writer for data.
- BinaryWriter w = new BinaryWriter(fs);
- // Write data to Test.data.
- for (int i = 0; i < 11; i++)
- {
- w.Write( (int) i);
- }
- w.Close();
- fs.Close();
- // Create the reader for data.
- fs = new FileStream(FILE_NAME, FileMode.Open, FileAccess.Read);
- BinaryReader r = new BinaryReader(fs);
- // Read data from Test.data.
- for (int i = 0; i < 11; i++)
- {
- Console.WriteLine(r.ReadInt32());
- }
- r.Close();
- fs.Close();
- */