ClassModelBuilder.cs
上传用户:zhangkuixh
上传日期:2013-09-30
资源大小:5473k
文件大小:327k
- using System;
- using System.IO;
- using System.Collections.Generic;
- using System.Text;
- using System.Collections;
- using System.Data.SqlClient;
- using System.Threading;
- using System.Net;
- /*
- ' 迅龙中文分类搜索引擎 v0.6
- '
- ' LGPL 许可发行
- '
- ' 宁夏大学 张冬 康彩 zd4004@163.com
- '
- ' 官网 http://blog.163.com/zd4004/
- */
- namespace XunLong.ModelBuilder
- {
- /// <summary>
- /// 建立模板
- /// </summary>
- public class ClassModelBuilder
- {
// Shared generator for the random title tokens produced by RW_HTMLDB.
// NOTE(review): System.Random instances are not thread-safe; confirm that
// RW_HTMLDB is only reached from one thread at a time.
private static readonly Random raV = new Random();
/// <summary>
/// Checks whether an extracted match looks like usable markup: it must contain
/// at least one '&lt;' and exactly as many '&gt;' as '&lt;'.
/// Only the counts are compared; nesting and ordering are not validated.
/// </summary>
/// <param name="data">Text to inspect; null or empty is rejected.</param>
/// <returns>true when angle brackets are present and balanced in number, otherwise false.</returns>
private bool xStrItOK(string data)
{
    if (string.IsNullOrEmpty(data))
    {
        return false;
    }

    int opened = 0;
    int closed = 0;
    foreach (char c in data)
    {
        if (c == '<')
        {
            opened++;
        }
        else if (c == '>')
        {
            closed++;
        }
    }

    // The previous version returned false on both paths, so no input could
    // ever be accepted. Equal non-zero counts imply both brackets are present.
    return opened > 0 && opened == closed;
}
/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="one"/> inside
/// <paramref name="data"/> using ordinal comparison.
/// </summary>
/// <param name="data">Text to search; null or empty yields 0.</param>
/// <param name="one">Character or substring to count; null or empty yields 0.</param>
/// <returns>The number of occurrences found scanning left to right.</returns>
public int inStrNum(string data, string one)
{
    if (string.IsNullOrEmpty(data) || string.IsNullOrEmpty(one))
    {
        return 0;
    }

    // The earlier "length before minus length after Replace" trick returned
    // occurrences * one.Length for multi-character needles and threw for an
    // empty needle. Scanning gives a true count; for single-character needles
    // the result is the same as before.
    int count = 0;
    int at = data.IndexOf(one, StringComparison.Ordinal);
    while (at >= 0)
    {
        count++;
        at = data.IndexOf(one, at + one.Length, StringComparison.Ordinal);
    }
    return count;
}
/// <summary>
/// Rewrites one stored page of the form
/// "&lt;ZDKC0&gt;title&lt;/ZDKC0&gt; &lt;ZDbody&gt;body&lt;/ZDbody&gt;" so that the title slot
/// holds a random number plus the title length instead of the title text.
/// This keeps pages with identical titles from sharing the title text as a
/// common fragment. Every '*' in the emitted body is replaced by '#', because
/// '*' is the placeholder delimiter used by the template builder.
/// </summary>
/// <param name="data">Raw page text; null is treated as empty.</param>
/// <returns>
/// " &lt;ZDKC0&gt;{random}_{titleLength}&lt;/ZDKC0&gt; &lt;ZDbody&gt;{body}&lt;/ZDbody&gt;".
/// When the body tag is opened but never closed, the whole input is used as the
/// body, the input length replaces the title length, and a trailing space is appended.
/// </returns>
private static string RW_HTMLDB(string data)
{
    const string titleOpen = "<ZDKC0>";
    const string titleClose = "</ZDKC0>";
    const string bodyOpen = "<ZDbody>";
    const string bodyClose = "</ZDbody>";

    if (data == null)
    {
        data = "";
    }

    int nowTitle = raV.Next(25, 984562);

    // Title: only its length is kept. A missing or unclosed title counts as
    // length 0. Tags at index 0 are valid (the old "> 0" test skipped them).
    int titleLength = 0;
    int titleAt = data.IndexOf(titleOpen, StringComparison.Ordinal);
    int titleEnd = data.IndexOf(titleClose, StringComparison.Ordinal);
    if (titleAt >= 0 && titleEnd > titleAt)
    {
        titleLength = titleEnd - titleAt - titleOpen.Length;
    }

    // Body: a missing open tag yields an empty body, as before.
    string body = "";
    int bodyAt = data.IndexOf(bodyOpen, StringComparison.Ordinal);
    if (bodyAt >= 0)
    {
        int bodyStart = bodyAt + bodyOpen.Length;
        int bodyEnd = data.IndexOf(bodyClose, bodyStart, StringComparison.Ordinal);
        if (bodyEnd < 0)
        {
            // Unclosed body: keep the whole input. This used to be reached via a
            // swallowed exception and left '*' unescaped, which corrupts the
            // "*n*" placeholders inserted later.
            return " <ZDKC0>" + nowTitle.ToString() + "_" + data.Length.ToString()
                + "</ZDKC0> <ZDbody>" + data.Replace("*", "#") + "</ZDbody> ";
        }
        body = data.Substring(bodyStart, bodyEnd - bodyStart).Replace("*", "#");
    }

    return " <ZDKC0>" + nowTitle.ToString() + "_" + titleLength.ToString()
        + "</ZDKC0> <ZDbody>" + body + "</ZDbody>";
}
/// <summary>
/// Builds a page template from a set of pages by extracting the tag-delimited
/// fragments that occur in every page.
/// </summary>
/// <remarks>
/// Steps: (1) every page is normalised with RW_HTMLDB; (2) starting from the
/// shortest page, the longest substring that starts with '&lt;', ends with '&gt;'
/// and occurs in all pages is moved into a dictionary and replaced by "*n*" in
/// every page, then the leftovers of the seed are searched the same way;
/// (3) each page is reduced to its placeholder sequence with getXnX;
/// (4) GetComDataEND, or aassBuilderModel when that yields nothing, merges the
/// sequences; (5) placeholders are expanded back to their fragments, separated by '*'.
/// GetComDataEND is defined outside the part of the file reviewed here.
/// </remarks>
/// <param name="mHTMs2">Page key to page text (values are read with ToString()). Not modified.</param>
/// <returns>The template text, or "" when the input is null/empty or no template could be derived.</returns>
public string BuilderModel(Hashtable mHTMs2)
{
    // With no pages the seed fragment is "", which the old loop never removed
    // from its work list, so the method spun forever.
    if (mHTMs2 == null || mHTMs2.Count == 0)
    {
        return "";
    }

    // Work on normalised copies so identical titles are not mistaken for template text.
    Hashtable mHTMs = new Hashtable();
    foreach (DictionaryEntry source in mHTMs2)
    {
        mHTMs.Add(source.Key, RW_HTMLDB(source.Value.ToString()));
    }
    Console.Write("..");

    // Seed the search with the shortest page.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search for a "<...>" substring present in every page.
        string shared = null;
        if (fragment.Length > 4)
        {
            // Candidates are capped at 512 characters. The former extra cap of
            // onemLong / 5 only applied to seeds over 9000 characters, where it
            // is always above 512, so it never changed the outcome.
            int longest = Math.Min(fragment.Length, 512);
            for (int h = longest; h >= 4 && shared == null; h--)
            {
                for (int st = 0; st + h <= fragment.Length && shared == null; st++)
                {
                    if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                    {
                        continue;
                    }

                    string candidate = fragment.Substring(st, h);
                    bool inEveryPage = true;
                    foreach (DictionaryEntry page in mHTMs)
                    {
                        // Ordinal, to agree with string.Replace used below.
                        if (page.Value.ToString().IndexOf(candidate, StringComparison.Ordinal) < 0)
                        {
                            inEveryPage = false;
                            break;
                        }
                    }
                    if (inEveryPage)
                    {
                        shared = candidate;
                    }
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // What is left of the fragment around the match is searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Fall back to deriving a template from the placeholder sequences.
        mmoo = aassBuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand "*n*" into its fragment, wrapped in a temporary marker so that
    // neighbouring fragments end up separated by a single '*'.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", "<kc*Kc>" + dec.Value.ToString() + "<kc*Kc>");
    }
    mmoo = mmoo.Replace("<kc*Kc><kc*Kc><kc*Kc>", "<kc*Kc>");
    mmoo = mmoo.Replace("<kc*Kc><kc*Kc>", "<kc*Kc>");
    mmoo = mmoo.Replace("<kc*Kc>", "*");
    return mmoo;
}
/// <summary>
/// Extracts the "*n*" placeholders from a page and returns them as a sequence
/// of "&lt;n&gt;" tokens; all text outside the placeholders is dropped.
/// Immediate repetitions of the same token are then collapsed for n = 0..999.
/// </summary>
/// <param name="data">Page text containing "*n*" placeholders; null is treated as empty.</param>
/// <returns>
/// The token sequence, or "" when no complete placeholder is present.
/// A trailing '*' without a partner is ignored.
/// </returns>
private string getXnX(string data)
{
    if (string.IsNullOrEmpty(data))
    {
        return "";
    }

    // Pair up asterisks from left to right; the text between a pair is the token.
    StringBuilder tokens = new StringBuilder();
    int pos = 0;
    while (true)
    {
        int open = data.IndexOf('*', pos);
        if (open < 0)
        {
            break;
        }
        int close = data.IndexOf('*', open + 1);
        if (close < 0)
        {
            // Unpaired '*': the old code called Substring with a negative length here and threw.
            break;
        }
        tokens.Append('<').Append(data, open + 1, close - open - 1).Append('>');
        pos = close + 1;
    }

    if (tokens.Length == 0)
    {
        return "";
    }

    string mkc = tokens.ToString();
    for (int uu = 0; uu < 1000; uu++)
    {
        string id = uu.ToString();
        string single = "<" + id + ">";
        string pair = single + single;
        string triple = pair + single;
        string pairMark = "<《2-" + id + "》>";
        string tripleMark = "<《3-" + id + "》>";

        // Same replacement order as before: three passes that shrink runs of
        // three or two tokens through temporary markers, then one direct pass.
        // The old try/catch around these calls could not trigger and is gone.
        for (int pass = 0; pass < 3; pass++)
        {
            mkc = mkc.Replace(triple, tripleMark);
            mkc = mkc.Replace(pair, pairMark);
            mkc = mkc.Replace(tripleMark, single);
            mkc = mkc.Replace(pairMark, single);
        }
        mkc = mkc.Replace(triple, single);
        mkc = mkc.Replace(pair, single);
    }
    return mkc;
}
/// <summary>
/// Stage 2 of template building: derives a template from per-page token
/// sequences (the "&lt;n&gt;" strings produced by getXnX).
/// </summary>
/// <remarks>
/// If the shortest sequence occurs in every page it is returned unchanged.
/// Otherwise the longest "&lt;...&gt;" substrings common to all non-empty pages are
/// replaced by "*n*" placeholders, the pages are reduced with getXnX, and
/// GetComDataEND (or aass3BuilderModel when that yields nothing) merges them;
/// finally this stage's placeholders are expanded back to their fragments.
/// GetComDataEND is defined outside the part of the file reviewed here.
/// </remarks>
/// <param name="mHTMs">Page key to token text (values are read with ToString()). The table passed in is not modified.</param>
/// <returns>The derived template, or "" when none could be derived.</returns>
private string aassBuilderModel(Hashtable mHTMs)
{
    Console.Write("..");

    // Seed with the shortest page. "" also serves as the "nothing chosen yet"
    // marker, so an empty page is overwritten by the page enumerated after it.
    // NOTE(review): the result therefore depends on enumeration order when
    // some pages are empty - confirm whether that is intended.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // When every page contains the seed, the seed is the template.
    bool seedInEveryPage = true;
    foreach (DictionaryEntry page in mHTMs)
    {
        if (page.Value.ToString().IndexOf(shortest, StringComparison.Ordinal) < 0)
        {
            seedInEveryPage = false;
            break;
        }
    }
    if (seedInEveryPage)
    {
        return shortest;
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search; candidates have at least 3 characters.
        string shared = null;
        for (int h = fragment.Length; h >= 3 && shared == null; h--)
        {
            for (int st = 0; st + h <= fragment.Length && shared == null; st++)
            {
                if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                {
                    continue;
                }

                string candidate = fragment.Substring(st, h);
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    // Empty pages are ignored. Ordinal agrees with string.Replace below.
                    string text = page.Value.ToString();
                    if (text.Length > 0 && text.IndexOf(candidate, StringComparison.Ordinal) < 0)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (inEveryPage)
                {
                    shared = candidate;
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // Leftovers longer than 4 characters are searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Nothing merged at this level: try again one stage further down.
        mmoo = aass3BuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand this stage's placeholders back to the fragments they stand for.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", dec.Value.ToString());
    }
    return mmoo;
}
/// <summary>
/// Stage 3 of template building: derives a template from per-page token
/// sequences (the "&lt;n&gt;" strings produced by getXnX).
/// </summary>
/// <remarks>
/// If the shortest sequence occurs in every page it is returned unchanged.
/// Otherwise the longest "&lt;...&gt;" substrings common to all non-empty pages are
/// replaced by "*n*" placeholders, the pages are reduced with getXnX, and
/// GetComDataEND (or aass4BuilderModel when that yields nothing) merges them;
/// finally this stage's placeholders are expanded back to their fragments.
/// GetComDataEND is defined outside the part of the file reviewed here.
/// </remarks>
/// <param name="mHTMs">Page key to token text (values are read with ToString()). The table passed in is not modified.</param>
/// <returns>The derived template, or "" when none could be derived.</returns>
private string aass3BuilderModel(Hashtable mHTMs)
{
    Console.Write("..");

    // Seed with the shortest page. "" also serves as the "nothing chosen yet"
    // marker, so an empty page is overwritten by the page enumerated after it.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // When every page contains the seed, the seed is the template.
    bool seedInEveryPage = true;
    foreach (DictionaryEntry page in mHTMs)
    {
        if (page.Value.ToString().IndexOf(shortest, StringComparison.Ordinal) < 0)
        {
            seedInEveryPage = false;
            break;
        }
    }
    if (seedInEveryPage)
    {
        return shortest;
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search; candidates have at least 3 characters.
        string shared = null;
        for (int h = fragment.Length; h >= 3 && shared == null; h--)
        {
            for (int st = 0; st + h <= fragment.Length && shared == null; st++)
            {
                if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                {
                    continue;
                }

                string candidate = fragment.Substring(st, h);
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    // Empty pages are ignored. Ordinal agrees with string.Replace below.
                    string text = page.Value.ToString();
                    if (text.Length > 0 && text.IndexOf(candidate, StringComparison.Ordinal) < 0)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (inEveryPage)
                {
                    shared = candidate;
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            // Keys come from an existing table and are therefore unique, so the
            // Add below cannot fail; the old catch-and-return-"" was unreachable.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // Leftovers longer than 4 characters are searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Nothing merged at this level: try again one stage further down.
        mmoo = aass4BuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand this stage's placeholders back to the fragments they stand for.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", dec.Value.ToString());
    }
    return mmoo;
}
/// <summary>
/// Stage 4 of template building: derives a template from per-page token
/// sequences (the "&lt;n&gt;" strings produced by getXnX).
/// </summary>
/// <remarks>
/// If the shortest sequence occurs in every page it is returned unchanged.
/// Otherwise the longest "&lt;...&gt;" substrings common to all non-empty pages are
/// replaced by "*n*" placeholders, the pages are reduced with getXnX, and
/// GetComDataEND merges them. When that yields nothing the work is handed
/// back to aass3BuilderModel, so stages 3 and 4 alternate until one of them
/// returns. GetComDataEND is defined outside the part of the file reviewed here.
/// </remarks>
/// <param name="mHTMs">Page key to token text (values are read with ToString()). The table passed in is not modified.</param>
/// <returns>The derived template, or "" when none could be derived.</returns>
private string aass4BuilderModel(Hashtable mHTMs)
{
    Console.Write("..");

    // Seed with the shortest page. "" also serves as the "nothing chosen yet"
    // marker, so an empty page is overwritten by the page enumerated after it.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // When every page contains the seed, the seed is the template.
    bool seedInEveryPage = true;
    foreach (DictionaryEntry page in mHTMs)
    {
        if (page.Value.ToString().IndexOf(shortest, StringComparison.Ordinal) < 0)
        {
            seedInEveryPage = false;
            break;
        }
    }
    if (seedInEveryPage)
    {
        return shortest;
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search; candidates have at least 3 characters.
        string shared = null;
        for (int h = fragment.Length; h >= 3 && shared == null; h--)
        {
            for (int st = 0; st + h <= fragment.Length && shared == null; st++)
            {
                if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                {
                    continue;
                }

                string candidate = fragment.Substring(st, h);
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    // Empty pages are ignored. Ordinal agrees with string.Replace below.
                    string text = page.Value.ToString();
                    if (text.Length > 0 && text.IndexOf(candidate, StringComparison.Ordinal) < 0)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (inEveryPage)
                {
                    shared = candidate;
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // Leftovers longer than 4 characters are searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Nothing merged at this level: hand the reduced pages back to stage 3.
        mmoo = aass3BuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand this stage's placeholders back to the fragments they stand for.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", dec.Value.ToString());
    }
    return mmoo;
}
/// <summary>
/// Stage 5 of template building: derives a template from per-page token
/// sequences (the "&lt;n&gt;" strings produced by getXnX).
/// </summary>
/// <remarks>
/// Same procedure as the other stages: return the shortest sequence when every
/// page contains it; otherwise replace the longest common "&lt;...&gt;" substrings
/// by "*n*" placeholders, reduce the pages with getXnX and merge them with
/// GetComDataEND, falling back to aass3BuilderModel when that yields nothing.
/// GetComDataEND is defined outside the part of the file reviewed here.
/// NOTE(review): no call to this method is visible in the reviewed part of the
/// file (the call in stage 4 is commented out) - confirm whether it is still needed.
/// </remarks>
/// <param name="mHTMs">Page key to token text (values are read with ToString()). The table passed in is not modified.</param>
/// <returns>The derived template, or "" when none could be derived.</returns>
private string aass5BuilderModel(Hashtable mHTMs)
{
    Console.Write("..");

    // Seed with the shortest page. "" also serves as the "nothing chosen yet"
    // marker, so an empty page is overwritten by the page enumerated after it.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // When every page contains the seed, the seed is the template.
    bool seedInEveryPage = true;
    foreach (DictionaryEntry page in mHTMs)
    {
        if (page.Value.ToString().IndexOf(shortest, StringComparison.Ordinal) < 0)
        {
            seedInEveryPage = false;
            break;
        }
    }
    if (seedInEveryPage)
    {
        return shortest;
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search; candidates have at least 3 characters.
        string shared = null;
        for (int h = fragment.Length; h >= 3 && shared == null; h--)
        {
            for (int st = 0; st + h <= fragment.Length && shared == null; st++)
            {
                if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                {
                    continue;
                }

                string candidate = fragment.Substring(st, h);
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    // Empty pages are ignored. Ordinal agrees with string.Replace below.
                    string text = page.Value.ToString();
                    if (text.Length > 0 && text.IndexOf(candidate, StringComparison.Ordinal) < 0)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (inEveryPage)
                {
                    shared = candidate;
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // Leftovers longer than 4 characters are searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Nothing merged at this level: hand the reduced pages to stage 3.
        mmoo = aass3BuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand this stage's placeholders back to the fragments they stand for.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", dec.Value.ToString());
    }
    return mmoo;
}
/// <summary>
/// Stage 6 of template building: derives a template from per-page token
/// sequences (the "&lt;n&gt;" strings produced by getXnX).
/// </summary>
/// <remarks>
/// Same procedure as the other stages: return the shortest sequence when every
/// page contains it; otherwise replace the longest common "&lt;...&gt;" substrings
/// by "*n*" placeholders, reduce the pages with getXnX and merge them with
/// GetComDataEND, falling back to aass3BuilderModel when that yields nothing.
/// GetComDataEND is defined outside the part of the file reviewed here.
/// NOTE(review): no call to this method is visible in the reviewed part of the
/// file (the call in stage 5 is commented out) - confirm whether it is still needed.
/// </remarks>
/// <param name="mHTMs">Page key to token text (values are read with ToString()). The table passed in is not modified.</param>
/// <returns>The derived template, or "" when none could be derived.</returns>
private string aass6BuilderModel(Hashtable mHTMs)
{
    Console.Write("..");

    // Seed with the shortest page. "" also serves as the "nothing chosen yet"
    // marker, so an empty page is overwritten by the page enumerated after it.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // When every page contains the seed, the seed is the template.
    bool seedInEveryPage = true;
    foreach (DictionaryEntry page in mHTMs)
    {
        if (page.Value.ToString().IndexOf(shortest, StringComparison.Ordinal) < 0)
        {
            seedInEveryPage = false;
            break;
        }
    }
    if (seedInEveryPage)
    {
        return shortest;
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search; candidates have at least 3 characters.
        string shared = null;
        for (int h = fragment.Length; h >= 3 && shared == null; h--)
        {
            for (int st = 0; st + h <= fragment.Length && shared == null; st++)
            {
                if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                {
                    continue;
                }

                string candidate = fragment.Substring(st, h);
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    // Empty pages are ignored. Ordinal agrees with string.Replace below.
                    string text = page.Value.ToString();
                    if (text.Length > 0 && text.IndexOf(candidate, StringComparison.Ordinal) < 0)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (inEveryPage)
                {
                    shared = candidate;
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // Leftovers longer than 4 characters are searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Nothing merged at this level: hand the reduced pages to stage 3.
        mmoo = aass3BuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand this stage's placeholders back to the fragments they stand for.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", dec.Value.ToString());
    }
    return mmoo;
}
/// <summary>
/// Stage 7 of template building: derives a template from per-page token
/// sequences (the "&lt;n&gt;" strings produced by getXnX).
/// </summary>
/// <remarks>
/// Same procedure as the other stages: return the shortest sequence when every
/// page contains it; otherwise replace the longest common "&lt;...&gt;" substrings
/// by "*n*" placeholders, reduce the pages with getXnX and merge them with
/// GetComDataEND, falling back to aass3BuilderModel when that yields nothing.
/// GetComDataEND is defined outside the part of the file reviewed here.
/// NOTE(review): no call to this method is visible in the reviewed part of the
/// file (the call in stage 6 is commented out) - confirm whether it is still needed.
/// </remarks>
/// <param name="mHTMs">Page key to token text (values are read with ToString()). The table passed in is not modified.</param>
/// <returns>The derived template, or "" when none could be derived.</returns>
private string aass7BuilderModel(Hashtable mHTMs)
{
    Console.Write("..");

    // Seed with the shortest page. "" also serves as the "nothing chosen yet"
    // marker, so an empty page is overwritten by the page enumerated after it.
    string shortest = "";
    foreach (DictionaryEntry page in mHTMs)
    {
        string text = page.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // When every page contains the seed, the seed is the template.
    bool seedInEveryPage = true;
    foreach (DictionaryEntry page in mHTMs)
    {
        if (page.Value.ToString().IndexOf(shortest, StringComparison.Ordinal) < 0)
        {
            seedInEveryPage = false;
            break;
        }
    }
    if (seedInEveryPage)
    {
        return shortest;
    }

    Hashtable tdict = new Hashtable();      // placeholder number -> shared fragment
    int tdictI = 0;                         // next placeholder number
    Hashtable oneModels = new Hashtable();  // fragments still to be searched
    oneModels.Add(shortest, "0");
    Hashtable newModels = new Hashtable();  // leftovers produced by the current step

    while (oneModels.Count > 0)
    {
        // Take whichever fragment the table enumerates first.
        string fragment = null;
        foreach (DictionaryEntry pending in oneModels)
        {
            fragment = pending.Key.ToString();
            break;
        }

        // Longest-first, leftmost-first search; candidates have at least 3 characters.
        string shared = null;
        for (int h = fragment.Length; h >= 3 && shared == null; h--)
        {
            for (int st = 0; st + h <= fragment.Length && shared == null; st++)
            {
                if (fragment[st] != '<' || fragment[st + h - 1] != '>')
                {
                    continue;
                }

                string candidate = fragment.Substring(st, h);
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    // Empty pages are ignored. Ordinal agrees with string.Replace below.
                    string text = page.Value.ToString();
                    if (text.Length > 0 && text.IndexOf(candidate, StringComparison.Ordinal) < 0)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (inEveryPage)
                {
                    shared = candidate;
                }
            }
        }

        if (shared != null)
        {
            // Record the fragment and swap it for its placeholder in every page.
            tdict.Add(tdictI, shared);
            string placeholder = "*" + tdictI.ToString() + "*";
            Hashtable rewritten = new Hashtable();
            foreach (DictionaryEntry page in mHTMs)
            {
                rewritten.Add(page.Key, page.Value.ToString().Replace(shared, placeholder));
            }
            mHTMs = rewritten;
            tdictI = tdictI + 1;

            // Leftovers longer than 4 characters are searched later.
            foreach (string leftover in fragment.Replace(shared, "*").Split('*'))
            {
                if (leftover.Length > 4 && !newModels.Contains(leftover))
                {
                    newModels.Add(leftover, "0");
                }
            }
        }

        // The fragment is finished whether or not it produced a match.
        oneModels.Remove(fragment);
        foreach (DictionaryEntry queued in newModels)
        {
            if (!oneModels.Contains(queued.Key))
            {
                oneModels.Add(queued.Key, "0");
            }
        }
        newModels.Clear();
    }

    // Reduce every page to its sequence of placeholders.
    Hashtable tokenised = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        tokenised.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = tokenised;

    string mmoo = GetComDataEND(mHTMs, tdict);
    if (mmoo.Length == 0)
    {
        // Nothing merged at this level: hand the reduced pages to stage 3.
        mmoo = aass3BuilderModel(mHTMs);
    }
    mmoo = mmoo.Replace("<", "*");
    mmoo = mmoo.Replace(">", "*");
    if (mmoo.Length == 0)
    {
        return "";
    }

    // Expand this stage's placeholders back to the fragments they stand for.
    foreach (DictionaryEntry dec in tdict)
    {
        mmoo = mmoo.Replace("*" + dec.Key.ToString() + "*", dec.Value.ToString());
    }
    return mmoo;
}
/// <summary>
/// Level 8: builds one combined template from a set of page sub-templates by
/// factoring out the markup runs that all of them share.
/// </summary>
/// <remarks>
/// Steps: (1) take the shortest value as the candidate; if every value equals or
/// contains it, return it unchanged. (2) Otherwise repeatedly look, longest first,
/// for a run of the candidate that starts with '&lt;', ends with '&gt;' and occurs in
/// every non-empty page; each hit gets a numeric dictionary index, is replaced by
/// "*index*" in all pages, and splits the candidate into smaller fragments that
/// are scanned in turn. (3) The rewritten pages go through getXnX, GetComDataEND
/// and (as a fallback) aass3BuilderModel, and the placeholders in that result are
/// expanded back into the original markup.
/// Substring tests are ordinal so that they agree with String.Replace, which is
/// ordinal as well.
/// </remarks>
/// <param name="mHTMs">Pages to compare; only the values are inspected, each via
/// ToString(), so a null value throws. The caller's table is not modified.</param>
/// <returns>The combined template, or an empty string when none could be built.</returns>
private string aass8BuilderModel(Hashtable mHTMs)
{
    // Progress marker on the console.
    Console.Write("..");

    // Pick the shortest value as the candidate template. An empty candidate is
    // always replaced by the next value enumerated, whatever its length.
    string shortest = "";
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // Fast path: every page equals or contains the candidate.
    bool sharedByAll = true;
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest != text && text.IndexOf(shortest, StringComparison.Ordinal) == -1)
        {
            sharedByAll = false;
            break;
        }
    }
    if (sharedByAll)
    {
        return shortest;
    }

    Hashtable dictionary = new Hashtable();   // placeholder index -> shared markup run
    int nextIndex = 0;
    Hashtable pending = new Hashtable();      // fragments still waiting to be scanned
    pending.Add(shortest, "0");
    Hashtable discovered = new Hashtable();   // fragments produced by the latest match
    string current = "";

    while (true)
    {
        // Retire the fragment that was just scanned, then queue what it produced.
        // The Remove-then-Add order is kept because it influences the order in
        // which the Hashtable later enumerates its keys.
        if (current.Length > 0)
        {
            pending.Remove(current);
        }
        foreach (DictionaryEntry queued in discovered)
        {
            if (!pending.Contains(queued.Key))
            {
                pending.Add(queued.Key, "0");
            }
        }
        discovered.Clear();

        // Take whichever fragment the table enumerates first; stop when none is left.
        bool haveFragment = false;
        foreach (DictionaryEntry head in pending)
        {
            current = head.Key.ToString();
            haveFragment = true;
            break;
        }
        if (!haveFragment)
        {
            break;
        }

        // Longest-first search; runs shorter than 3 characters are not considered.
        bool matched = false;
        for (int length = current.Length; length >= 3 && !matched; length--)
        {
            for (int start = 0; start + length <= current.Length && !matched; start++)
            {
                // A candidate run must open with '<' and close with '>'.
                if (current[start] != '<' || current[start + length - 1] != '>')
                {
                    continue;
                }
                string candidate = current.Substring(start, length);

                // The run has to occur in every non-empty page.
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    string pageText = page.Value.ToString();
                    if (pageText.Length > 0 && pageText.IndexOf(candidate, StringComparison.Ordinal) == -1)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (!inEveryPage)
                {
                    continue;
                }

                // Record the run and swap it for its placeholder in a fresh copy of
                // the page table, leaving the caller's table untouched.
                dictionary.Add(nextIndex, candidate);
                string placeholder = "*" + nextIndex.ToString() + "*";
                Hashtable rewritten = new Hashtable();
                foreach (DictionaryEntry page in mHTMs)
                {
                    rewritten.Add(page.Key, page.Value.ToString().Replace(candidate, placeholder));
                }
                mHTMs = rewritten;
                nextIndex = nextIndex + 1;

                // Whatever surrounds the run becomes new fragments; only fragments
                // longer than 4 characters are queued.
                foreach (string fragment in current.Replace(candidate, "*").Split('*'))
                {
                    if (fragment.Length > 4 && !discovered.Contains(fragment))
                    {
                        discovered.Add(fragment, "0");
                    }
                }

                // The fragment set changed, so restart the scan from the top.
                matched = true;
            }
        }
    }

    // Reduce each page with getXnX (per the original note it keeps only the
    // "*n*" placeholders; that helper is defined elsewhere, TODO confirm).
    Hashtable reduced = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        reduced.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = reduced;

    // Derive the item template, falling back to the level-3 builder when empty.
    string template = GetComDataEND(mHTMs, dictionary);
    if (template.Length == 0)
    {
        template = aass3BuilderModel(mHTMs);
    }
    template = template.Replace("<", "*");
    template = template.Replace(">", "*");
    if (template.Length == 0)
    {
        return "";
    }

    // Expand every placeholder back into the markup run it stands for.
    foreach (DictionaryEntry item in dictionary)
    {
        template = template.Replace("*" + item.Key.ToString() + "*", item.Value.ToString());
    }
    return template;
}
/// <summary>
/// Level 9: builds one combined template from a set of page sub-templates by
/// factoring out the markup runs that all of them share.
/// </summary>
/// <remarks>
/// Steps: (1) take the shortest value as the candidate; if every value equals or
/// contains it, return it unchanged. (2) Otherwise repeatedly look, longest first,
/// for a run of the candidate that starts with '&lt;', ends with '&gt;' and occurs in
/// every non-empty page; each hit gets a numeric dictionary index, is replaced by
/// "*index*" in all pages, and splits the candidate into smaller fragments that
/// are scanned in turn. (3) The rewritten pages go through getXnX, GetComDataEND
/// and (as a fallback) aass3BuilderModel, and the placeholders in that result are
/// expanded back into the original markup.
/// Substring tests are ordinal so that they agree with String.Replace, which is
/// ordinal as well.
/// </remarks>
/// <param name="mHTMs">Pages to compare; only the values are inspected, each via
/// ToString(), so a null value throws. The caller's table is not modified.</param>
/// <returns>The combined template, or an empty string when none could be built.</returns>
private string aass9BuilderModel(Hashtable mHTMs)
{
    // Progress marker on the console.
    Console.Write("..");

    // Pick the shortest value as the candidate template. An empty candidate is
    // always replaced by the next value enumerated, whatever its length.
    string shortest = "";
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // Fast path: every page equals or contains the candidate.
    bool sharedByAll = true;
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest != text && text.IndexOf(shortest, StringComparison.Ordinal) == -1)
        {
            sharedByAll = false;
            break;
        }
    }
    if (sharedByAll)
    {
        return shortest;
    }

    Hashtable dictionary = new Hashtable();   // placeholder index -> shared markup run
    int nextIndex = 0;
    Hashtable pending = new Hashtable();      // fragments still waiting to be scanned
    pending.Add(shortest, "0");
    Hashtable discovered = new Hashtable();   // fragments produced by the latest match
    string current = "";

    while (true)
    {
        // Retire the fragment that was just scanned, then queue what it produced.
        // The Remove-then-Add order is kept because it influences the order in
        // which the Hashtable later enumerates its keys.
        if (current.Length > 0)
        {
            pending.Remove(current);
        }
        foreach (DictionaryEntry queued in discovered)
        {
            if (!pending.Contains(queued.Key))
            {
                pending.Add(queued.Key, "0");
            }
        }
        discovered.Clear();

        // Take whichever fragment the table enumerates first; stop when none is left.
        bool haveFragment = false;
        foreach (DictionaryEntry head in pending)
        {
            current = head.Key.ToString();
            haveFragment = true;
            break;
        }
        if (!haveFragment)
        {
            break;
        }

        // Longest-first search; runs shorter than 3 characters are not considered.
        bool matched = false;
        for (int length = current.Length; length >= 3 && !matched; length--)
        {
            for (int start = 0; start + length <= current.Length && !matched; start++)
            {
                // A candidate run must open with '<' and close with '>'.
                if (current[start] != '<' || current[start + length - 1] != '>')
                {
                    continue;
                }
                string candidate = current.Substring(start, length);

                // The run has to occur in every non-empty page.
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    string pageText = page.Value.ToString();
                    if (pageText.Length > 0 && pageText.IndexOf(candidate, StringComparison.Ordinal) == -1)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (!inEveryPage)
                {
                    continue;
                }

                // Record the run and swap it for its placeholder in a fresh copy of
                // the page table, leaving the caller's table untouched.
                dictionary.Add(nextIndex, candidate);
                string placeholder = "*" + nextIndex.ToString() + "*";
                Hashtable rewritten = new Hashtable();
                foreach (DictionaryEntry page in mHTMs)
                {
                    rewritten.Add(page.Key, page.Value.ToString().Replace(candidate, placeholder));
                }
                mHTMs = rewritten;
                nextIndex = nextIndex + 1;

                // Whatever surrounds the run becomes new fragments; only fragments
                // longer than 4 characters are queued.
                foreach (string fragment in current.Replace(candidate, "*").Split('*'))
                {
                    if (fragment.Length > 4 && !discovered.Contains(fragment))
                    {
                        discovered.Add(fragment, "0");
                    }
                }

                // The fragment set changed, so restart the scan from the top.
                matched = true;
            }
        }
    }

    // Reduce each page with getXnX (per the original note it keeps only the
    // "*n*" placeholders; that helper is defined elsewhere, TODO confirm).
    Hashtable reduced = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        reduced.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = reduced;

    // Derive the item template, falling back to the level-3 builder when empty.
    string template = GetComDataEND(mHTMs, dictionary);
    if (template.Length == 0)
    {
        template = aass3BuilderModel(mHTMs);
    }
    template = template.Replace("<", "*");
    template = template.Replace(">", "*");
    if (template.Length == 0)
    {
        return "";
    }

    // Expand every placeholder back into the markup run it stands for.
    foreach (DictionaryEntry item in dictionary)
    {
        template = template.Replace("*" + item.Key.ToString() + "*", item.Value.ToString());
    }
    return template;
}
- //10
/// <summary>
/// Level 10: builds one combined template from a set of page sub-templates by
/// factoring out the markup runs that all of them share.
/// </summary>
/// <remarks>
/// Steps: (1) take the shortest value as the candidate; if every value equals or
/// contains it, return it unchanged. (2) Otherwise repeatedly look, longest first,
/// for a run of the candidate that starts with '&lt;', ends with '&gt;' and occurs in
/// every non-empty page; each hit gets a numeric dictionary index, is replaced by
/// "*index*" in all pages, and splits the candidate into smaller fragments that
/// are scanned in turn. (3) The rewritten pages go through getXnX, GetComDataEND
/// and (as a fallback) aass3BuilderModel, and the placeholders in that result are
/// expanded back into the original markup.
/// Substring tests are ordinal so that they agree with String.Replace, which is
/// ordinal as well.
/// </remarks>
/// <param name="mHTMs">Pages to compare; only the values are inspected, each via
/// ToString(), so a null value throws. The caller's table is not modified.</param>
/// <returns>The combined template, or an empty string when none could be built.</returns>
private string aass10BuilderModel(Hashtable mHTMs)
{
    // Progress marker on the console.
    Console.Write("..");

    // Pick the shortest value as the candidate template. An empty candidate is
    // always replaced by the next value enumerated, whatever its length.
    string shortest = "";
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // Fast path: every page equals or contains the candidate.
    bool sharedByAll = true;
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest != text && text.IndexOf(shortest, StringComparison.Ordinal) == -1)
        {
            sharedByAll = false;
            break;
        }
    }
    if (sharedByAll)
    {
        return shortest;
    }

    Hashtable dictionary = new Hashtable();   // placeholder index -> shared markup run
    int nextIndex = 0;
    Hashtable pending = new Hashtable();      // fragments still waiting to be scanned
    pending.Add(shortest, "0");
    Hashtable discovered = new Hashtable();   // fragments produced by the latest match
    string current = "";

    while (true)
    {
        // Retire the fragment that was just scanned, then queue what it produced.
        // The Remove-then-Add order is kept because it influences the order in
        // which the Hashtable later enumerates its keys.
        if (current.Length > 0)
        {
            pending.Remove(current);
        }
        foreach (DictionaryEntry queued in discovered)
        {
            if (!pending.Contains(queued.Key))
            {
                pending.Add(queued.Key, "0");
            }
        }
        discovered.Clear();

        // Take whichever fragment the table enumerates first; stop when none is left.
        bool haveFragment = false;
        foreach (DictionaryEntry head in pending)
        {
            current = head.Key.ToString();
            haveFragment = true;
            break;
        }
        if (!haveFragment)
        {
            break;
        }

        // Longest-first search; runs shorter than 3 characters are not considered.
        bool matched = false;
        for (int length = current.Length; length >= 3 && !matched; length--)
        {
            for (int start = 0; start + length <= current.Length && !matched; start++)
            {
                // A candidate run must open with '<' and close with '>'.
                if (current[start] != '<' || current[start + length - 1] != '>')
                {
                    continue;
                }
                string candidate = current.Substring(start, length);

                // The run has to occur in every non-empty page.
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    string pageText = page.Value.ToString();
                    if (pageText.Length > 0 && pageText.IndexOf(candidate, StringComparison.Ordinal) == -1)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (!inEveryPage)
                {
                    continue;
                }

                // Record the run and swap it for its placeholder in a fresh copy of
                // the page table, leaving the caller's table untouched.
                dictionary.Add(nextIndex, candidate);
                string placeholder = "*" + nextIndex.ToString() + "*";
                Hashtable rewritten = new Hashtable();
                foreach (DictionaryEntry page in mHTMs)
                {
                    rewritten.Add(page.Key, page.Value.ToString().Replace(candidate, placeholder));
                }
                mHTMs = rewritten;
                nextIndex = nextIndex + 1;

                // Whatever surrounds the run becomes new fragments; only fragments
                // longer than 4 characters are queued.
                foreach (string fragment in current.Replace(candidate, "*").Split('*'))
                {
                    if (fragment.Length > 4 && !discovered.Contains(fragment))
                    {
                        discovered.Add(fragment, "0");
                    }
                }

                // The fragment set changed, so restart the scan from the top.
                matched = true;
            }
        }
    }

    // Reduce each page with getXnX (per the original note it keeps only the
    // "*n*" placeholders; that helper is defined elsewhere, TODO confirm).
    Hashtable reduced = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        reduced.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = reduced;

    // Derive the item template, falling back to the level-3 builder when empty.
    string template = GetComDataEND(mHTMs, dictionary);
    if (template.Length == 0)
    {
        template = aass3BuilderModel(mHTMs);
    }
    template = template.Replace("<", "*");
    template = template.Replace(">", "*");
    if (template.Length == 0)
    {
        return "";
    }

    // Expand every placeholder back into the markup run it stands for.
    foreach (DictionaryEntry item in dictionary)
    {
        template = template.Replace("*" + item.Key.ToString() + "*", item.Value.ToString());
    }
    return template;
}
- //11
/// <summary>
/// Level 11: builds one combined template from a set of page sub-templates by
/// factoring out the markup runs that all of them share.
/// </summary>
/// <remarks>
/// Steps: (1) take the shortest value as the candidate; if every value equals or
/// contains it, return it unchanged. (2) Otherwise repeatedly look, longest first,
/// for a run of the candidate that starts with '&lt;', ends with '&gt;' and occurs in
/// every non-empty page; each hit gets a numeric dictionary index, is replaced by
/// "*index*" in all pages, and splits the candidate into smaller fragments that
/// are scanned in turn. (3) The rewritten pages go through getXnX, GetComDataEND
/// and (as a fallback) aass3BuilderModel, and the placeholders in that result are
/// expanded back into the original markup.
/// Substring tests are ordinal so that they agree with String.Replace, which is
/// ordinal as well.
/// </remarks>
/// <param name="mHTMs">Pages to compare; only the values are inspected, each via
/// ToString(), so a null value throws. The caller's table is not modified.</param>
/// <returns>The combined template, or an empty string when none could be built.</returns>
private string aass11BuilderModel(Hashtable mHTMs)
{
    // Progress marker on the console.
    Console.Write("..");

    // Pick the shortest value as the candidate template. An empty candidate is
    // always replaced by the next value enumerated, whatever its length.
    string shortest = "";
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // Fast path: every page equals or contains the candidate.
    bool sharedByAll = true;
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest != text && text.IndexOf(shortest, StringComparison.Ordinal) == -1)
        {
            sharedByAll = false;
            break;
        }
    }
    if (sharedByAll)
    {
        return shortest;
    }

    Hashtable dictionary = new Hashtable();   // placeholder index -> shared markup run
    int nextIndex = 0;
    Hashtable pending = new Hashtable();      // fragments still waiting to be scanned
    pending.Add(shortest, "0");
    Hashtable discovered = new Hashtable();   // fragments produced by the latest match
    string current = "";

    while (true)
    {
        // Retire the fragment that was just scanned, then queue what it produced.
        // The Remove-then-Add order is kept because it influences the order in
        // which the Hashtable later enumerates its keys.
        if (current.Length > 0)
        {
            pending.Remove(current);
        }
        foreach (DictionaryEntry queued in discovered)
        {
            if (!pending.Contains(queued.Key))
            {
                pending.Add(queued.Key, "0");
            }
        }
        discovered.Clear();

        // Take whichever fragment the table enumerates first; stop when none is left.
        bool haveFragment = false;
        foreach (DictionaryEntry head in pending)
        {
            current = head.Key.ToString();
            haveFragment = true;
            break;
        }
        if (!haveFragment)
        {
            break;
        }

        // Longest-first search; runs shorter than 3 characters are not considered.
        bool matched = false;
        for (int length = current.Length; length >= 3 && !matched; length--)
        {
            for (int start = 0; start + length <= current.Length && !matched; start++)
            {
                // A candidate run must open with '<' and close with '>'.
                if (current[start] != '<' || current[start + length - 1] != '>')
                {
                    continue;
                }
                string candidate = current.Substring(start, length);

                // The run has to occur in every non-empty page.
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    string pageText = page.Value.ToString();
                    if (pageText.Length > 0 && pageText.IndexOf(candidate, StringComparison.Ordinal) == -1)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (!inEveryPage)
                {
                    continue;
                }

                // Record the run and swap it for its placeholder in a fresh copy of
                // the page table, leaving the caller's table untouched.
                dictionary.Add(nextIndex, candidate);
                string placeholder = "*" + nextIndex.ToString() + "*";
                Hashtable rewritten = new Hashtable();
                foreach (DictionaryEntry page in mHTMs)
                {
                    rewritten.Add(page.Key, page.Value.ToString().Replace(candidate, placeholder));
                }
                mHTMs = rewritten;
                nextIndex = nextIndex + 1;

                // Whatever surrounds the run becomes new fragments; only fragments
                // longer than 4 characters are queued.
                foreach (string fragment in current.Replace(candidate, "*").Split('*'))
                {
                    if (fragment.Length > 4 && !discovered.Contains(fragment))
                    {
                        discovered.Add(fragment, "0");
                    }
                }

                // The fragment set changed, so restart the scan from the top.
                matched = true;
            }
        }
    }

    // Reduce each page with getXnX (per the original note it keeps only the
    // "*n*" placeholders; that helper is defined elsewhere, TODO confirm).
    Hashtable reduced = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        reduced.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = reduced;

    // Derive the item template, falling back to the level-3 builder when empty.
    string template = GetComDataEND(mHTMs, dictionary);
    if (template.Length == 0)
    {
        template = aass3BuilderModel(mHTMs);
    }
    template = template.Replace("<", "*");
    template = template.Replace(">", "*");
    if (template.Length == 0)
    {
        return "";
    }

    // Expand every placeholder back into the markup run it stands for.
    foreach (DictionaryEntry item in dictionary)
    {
        template = template.Replace("*" + item.Key.ToString() + "*", item.Value.ToString());
    }
    return template;
}
- ////////////////////////////////////////////////////////////////////////////////
- //12
- ////////////////////////////////////////////////////////////////////////////////
- //12
/// <summary>
/// Level 12: builds one combined template from a set of page sub-templates by
/// factoring out the markup runs that all of them share.
/// </summary>
/// <remarks>
/// Steps: (1) take the shortest value as the candidate; if every value equals or
/// contains it, return it unchanged. (2) Otherwise repeatedly look, longest first,
/// for a run of the candidate that starts with '&lt;', ends with '&gt;' and occurs in
/// every non-empty page; each hit gets a numeric dictionary index, is replaced by
/// "*index*" in all pages, and splits the candidate into smaller fragments that
/// are scanned in turn. (3) The rewritten pages go through getXnX, GetComDataEND
/// and (as a fallback) aass3BuilderModel, and the placeholders in that result are
/// expanded back into the original markup.
/// Substring tests are ordinal so that they agree with String.Replace, which is
/// ordinal as well.
/// </remarks>
/// <param name="mHTMs">Pages to compare; only the values are inspected, each via
/// ToString(), so a null value throws. The caller's table is not modified.</param>
/// <returns>The combined template, or an empty string when none could be built.</returns>
private string aass12BuilderModel(Hashtable mHTMs)
{
    // Progress marker on the console.
    Console.Write("..");

    // Pick the shortest value as the candidate template. An empty candidate is
    // always replaced by the next value enumerated, whatever its length.
    string shortest = "";
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // Fast path: every page equals or contains the candidate.
    bool sharedByAll = true;
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest != text && text.IndexOf(shortest, StringComparison.Ordinal) == -1)
        {
            sharedByAll = false;
            break;
        }
    }
    if (sharedByAll)
    {
        return shortest;
    }

    Hashtable dictionary = new Hashtable();   // placeholder index -> shared markup run
    int nextIndex = 0;
    Hashtable pending = new Hashtable();      // fragments still waiting to be scanned
    pending.Add(shortest, "0");
    Hashtable discovered = new Hashtable();   // fragments produced by the latest match
    string current = "";

    while (true)
    {
        // Retire the fragment that was just scanned, then queue what it produced.
        // The Remove-then-Add order is kept because it influences the order in
        // which the Hashtable later enumerates its keys.
        if (current.Length > 0)
        {
            pending.Remove(current);
        }
        foreach (DictionaryEntry queued in discovered)
        {
            if (!pending.Contains(queued.Key))
            {
                pending.Add(queued.Key, "0");
            }
        }
        discovered.Clear();

        // Take whichever fragment the table enumerates first; stop when none is left.
        bool haveFragment = false;
        foreach (DictionaryEntry head in pending)
        {
            current = head.Key.ToString();
            haveFragment = true;
            break;
        }
        if (!haveFragment)
        {
            break;
        }

        // Longest-first search; runs shorter than 3 characters are not considered.
        bool matched = false;
        for (int length = current.Length; length >= 3 && !matched; length--)
        {
            for (int start = 0; start + length <= current.Length && !matched; start++)
            {
                // A candidate run must open with '<' and close with '>'.
                if (current[start] != '<' || current[start + length - 1] != '>')
                {
                    continue;
                }
                string candidate = current.Substring(start, length);

                // The run has to occur in every non-empty page.
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    string pageText = page.Value.ToString();
                    if (pageText.Length > 0 && pageText.IndexOf(candidate, StringComparison.Ordinal) == -1)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (!inEveryPage)
                {
                    continue;
                }

                // Record the run and swap it for its placeholder in a fresh copy of
                // the page table, leaving the caller's table untouched.
                dictionary.Add(nextIndex, candidate);
                string placeholder = "*" + nextIndex.ToString() + "*";
                Hashtable rewritten = new Hashtable();
                foreach (DictionaryEntry page in mHTMs)
                {
                    rewritten.Add(page.Key, page.Value.ToString().Replace(candidate, placeholder));
                }
                mHTMs = rewritten;
                nextIndex = nextIndex + 1;

                // Whatever surrounds the run becomes new fragments; only fragments
                // longer than 4 characters are queued.
                foreach (string fragment in current.Replace(candidate, "*").Split('*'))
                {
                    if (fragment.Length > 4 && !discovered.Contains(fragment))
                    {
                        discovered.Add(fragment, "0");
                    }
                }

                // The fragment set changed, so restart the scan from the top.
                matched = true;
            }
        }
    }

    // Reduce each page with getXnX (per the original note it keeps only the
    // "*n*" placeholders; that helper is defined elsewhere, TODO confirm).
    Hashtable reduced = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        reduced.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = reduced;

    // Derive the item template, falling back to the level-3 builder when empty.
    string template = GetComDataEND(mHTMs, dictionary);
    if (template.Length == 0)
    {
        template = aass3BuilderModel(mHTMs);
    }
    template = template.Replace("<", "*");
    template = template.Replace(">", "*");
    if (template.Length == 0)
    {
        return "";
    }

    // Expand every placeholder back into the markup run it stands for.
    foreach (DictionaryEntry item in dictionary)
    {
        template = template.Replace("*" + item.Key.ToString() + "*", item.Value.ToString());
    }
    return template;
}
- ////////////////////////////////////////////////////////////////////////////////
- //13
- ////////////////////////////////////////////////////////////////////////////////
- //13
/// <summary>
/// Level 13: builds one combined template from a set of page sub-templates by
/// factoring out the markup runs that all of them share.
/// </summary>
/// <remarks>
/// Steps: (1) take the shortest value as the candidate; if every value equals or
/// contains it, return it unchanged. (2) Otherwise repeatedly look, longest first,
/// for a run of the candidate that starts with '&lt;', ends with '&gt;' and occurs in
/// every non-empty page; each hit gets a numeric dictionary index, is replaced by
/// "*index*" in all pages, and splits the candidate into smaller fragments that
/// are scanned in turn. (3) The rewritten pages go through getXnX, GetComDataEND
/// and (as a fallback) aass3BuilderModel, and the placeholders in that result are
/// expanded back into the original markup.
/// Substring tests are ordinal so that they agree with String.Replace, which is
/// ordinal as well. Unlike the lower levels visible in this file, this level
/// writes no progress marker to the console.
/// </remarks>
/// <param name="mHTMs">Pages to compare; only the values are inspected, each via
/// ToString(), so a null value throws. The caller's table is not modified.</param>
/// <returns>The combined template, or an empty string when none could be built.</returns>
private string aass13BuilderModel(Hashtable mHTMs)
{
    // Pick the shortest value as the candidate template. An empty candidate is
    // always replaced by the next value enumerated, whatever its length.
    string shortest = "";
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest.Length == 0 || shortest.Length > text.Length)
        {
            shortest = text;
        }
    }

    // Fast path: every page equals or contains the candidate.
    bool sharedByAll = true;
    foreach (DictionaryEntry entry in mHTMs)
    {
        string text = entry.Value.ToString();
        if (shortest != text && text.IndexOf(shortest, StringComparison.Ordinal) == -1)
        {
            sharedByAll = false;
            break;
        }
    }
    if (sharedByAll)
    {
        return shortest;
    }

    Hashtable dictionary = new Hashtable();   // placeholder index -> shared markup run
    int nextIndex = 0;
    Hashtable pending = new Hashtable();      // fragments still waiting to be scanned
    pending.Add(shortest, "0");
    Hashtable discovered = new Hashtable();   // fragments produced by the latest match
    string current = "";

    while (true)
    {
        // Retire the fragment that was just scanned, then queue what it produced.
        // The Remove-then-Add order is kept because it influences the order in
        // which the Hashtable later enumerates its keys.
        if (current.Length > 0)
        {
            pending.Remove(current);
        }
        foreach (DictionaryEntry queued in discovered)
        {
            if (!pending.Contains(queued.Key))
            {
                pending.Add(queued.Key, "0");
            }
        }
        discovered.Clear();

        // Take whichever fragment the table enumerates first; stop when none is left.
        bool haveFragment = false;
        foreach (DictionaryEntry head in pending)
        {
            current = head.Key.ToString();
            haveFragment = true;
            break;
        }
        if (!haveFragment)
        {
            break;
        }

        // Longest-first search; runs shorter than 3 characters are not considered.
        bool matched = false;
        for (int length = current.Length; length >= 3 && !matched; length--)
        {
            for (int start = 0; start + length <= current.Length && !matched; start++)
            {
                // A candidate run must open with '<' and close with '>'.
                if (current[start] != '<' || current[start + length - 1] != '>')
                {
                    continue;
                }
                string candidate = current.Substring(start, length);

                // The run has to occur in every non-empty page.
                bool inEveryPage = true;
                foreach (DictionaryEntry page in mHTMs)
                {
                    string pageText = page.Value.ToString();
                    if (pageText.Length > 0 && pageText.IndexOf(candidate, StringComparison.Ordinal) == -1)
                    {
                        inEveryPage = false;
                        break;
                    }
                }
                if (!inEveryPage)
                {
                    continue;
                }

                // Record the run and swap it for its placeholder in a fresh copy of
                // the page table, leaving the caller's table untouched.
                dictionary.Add(nextIndex, candidate);
                string placeholder = "*" + nextIndex.ToString() + "*";
                Hashtable rewritten = new Hashtable();
                foreach (DictionaryEntry page in mHTMs)
                {
                    rewritten.Add(page.Key, page.Value.ToString().Replace(candidate, placeholder));
                }
                mHTMs = rewritten;
                nextIndex = nextIndex + 1;

                // Whatever surrounds the run becomes new fragments; only fragments
                // longer than 4 characters are queued.
                foreach (string fragment in current.Replace(candidate, "*").Split('*'))
                {
                    if (fragment.Length > 4 && !discovered.Contains(fragment))
                    {
                        discovered.Add(fragment, "0");
                    }
                }

                // The fragment set changed, so restart the scan from the top.
                matched = true;
            }
        }
    }

    // Reduce each page with getXnX (per the original note it keeps only the
    // "*n*" placeholders; that helper is defined elsewhere, TODO confirm).
    Hashtable reduced = new Hashtable();
    foreach (DictionaryEntry page in mHTMs)
    {
        reduced.Add(page.Key, getXnX(page.Value.ToString()));
    }
    mHTMs = reduced;

    // Derive the item template, falling back to the level-3 builder when empty.
    string template = GetComDataEND(mHTMs, dictionary);
    if (template.Length == 0)
    {
        template = aass3BuilderModel(mHTMs);
    }
    template = template.Replace("<", "*");
    template = template.Replace(">", "*");
    if (template.Length == 0)
    {
        return "";
    }

    // Expand every placeholder back into the markup run it stands for.
    foreach (DictionaryEntry item in dictionary)
    {
        template = template.Replace("*" + item.Key.ToString() + "*", item.Value.ToString());
    }
    return template;
}
- ////////////////////////////////////////////////////////////////////////////////
- //14
- ////////////////////////////////////////////////////////////////////////////////
- //14
- /// <summary>
- /// 《14》根据子模板建立一个超级模板
- /// </summary>
- /// <param name="mHTMs"></param>
- /// <returns></returns>
- private string aass14BuilderModel(Hashtable mHTMs)
- {
- //得到最短的串
- string oneModel = "";
- foreach (System.Collections.DictionaryEntry de in mHTMs)
- {
- if (oneModel.Length == 0)
- {
- oneModel = de.Value.ToString();
- }
- else
- {
- if (oneModel.Length > de.Value.ToString().Length)
- {
- oneModel = de.Value.ToString();
- }
- }
- }
- foreach (System.Collections.DictionaryEntry de in mHTMs)
- {
- if (oneModel != de.Value.ToString()) //不相同
- {
- if (de.Value.ToString().IndexOf(oneModel) == -1) //不包含
- {
- goto cXStart;
- }
- }
- }
- return oneModel;
- cXStart:
- //得到匹配临时模板的原始长度
- int onemLong = oneModel.Length;
- Hashtable tdict = new Hashtable();
- int tdictI = 0; //字典序号
- tdict.Clear();
- Hashtable oneModels = new Hashtable();
- oneModels.Clear();
- oneModels.Add(oneModel, "0"); //碎片如果相同 就不用继续寻找添加
- //存放临时碎片
- Hashtable newModels = new Hashtable();
- newModels.Clear();
- oneModel = "";
- NewStart: //开始遍历匹配串
- if (oneModel.Length > 0)
- {
- oneModels.Remove(oneModel); //移除旧的字符串 压入分解后的字符串
- }
- foreach (System.Collections.DictionaryEntry de in newModels) //加入新的碎片项
- {
- if (oneModels.Contains(de.Key) == false)
- {
- oneModels.Add(de.Key, "0");
- }
- }
- newModels.Clear();
- foreach (System.Collections.DictionaryEntry de in oneModels)
- {
- oneModel = de.Key.ToString();
- if (oneModel.Length > 2)
- {
- int h = oneModel.Length;
- while (true)
- {
- if (h < 3) //最短字符不能小于5 >=四
- { goto nextcmd2; }
- int st = 0;
- while (true)
- {
- if (st + h > oneModel.Length)
- { goto nextcmd1; }
- string a1 = oneModel.Substring(st, 1);
- string a2 = oneModel.Substring(st + h - 1, 1);
- if ((a1 != "<") | (a2 != ">")) //判断是否合法 含有 < >
- {
- goto nextcmd3;
- }
- string onestr = oneModel.Substring(st, h);
- if ((onestr.Substring(0, 1) != "<") | (onestr.Substring(onestr.Length - 1, 1) != ">") | (onestr.IndexOf(">") == -1) | (onestr.IndexOf("<") == -1))
- {
- goto nextcmd3; //取开头和末尾在<>中的数据
- }
- //遍历数据 是否符合全部数据
- foreach (System.Collections.DictionaryEntry de1 in mHTMs)
- {
- if ((de1.Value.ToString().IndexOf(onestr) == -1) & (de1.Value.ToString().Length > 0))
- {
- goto nextcmd3;
- }
- }
- //符合所有项都存在的条件 压入字典
- tdict.Add(tdictI, onestr);
- //数据替换
- Hashtable mtmpp = new Hashtable();
- mtmpp.Clear();
- foreach (System.Collections.DictionaryEntry de2 in mHTMs)
- {
- // mtmpp.Add(de2.Key, de2.Value.ToString().Replace(onestr, "*" + tdictI.ToString() + "*"));
- string tmp_str = de2.Value.ToString();
- if (tmp_str == null)
- {
- }
- else