SgmlReader.cs
上传用户:jingke1993
上传日期:2022-06-08
资源大小:140k
文件大小:65k
源码类别:

xml/soap/webservice

开发平台:

Visual C++

  1. /*
  2. * An XmlReader implementation for loading SGML (including HTML) converting it
  3. * to well formed XML, by adding missing quotes, empty attribute values, ignoring
  4. * duplicate attributes, case folding on tag names, adding missing closing tags
  5. * based on SGML DTD information, and so on.
  6. *
  7. * Copyright (c) 2002 Microsoft Corporation. All rights reserved.
  8. *
  9. * Chris Lovett
  10. */
  11. using System;
  12. using System.Xml;
  13. using System.IO;
  14. using System.Collections;
  15. using System.Text;
  16. using System.Reflection;
  17. namespace Sgml {
  /// <summary>
  /// Selects how tag names are case folded.  SGML is case insensitive, so
  /// names can be normalised to upper or lower case; with <c>None</c> the case
  /// is left alone, except that end tags are folded to match the start tags.
  /// </summary>
  public enum CaseFolding {
      /// <summary>Leave the case of names alone.</summary>
      None = 0,
      /// <summary>Convert names to upper case.</summary>
      ToUpper = 1,
      /// <summary>Convert names to lower case.</summary>
      ToLower = 2
  }
  /// <summary>
  /// A stack that keeps a high water mark of allocated objects: slots above
  /// <see cref="Count"/> keep whatever object was stored there earlier so the
  /// client can reuse it instead of allocating a new one.  It is used for the
  /// parser's element stack and for the attributes of each element.
  /// </summary>
  internal class HWStack {
      object[] items;   // backing store; null until the first Push
      int size;         // number of allocated slots in items
      int count;        // number of slots currently in use
      int growth;       // how many slots to add when the store is full

      /// <summary>Creates an empty stack that grows by <paramref name="growth"/> slots at a time.</summary>
      public HWStack(int growth) {
          this.growth = growth;
      }

      /// <summary>Number of slots in use.  Setting it only moves the top; stored objects are kept.</summary>
      public int Count {
          get { return this.count; }
          set { this.count = value; }
      }

      /// <summary>Number of allocated slots.</summary>
      public int Size {
          get { return this.size; }
      }

      /// <summary>
      /// Gets the item at index <paramref name="i"/>, or null when the index is
      /// outside the allocated range.  The setter stores directly into the slot.
      /// </summary>
      public object this[int i] {
          get { return (i >= 0 && i < this.size) ? items[i] : null; }
          set { this.items[i] = value; }
      }

      /// <summary>
      /// Removes the top slot and returns the item that is now on top, or null
      /// when the stack has become (or already was) empty.
      /// </summary>
      public object Pop() {
          if (this.count <= 0) {
              // Nothing to pop: never let the count go negative, a later Push
              // would otherwise index the array at -1.
              return null;
          }
          this.count--;
          if (this.count > 0) {
              return items[this.count - 1];
          }
          return null;
      }

      /// <summary>
      /// Claims the next slot and returns whatever object is already stored in
      /// it.  A null result means there is nothing to reuse and the caller has
      /// to store a new object through the indexer at Count-1.
      /// </summary>
      public object Push() {
          if (this.count == this.size) {
              // Always grow by at least one slot so a non-positive growth
              // value cannot leave the store too small for the new item.
              int newsize = this.size + Math.Max(this.growth, 1);
              object[] newarray = new object[newsize];
              if (this.items != null)
                  Array.Copy(this.items, newarray, this.size);
              this.size = newsize;
              this.items = newarray;
          }
          return items[this.count++];
      }

      /// <summary>
      /// Removes the item at index <paramref name="i"/> and shifts the items
      /// above it down by one.
      /// </summary>
      public void RemoveAt(int i) {
          int last = this.count - 1;
          Array.Copy(this.items, i + 1, this.items, i, last - i);
          // The shift leaves a second reference to the former top item in the
          // vacated slot.  Clear it, otherwise the next Push() would return
          // that live object for reuse and the caller would overwrite it.
          this.items[last] = null;
          this.count--;
      }
  }
  /// <summary>
  /// A single attribute of an element.  The AttDef is assigned by the
  /// validation process and supplies the default value when no literal value
  /// was given.
  /// </summary>
  internal class Attribute {
      internal string Name;         // the atomized name (using XmlNameTable).
      internal AttDef DtdType;      // the AttDef of the attribute from the SGML DTD.
      internal char QuoteChar;      // the quote character used for the attribute value.
      internal string literalValue; // the attribute value as written, or null.

      /// <summary>
      /// Re-initialises this instance for a new attribute.  Attribute objects
      /// are reused during parsing to reduce memory allocations; the DTD type
      /// is cleared here.
      /// </summary>
      public void Reset(string name, string value, char quote) {
          this.DtdType = null;
          this.QuoteChar = quote;
          this.literalValue = value;
          this.Name = name;
      }

      /// <summary>
      /// The literal value when there is one, otherwise the default from the
      /// DTD type, otherwise null.  Setting it replaces the literal value.
      /// </summary>
      public string Value {
          get {
              if (this.literalValue == null) {
                  return (this.DtdType != null) ? this.DtdType.Default : null;
              }
              return this.literalValue;
          }
          set {
              this.literalValue = value;
          }
      }

      /// <summary>True when no literal value has been stored.</summary>
      public bool IsDefault {
          get { return this.literalValue == null; }
      }
  }
  /// <summary>
  /// Models an XML node.  An array of the elements in scope is maintained
  /// while parsing for validation purposes, and these Node objects are reused
  /// to reduce object allocation, hence the Reset method.
  /// </summary>
  internal class Node {
      internal XmlNodeType NodeType;
      internal string Value;
      internal XmlSpace Space;
      internal string XmlLang;
      internal bool IsEmpty;
      internal string Name;
      internal ElementDecl DtdType; // the DTD type found via validation
      internal State CurrentState;
      internal bool Simulated; // tag was injected into result stream.
      HWStack attributes = new HWStack(10);

      /// <summary>
      /// Re-initialises this node for reuse.  The attribute count is set back
      /// to zero (the Attribute objects stay in the stack for reuse);
      /// CurrentState and Simulated are not touched.
      /// </summary>
      public void Reset(string name, XmlNodeType nt, string value) {
          this.Value = value;
          this.Name = name;
          this.NodeType = nt;
          this.Space = XmlSpace.None;
          this.XmlLang = null;
          this.IsEmpty = true;
          this.attributes.Count = 0;
          this.DtdType = null;
      }

      /// <summary>
      /// Adds an attribute and returns it, or returns null without adding
      /// anything when an attribute of that name is already present.
      /// </summary>
      /// <param name="name">Attribute name.</param>
      /// <param name="value">Literal value, may be null.</param>
      /// <param name="quotechar">Quote character used around the value.</param>
      /// <param name="caseInsensitive">When true, names that differ only in case count as duplicates.</param>
      public Attribute AddAttribute(string name, string value, char quotechar, bool caseInsensitive) {
          Attribute a;
          // check for duplicates!
          for (int i = 0, n = this.attributes.Count; i < n; i++) {
              a = (Attribute)this.attributes[i];
              if (caseInsensitive && string.Compare(a.Name, name, true) == 0) {
                  return null;
              } else if ((object)a.Name == (object)name) {
                  // reference comparison: only the very same string instance matches here
                  return null;
              }
          }
          // This code makes use of the high water mark for attribute objects,
          // and reuses existing Attribute objects to avoid memory allocation.
          a = (Attribute)this.attributes.Push();
          if (a == null) {
              a = new Attribute();
              this.attributes[this.attributes.Count - 1] = a;
          }
          a.Reset(name, value, quotechar);
          return a;
      }

      /// <summary>Removes the first attribute whose name equals <paramref name="name"/>, if any.</summary>
      public void RemoveAttribute(string name) {
          for (int i = 0, n = this.attributes.Count; i < n; i++) {
              Attribute a = (Attribute)this.attributes[i];
              if (a.Name == name) {
                  this.attributes.RemoveAt(i);
                  return;
              }
          }
      }

      /// <summary>
      /// Copies the attributes of <paramref name="n"/> onto this node,
      /// including their DTD type.  The copied value is the effective Value
      /// of the source attribute.
      /// </summary>
      public void CopyAttributes(Node n) {
          for (int i = 0, len = n.attributes.Count; i < len; i++) {
              Attribute a = (Attribute)n.attributes[i];
              Attribute na = this.AddAttribute(a.Name, a.Value, a.QuoteChar, false);
              // AddAttribute returns null for a duplicate name; skip it
              // instead of dereferencing null.
              if (na != null) {
                  na.DtdType = a.DtdType;
              }
          }
      }

      /// <summary>Number of attributes currently on this node.</summary>
      public int AttributeCount {
          get {
              return this.attributes.Count;
          }
      }

      /// <summary>Returns the index of the attribute named <paramref name="name"/>, or -1 when there is none.</summary>
      public int GetAttribute(string name) {
          for (int i = 0, n = this.attributes.Count; i < n; i++) {
              Attribute a = (Attribute)this.attributes[i];
              if (a.Name == name) {
                  return i;
              }
          }
          return -1;
      }

      /// <summary>Returns the attribute at index <paramref name="i"/>, or null when the index is out of range.</summary>
      public Attribute GetAttribute(int i) {
          if (i >= 0 && i < this.attributes.Count) {
              Attribute a = (Attribute)this.attributes[i];
              return a;
          }
          return null;
      }
  }
  /// <summary>
  /// Tracks the current state of the SgmlReader.
  /// </summary>
  internal enum State {
      /// <summary>The initial state (Read has not been called yet).</summary>
      Initial,
      /// <summary>Expecting text or markup.</summary>
      Markup,
      /// <summary>Positioned on an end tag.</summary>
      EndTag,
      /// <summary>Positioned on an attribute.</summary>
      Attr,
      /// <summary>Positioned in an attribute value.</summary>
      AttrValue,
      /// <summary>Positioned on a Text node.</summary>
      Text,
      /// <summary>Positioned on a text node, and we have hit a start tag.</summary>
      PartialTag,
      /// <summary>We are auto-closing tags (this is like EndTag), but the end tag was generated.</summary>
      AutoClose,
      /// <summary>We are on a CDATA type node, e.g. &lt;script&gt;, where we have special parsing rules.</summary>
      CData,
      PartialText,
      /// <summary>We pushed a pseudo-start tag, need to continue with previous start tag.</summary>
      PseudoStartTag,
      Eof
  }
  221.     /// <summary>
  222.     /// SgmlReader is an XmlReader API over any SGML document (including built in 
  223.     /// support for HTML).  
  224.     /// </summary>
  225.     public class SgmlReader : XmlReader {
  // --- parser state ---
  SgmlDtd dtd;
  Entity current;
  State state;
  XmlNameTable nametable;
  char partial;
  object endTag;
  HWStack stack;
  Node node; // current node (except for attributes)

  // --- attributes are handled separately using these members ---
  Attribute a;
  int apos; // which attribute are we positioned on in the collection.

  // --- input, scratch buffers and logging ---
  Uri baseUri;
  StringBuilder sb;
  StringBuilder name;
  TextWriter log;
  bool foundRoot;

  // --- autoclose support ---
  Node newnode;
  int poptodepth;
  int rootCount;
  bool isHtml;
  string rootElementName;
  string href;
  string errorLogFile;
  Entity lastError;
  string proxy;
  TextReader inputStream;

  // --- DOCTYPE information and reader options ---
  string syslit;
  string pubid;
  string subset;
  string docType;
  WhitespaceHandling whitespaceHandling;
  CaseFolding folding = CaseFolding.None;
  bool stripDocType = true;
  string startTag;

  /// <summary>
  /// Creates a reader in its initial state with a fresh NameTable.
  /// </summary>
  public SgmlReader() {
      this.Init();
      this.nametable = new NameTable();
  }
  /// <summary>
  /// Specify the SgmlDtd object directly.  This allows you to cache the DTD
  /// and share it across multiple SgmlReaders.  To load a DTD from a URL use
  /// the SystemLiteral property.  Reading the property first runs the lazy
  /// DTD load against the current base Uri.
  /// </summary>
  public SgmlDtd Dtd {
      get {
          this.LazyLoadDtd(this.baseUri);
          return this.dtd;
      }
      set {
          this.dtd = value;
      }
  }
  /// <summary>
  /// Loads the DTD if none has been set yet.  Without a system literal the
  /// built-in "Html.dtd" resource is used, but only when the doc type is
  /// "html" (ignoring case).  With a system literal the DTD is loaded from
  /// that location, resolved against <paramref name="baseUri"/>, the reader's
  /// own base Uri, or the current directory, in that order.  After a
  /// successful load the root element name (case folded) and the HTML flag
  /// are derived from the DTD name.
  /// </summary>
  /// <param name="baseUri">Base Uri used to resolve the system literal; may be null.</param>
  private void LazyLoadDtd(Uri baseUri) {
      if (this.dtd != null) {
          return; // already loaded or supplied by the caller
      }
      if (this.syslit == null || this.syslit == "") {
          if (this.docType != null && StringUtilities.EqualsIgnoreCase(this.docType, "html")) {
              Assembly asm = typeof(SgmlReader).Assembly;
              // resource name is "<assembly simple name>.Html.dtd"
              string resourceName = asm.FullName.Split(',')[0] + ".Html.dtd";
              Stream stm = asm.GetManifestResourceStream(resourceName);
              if (stm != null) {
                  // NOTE(review): the reader is never disposed here; confirm whether
                  // SgmlDtd.Parse consumes it completely before wrapping it in a using block.
                  StreamReader sr = new StreamReader(stm);
                  this.dtd = SgmlDtd.Parse(baseUri, "HTML", null, sr, null, this.proxy, this.nametable);
              }
          }
      } else {
          if (baseUri != null) {
              baseUri = new Uri(baseUri, this.syslit);
          } else if (this.baseUri != null) {
              baseUri = new Uri(this.baseUri, this.syslit);
          } else {
              // The trailing separator makes the current directory act as a
              // directory (not a file) when the relative literal is resolved.
              baseUri = new Uri(new Uri(Directory.GetCurrentDirectory() + "\\"), this.syslit);
          }
          this.dtd = SgmlDtd.Parse(baseUri, this.docType, this.pubid, baseUri.AbsoluteUri, this.subset, this.proxy, this.nametable);
      }
      if (this.dtd != null && this.dtd.Name != null) {
          switch (this.CaseFolding) {
              case CaseFolding.ToUpper:
                  this.rootElementName = this.dtd.Name.ToUpper();
                  break;
              case CaseFolding.ToLower:
                  this.rootElementName = this.dtd.Name.ToLower();
                  break;
              default:
                  this.rootElementName = this.dtd.Name;
                  break;
          }
          this.isHtml = StringUtilities.EqualsIgnoreCase(this.dtd.Name, "html");
      }
  }
  /// <summary>
  /// Gets or sets the name of the root element specified in the DOCTYPE tag.
  /// </summary>
  public string DocType {
      get {
          return this.docType;
      }
      set {
          this.docType = value;
      }
  }

  /// <summary>
  /// Gets or sets the PUBLIC identifier in the DOCTYPE tag.
  /// </summary>
  public string PublicIdentifier {
      get {
          return this.pubid;
      }
      set {
          this.pubid = value;
      }
  }

  /// <summary>
  /// Gets or sets the SYSTEM literal in the DOCTYPE tag, which identifies the
  /// location of the DTD.
  /// </summary>
  public string SystemLiteral {
      get {
          return this.syslit;
      }
      set {
          this.syslit = value;
      }
  }

  /// <summary>
  /// Gets or sets the DTD internal subset in the DOCTYPE tag.
  /// </summary>
  public string InternalSubset {
      get {
          return this.subset;
      }
      set {
          this.subset = value;
      }
  }
  /// <summary>
  /// The input stream containing the SGML data to parse.  You must specify
  /// this property or the Href property before calling Read().  Assigning it
  /// re-initialises the reader state.
  /// </summary>
  public TextReader InputStream {
      get {
          return this.inputStream;
      }
      set {
          this.inputStream = value;
          this.Init();
      }
  }
  /// <summary>
  /// Proxy server to use when data has to be loaded via HTTP from outside
  /// the firewall, for example "itgproxy:80".
  /// </summary>
  public string WebProxy {
      get {
          return this.proxy;
      }
      set {
          this.proxy = value;
      }
  }
  /// <summary>
  /// Sets the base Uri used to resolve relative Uri's such as the
  /// SystemLiteral and Href properties.  This is a method because BaseURI is
  /// a read-only property on the base XmlReader class.
  /// </summary>
  /// <param name="uri">Uri string passed to the Uri constructor.</param>
  public void SetBaseUri(string uri) {
      Uri parsed = new Uri(uri);
      this.baseUri = parsed;
  }
  /// <summary>
  /// Specify the location of the input SGML document as a URL.  Assigning it
  /// re-initialises the reader state.  When no base Uri has been set yet, one
  /// is derived: from the href itself when it contains "://", otherwise from
  /// the current directory.  Assigning null clears the href and derives no
  /// base Uri.
  /// </summary>
  public string Href {
      get { return this.href; }
      set {
          this.href = value;
          Init();
          // Nothing to derive when a base Uri is already known, or when the
          // href was cleared (value.IndexOf would throw on null).
          if (this.baseUri != null || value == null) {
              return;
          }
          if (value.IndexOf("://") > 0) {
              this.baseUri = new Uri(value);
          } else {
              this.baseUri = new Uri("file:///" + Directory.GetCurrentDirectory() + "//");
          }
      }
  }
  /// <summary>
  /// Whether to strip out the DOCTYPE tag from the output (default true).
  /// </summary>
  public bool StripDocType {
      get {
          return this.stripDocType;
      }
      set {
          this.stripDocType = value;
      }
  }

  /// <summary>
  /// Gets or sets the case folding option (the field defaults to CaseFolding.None).
  /// </summary>
  public CaseFolding CaseFolding {
      get {
          return this.folding;
      }
      set {
          this.folding = value;
      }
  }
  /// <summary>
  /// DTD validation errors are written to this writer.
  /// </summary>
  public TextWriter ErrorLog {
      get {
          return this.log;
      }
      set {
          this.log = value;
      }
  }

  /// <summary>
  /// DTD validation errors are written to this log file.  Assigning a path
  /// opens a new StreamWriter on it and installs that writer as ErrorLog.
  /// </summary>
  public string ErrorLogFile {
      get {
          return this.errorLogFile;
      }
      set {
          this.errorLogFile = value;
          StreamWriter writer = new StreamWriter(value);
          this.ErrorLog = writer;
      }
  }
  /// <summary>
  /// Writes a formatted error message to ErrorLog; does nothing when no
  /// ErrorLog is set.  When the current entity differs from the one recorded
  /// for the previous error, the entity's Context() is appended and the
  /// entity is remembered; otherwise the message is prefixed with the
  /// resolved path, entity name, line and position.
  /// </summary>
  /// <param name="msg">Format string for String.Format.</param>
  /// <param name="args">Format arguments.</param>
  void Log(string msg, params string[] args) {
      if (ErrorLog == null) {
          return;
      }
      string err = String.Format(msg, args);
      bool entityChanged = this.lastError != this.current;
      if (entityChanged) {
          err = err + "    " + this.current.Context();
          this.lastError = this.current;
          ErrorLog.WriteLine("### Error:" + err);
          return;
      }
      string path = "";
      if (this.current.ResolvedUri != null) {
          path = this.current.ResolvedUri.AbsolutePath;
      }
      ErrorLog.WriteLine("### Error in " +
          path + "#" +
          this.current.Name +
          ", line " + this.current.Line + ", position " + this.current.LinePosition + ": " +
          err);
  }

  /// <summary>
  /// Convenience overload that logs a message with a single character argument.
  /// </summary>
  void Log(string msg, char ch) {
      string[] args = new string[] { ch.ToString() };
      Log(msg, args);
  }
  /// <summary>
  /// Puts the reader back into its initial state: a fresh element stack with
  /// a single Document node on it, empty scratch buffers, no current entity
  /// and no pending attribute, partial character or auto-close state.
  /// </summary>
  void Init() {
      this.state = State.Initial;
      this.stack = new HWStack(10);
      // The Document node is always the bottom entry of the stack.
      this.node = Push(null, XmlNodeType.Document, null);
      this.node.IsEmpty = false;
      this.sb = new StringBuilder();
      this.name = new StringBuilder();
      this.poptodepth = 0;
      this.current = null;
      this.partial = '\0'; // NUL character: no partial character pending
      this.endTag = null;
      this.a = null;
      this.apos = 0;
      this.newnode = null;
      this.rootCount = 0;
      this.foundRoot = false;
  }
  /// <summary>
  /// Pushes a node onto the element stack, reusing the Node object left in
  /// the slot when there is one, resets it with the given values and makes it
  /// the current node.
  /// </summary>
  /// <returns>The node that was pushed.</returns>
  Node Push(string name, XmlNodeType nt, string value) {
      Node slot = (Node)this.stack.Push();
      if (slot == null) {
          // nothing to reuse in this slot yet: allocate and store a new Node
          slot = new Node();
          this.stack[this.stack.Count - 1] = slot;
      }
      slot.Reset(name, nt, value);
      this.node = slot;
      return slot;
  }
  /// <summary>
  /// Exchanges the two topmost entries of the element stack.  Does nothing
  /// when the stack holds fewer than two entries.
  /// </summary>
  void SwapTopNodes() {
      int upper = this.stack.Count - 1;
      if (upper <= 0) {
          return;
      }
      int lower = upper - 1;
      Node below = (Node)this.stack[lower];
      this.stack[lower] = this.stack[upper];
      this.stack[upper] = below;
  }
  /// <summary>
  /// Pushes a copy of <paramref name="n"/> onto the element stack and makes
  /// it the current node.  A copy is needed because Node objects are reused
  /// in the stack.  Name, node type, value, DTD type, IsEmpty, Space,
  /// XmlLang, CurrentState and the attributes are copied.
  /// </summary>
  /// <returns>The pushed copy.</returns>
  Node Push(Node n) {
      Node copy = Push(n.Name, n.NodeType, n.Value);
      copy.CurrentState = n.CurrentState;
      copy.XmlLang = n.XmlLang;
      copy.Space = n.Space;
      copy.IsEmpty = n.IsEmpty;
      copy.DtdType = n.DtdType;
      copy.CopyAttributes(n);
      this.node = copy;
      return copy;
  }
  /// <summary>
  /// Pops the top entry off the element stack and makes the entry returned
  /// by the stack the current node.  The bottom entry is never popped.
  /// </summary>
  void Pop() {
      if (this.stack.Count <= 1) {
          return;
      }
      this.node = (Node)this.stack.Pop();
  }

  /// <summary>
  /// Returns the top entry of the element stack, or null when only the
  /// bottom entry (or nothing) is on the stack.
  /// </summary>
  Node Top() {
      int topIndex = this.stack.Count - 1;
      return (topIndex > 0) ? (Node)this.stack[topIndex] : null;
  }
  /// <summary>
  /// The type of the current node: Attribute or Text while positioned on an
  /// attribute or its value, EndElement on a real or generated end tag,
  /// otherwise the type recorded on the current node.
  /// </summary>
  public override XmlNodeType NodeType {
      get {
          switch (this.state) {
              case State.Attr:
                  return XmlNodeType.Attribute;
              case State.AttrValue:
                  return XmlNodeType.Text;
              case State.EndTag:
              case State.AutoClose:
                  return XmlNodeType.EndElement;
              default:
                  return this.node.NodeType;
          }
      }
  }
  /// <summary>
  /// The name of the current node; identical to LocalName.
  /// </summary>
  public override string Name {
      get { return this.LocalName; }
  }

  /// <summary>
  /// The attribute name while positioned on an attribute, null while
  /// positioned on an attribute value, otherwise the current node's name.
  /// </summary>
  public override string LocalName {
      get {
          if (this.state == State.Attr) {
              return this.a.Name;
          }
          if (this.state == State.AttrValue) {
              return null;
          }
          return this.node.Name;
      }
  }
  /// <summary>
  /// SGML has no namespaces, so this is empty unless the reader is
  /// positioned on an attribute named "xmlns" (ignoring case), in which case
  /// the xmlns namespace URI is returned.
  /// </summary>
  public override string NamespaceURI {
      get {
          bool onXmlnsAttribute = this.state == State.Attr
              && StringUtilities.EqualsIgnoreCase(this.a.Name, "xmlns");
          return onXmlnsAttribute ? "http://www.w3.org/2000/xmlns/" : String.Empty;
      }
  }

  /// <summary>
  /// Always empty: SGML has no namespaces.
  /// </summary>
  public override string Prefix {
      get { return String.Empty; }
  }
  /// <summary>
  /// True while positioned on an attribute or attribute value, otherwise
  /// true when the current node carries a non-null value.
  /// </summary>
  public override bool HasValue {
      get {
          return this.state == State.Attr
              || this.state == State.AttrValue
              || this.node.Value != null;
      }
  }

  /// <summary>
  /// The value of the current attribute while positioned on an attribute or
  /// attribute value, otherwise the value of the current node.
  /// </summary>
  public override string Value {
      get {
          bool onAttribute = this.state == State.Attr || this.state == State.AttrValue;
          return onAttribute ? this.a.Value : this.node.Value;
      }
  }
  /// <summary>
  /// Depth of the current position, derived from the element stack count:
  /// the count itself on an attribute, one more on an attribute value, and
  /// one less everywhere else.
  /// </summary>
  public override int Depth {
      get {
          int entries = this.stack.Count;
          switch (this.state) {
              case State.Attr:
                  return entries;
              case State.AttrValue:
                  return entries + 1;
              default:
                  return entries - 1;
          }
      }
  }
  /// <summary>
  /// The absolute form of the base Uri, or an empty string when no base Uri is set.
  /// </summary>
  public override string BaseURI {
      get {
          if (this.baseUri == null) {
              return "";
          }
          return this.baseUri.AbsoluteUri;
      }
  }

  /// <summary>
  /// The IsEmpty flag of the current node while in the Markup, Attr or
  /// AttrValue state; false in every other state.
  /// </summary>
  public override bool IsEmptyElement {
      get {
          switch (this.state) {
              case State.Markup:
              case State.Attr:
              case State.AttrValue:
                  return this.node.IsEmpty;
              default:
                  return false;
          }
      }
  }

  /// <summary>
  /// While positioned on an attribute or attribute value, whether that
  /// attribute has no literal value; false otherwise.
  /// </summary>
  public override bool IsDefault {
      get {
          bool onAttribute = this.state == State.Attr || this.state == State.AttrValue;
          return onAttribute ? this.a.IsDefault : false;
      }
  }
  /// <summary>
  /// The quote character of the attribute the reader is positioned on, or
  /// the NUL character when there is no current attribute.
  /// </summary>
  public override char QuoteChar {
      get {
          if (this.a != null) return this.a.QuoteChar;
          return '\0';
      }
  }
  /// <summary>
  /// The first Space value other than XmlSpace.None found when walking the
  /// element stack from the top down; XmlSpace.None when there is none.
  /// </summary>
  public override XmlSpace XmlSpace {
      get {
          // NOTE(review): the walk stops before index 1, so the entries at
          // indexes 0 and 1 are never consulted; confirm that is intended.
          int i = this.stack.Count - 1;
          while (i > 1) {
              Node entry = (Node)this.stack[i];
              XmlSpace xs = entry.Space;
              if (xs != XmlSpace.None) {
                  return xs;
              }
              i--;
          }
          return XmlSpace.None;
      }
  }

  /// <summary>
  /// The first non-null XmlLang value found when walking the element stack
  /// from the top down; an empty string when there is none.
  /// </summary>
  public override string XmlLang {
      get {
          // NOTE(review): the walk stops before index 1, so the entries at
          // indexes 0 and 1 are never consulted; confirm that is intended.
          int i = this.stack.Count - 1;
          while (i > 1) {
              Node entry = (Node)this.stack[i];
              string xmllang = entry.XmlLang;
              if (xmllang != null) {
                  return xmllang;
              }
              i--;
          }
          return String.Empty;
      }
  }
  /// <summary>
  /// Gets or sets the WhitespaceHandling value stored on this reader.
  /// </summary>
  public WhitespaceHandling WhitespaceHandling {
      get { return this.whitespaceHandling; }
      set { this.whitespaceHandling = value; }
  }
  /// <summary>
  /// Number of attributes on the current node.  Zero while positioned on an
  /// attribute or attribute value, and zero for nodes that are neither an
  /// Element nor a DocumentType.
  /// </summary>
  public override int AttributeCount {
      get {
          if (this.state == State.Attr || this.state == State.AttrValue) {
              return 0;
          }
          XmlNodeType kind = this.node.NodeType;
          bool carriesAttributes = kind == XmlNodeType.Element
              || kind == XmlNodeType.DocumentType;
          return carriesAttributes ? this.node.AttributeCount : 0;
      }
  }
  /// <summary>
  /// Returns the value of the attribute named <paramref name="name"/> on the
  /// current node, or null when there is no such attribute or the reader is
  /// positioned on an attribute or attribute value.
  /// </summary>
  public override string GetAttribute(string name) {
      if (this.state == State.Attr || this.state == State.AttrValue) {
          return null;
      }
      int index = this.node.GetAttribute(name);
      return (index >= 0) ? GetAttribute(index) : null;
  }

  /// <summary>
  /// Same as GetAttribute(name); the namespace is ignored because SGML has
  /// no namespaces.
  /// </summary>
  public override string GetAttribute(string name, string namespaceURI) {
      return GetAttribute(name);
  }

  /// <summary>
  /// Returns the value of the attribute at index <paramref name="i"/> on the
  /// current node.
  /// </summary>
  /// <exception cref="IndexOutOfRangeException">
  /// There is no attribute at that index, or the reader is positioned on an
  /// attribute or attribute value.
  /// </exception>
  public override string GetAttribute(int i) {
      bool onAttribute = this.state == State.Attr || this.state == State.AttrValue;
      Attribute found = onAttribute ? null : this.node.GetAttribute(i);
      if (found == null) {
          throw new IndexOutOfRangeException();
      }
      return found.Value;
  }
  /// <summary>Indexer form of GetAttribute(int).</summary>
  public override string this [ int i ] {
      get {
          string attributeValue = GetAttribute(i);
          return attributeValue;
      }
  }

  /// <summary>Indexer form of GetAttribute(string).</summary>
  public override string this [ string name ] {
      get {
          string attributeValue = GetAttribute(name);
          return attributeValue;
      }
  }

  /// <summary>Indexer form of GetAttribute(string, string).</summary>
  public override string this [ string name,string namespaceURI ] {
      get {
          string attributeValue = GetAttribute(name, namespaceURI);
          return attributeValue;
      }
  }
  /// <summary>
  /// Positions the reader on the attribute named <paramref name="name"/>.
  /// </summary>
  /// <returns>True when the attribute exists; false otherwise (the position is unchanged).</returns>
  public override bool MoveToAttribute(string name) {
      int index = this.node.GetAttribute(name);
      if (index < 0) {
          return false;
      }
      MoveToAttribute(index);
      return true;
  }

  /// <summary>
  /// Same as MoveToAttribute(name); the namespace argument is ignored.
  /// </summary>
  public override bool MoveToAttribute(string name, string ns) {
      return MoveToAttribute(name);
  }

  /// <summary>
  /// Positions the reader on the attribute at index <paramref name="i"/> of
  /// the current node and switches to the Attr state.  The state the reader
  /// was in is saved on the node unless it was already Attr.
  /// </summary>
  /// <exception cref="IndexOutOfRangeException">There is no attribute at that index.</exception>
  public override void MoveToAttribute(int i) {
      Attribute target = this.node.GetAttribute(i);
      if (target == null) {
          throw new IndexOutOfRangeException();
      }
      this.apos = i;
      this.a = target;
      if (this.state != State.Attr) {
          this.node.CurrentState = this.state; // save current state.
      }
      this.state = State.Attr;
  }
  /// <summary>
  /// Positions the reader on the first attribute of the current node.
  /// </summary>
  /// <returns>False when the node has no attributes.</returns>
  public override bool MoveToFirstAttribute() {
      if (this.node.AttributeCount <= 0) {
          return false;
      }
      MoveToAttribute(0);
      return true;
  }

  /// <summary>
  /// Positions the reader on the next attribute; when not yet on an
  /// attribute this moves to the first one.
  /// </summary>
  /// <returns>False when there is no further attribute.</returns>
  public override bool MoveToNextAttribute() {
      bool onAttribute = this.state == State.Attr || this.state == State.AttrValue;
      if (!onAttribute) {
          return MoveToFirstAttribute();
      }
      int next = this.apos + 1;
      if (next >= this.node.AttributeCount) {
          return false;
      }
      MoveToAttribute(next);
      return true;
  }

  /// <summary>
  /// Leaves the attribute the reader is positioned on and restores the state
  /// saved on the current node.
  /// </summary>
  /// <returns>
  /// True when the reader was on an attribute or attribute value; otherwise
  /// whether the current node is an Element.
  /// </returns>
  public override bool MoveToElement() {
      bool onAttribute = this.state == State.Attr || this.state == State.AttrValue;
      if (!onAttribute) {
          return this.node.NodeType == XmlNodeType.Element;
      }
      this.state = this.node.CurrentState;
      this.a = null;
      return true;
  }
  /// <summary>
  /// The HTML flag that LazyLoadDtd sets from the DTD name.
  /// </summary>
  bool IsHtml {
      get { return this.isHtml; }
  }
  /// <summary>
  /// Returns the encoding reported by the current input entity, opening the
  /// input first when that has not happened yet.
  /// </summary>
  public Encoding GetEncoding() {
      bool inputNotOpened = this.current == null;
      if (inputNotOpened) {
          OpenInput();
      }
      return this.current.GetEncoding();
  }
  /// <summary>
  /// Creates and opens the "#document" entity from Href or, failing that,
  /// from InputStream.  The DTD is lazily loaded first; afterwards the base
  /// Uri is replaced by the entity's resolved Uri when it has one, and when
  /// the entity reports HTML but no DTD is loaded the doc type is set to
  /// "HTML" and the lazy load is retried.
  /// </summary>
  /// <exception cref="InvalidOperationException">Neither Href nor InputStream has been set.</exception>
  void OpenInput() {
      LazyLoadDtd(this.baseUri);
      if (this.Href == null && this.inputStream == null) {
          throw new InvalidOperationException("You must specify input either via Href or InputStream properties");
      }
      if (this.Href != null) {
          this.current = new Entity("#document", null, this.href, this.proxy);
      } else {
          this.current = new Entity("#document", null, this.inputStream, this.proxy);
      }
      this.current.Html = this.IsHtml;
      this.current.Open(null, this.baseUri);

      Uri resolved = this.current.ResolvedUri;
      if (resolved != null) {
          this.baseUri = this.current.ResolvedUri;
      }
      if (this.current.Html && this.dtd == null) {
          this.docType = "HTML";
          LazyLoadDtd(this.baseUri);
      }
  }
  742.         public override bool Read() {
  743.             if (current == null) {
  744.                 OpenInput();
  745.             }
  746.             State start = this.state;
  747.             if (node.Simulated) {
  748.                 // return the next node
  749.                 node.Simulated = false;
  750.                 this.node = Top();
  751.                 this.state = this.node.CurrentState;
  752.                 return true;
  753.             }
  754.             bool foundnode = false;
  755.             while (! foundnode) {
  756.                 switch (this.state) {
  757.                     case State.Initial:
  758.                         this.state = State.Markup;
  759.                         this.current.ReadChar();
  760.                         goto case State.Markup;
  761.                     case State.Eof:
  762.                         if (this.current.Parent != null) {
  763.                             this.current.Close();
  764.                             this.current = this.current.Parent;
  765.                         } else {                           
  766.                             return false;
  767.                         }
  768.                         break;
  769.                     case State.EndTag:
  770.                         if (this.endTag == (object)this.node.Name) {
  771.                             Pop(); // we're done!
  772.                             this.state = State.Markup;
  773.                             goto case State.Markup;
  774.                         }                     
  775.                         Pop(); // close one element
  776.                         foundnode = true;// return another end element.
  777.                         break;
  778.                     case State.Markup:
  779.                         if (this.node.IsEmpty) {
  780.                             Pop();
  781.                         }
  782.                         Node n = this.node;
  783.                         foundnode = ParseMarkup();
  784.                         break;
  785.                     case State.PartialTag:
  786.                         Pop(); // remove text node.
  787.                         this.state = State.Markup;
  788.                         foundnode = ParseTag(this.partial);
  789.                         break;
  790.                     case State.PseudoStartTag:
  791.                         foundnode = ParseStartTag('<');                        
  792.                         break;
  793.                     case State.AutoClose:
  794.                         Pop(); // close next node.
  795.                         if (this.stack.Count <= this.poptodepth) {
  796.                             this.state = State.Markup;
  797.                             if (this.newnode != null) {
  798.                                 Push(this.newnode); // now we're ready to start the new node.
  799.                                 this.newnode = null;
  800.                                 this.state = State.Markup;
  801.                             } else if (this.node.NodeType == XmlNodeType.Document) {
  802.                                 this.state = State.Eof;
  803.                                 goto case State.Eof;
  804.                             }
  805.                         } 
  806.                         foundnode = true;
  807.                         break;
  808.                     case State.CData:
  809.                         foundnode = ParseCData();
  810.                         break;
  811.                     case State.Attr:
  812.                         goto case State.AttrValue;
  813.                     case State.AttrValue:
  814.                         this.state = State.Markup;
  815.                         goto case State.Markup;
  816.                     case State.Text:
  817.                         Pop();
  818.                         goto case State.Markup;
  819.                     case State.PartialText:
  820.                         if (ParseText(this.current.Lastchar, false)) {
  821.                             this.node.NodeType = XmlNodeType.Whitespace;
  822.                         }
  823.                         foundnode = true;
  824.                         break;
  825.                 }
  826.                 if (foundnode && this.node.NodeType == XmlNodeType.Whitespace && this.whitespaceHandling == WhitespaceHandling.None) {
  827.                     // strip out whitespace (caller is probably pretty printing the XML).
  828.                     foundnode = false;
  829.                 }
  830.                 if (!foundnode && this.state == State.Eof && this.stack.Count>1) {
  831.                     this.poptodepth = 1;
  832.                     state = State.AutoClose;
  833.                     this.node = Top();
  834.                     return true;
  835.                 }
  836.             }
  837.             if (!foundRoot && (this.NodeType == XmlNodeType.Element ||
  838.                     this.NodeType == XmlNodeType.Text ||
  839.                     this.NodeType == XmlNodeType.CDATA)) {
  840.                 foundRoot = true;
  841.                 if (this.IsHtml && (this.NodeType != XmlNodeType.Element ||
  842.                     string.Compare(this.LocalName, "html", true, System.Globalization.CultureInfo.InvariantCulture) != 0)) {
  843.                     // Simulate an HTML root element!
  844.                     this.node.CurrentState = this.state;
  845.                     Node root = Push("html", XmlNodeType.Element, null);
  846.                     SwapTopNodes(); // make html the outer element.
  847.                     this.node = root;
  848.                     root.Simulated = true;
  849.                     root.IsEmpty = false;
  850.                     this.state = State.Markup;
  851.                     //this.state = State.PseudoStartTag;
  852.                     //this.startTag = name;
  853.                 }
  854.                 return true;
  855.             }
  856.             return true;
  857.         }
  858.         bool ParseMarkup() {
  859.             char ch = this.current.Lastchar;
  860.             if (ch == '<') {
  861.                 ch = this.current.ReadChar();
  862.                 return ParseTag(ch);
  863.             } 
  864.             else if (ch != Entity.EOF) {
  865.                 if (this.node.DtdType != null && this.node.DtdType.ContentModel.DeclaredContent == DeclaredContent.CDATA) {
  866.                     // e.g. SCRIPT or STYLE tags which contain unparsed character data.
  867.                     this.partial = '';
  868.                     this.state = State.CData;
  869.                     return false;
  870.                 }
  871.                 else if (ParseText(ch, true)) {
  872.                     this.node.NodeType = XmlNodeType.Whitespace;
  873.                 }
  874.                 return true;
  875.             }
  876.             this.state = State.Eof;
  877.             return false;
  878.         }
  879.         static string declterm = " trn><";
  880.         bool ParseTag(char ch) {
  881.             if (ch == '%') {
  882.                 return ParseAspNet();
  883.             }
  884.             if (ch == '!') {
  885.                 ch = this.current.ReadChar();
  886.                 if (ch == '-') {
  887.                     return ParseComment();
  888.                 } else if (ch == '[') {
  889.                     return ParseConditionalBlock();
  890.                 }else if (ch != '_' && !Char.IsLetter(ch)) {
  891.                     // perhaps it's one of those nasty office document hacks like '<![if ! ie ]>'
  892.                     string value = this.current.ScanToEnd(this.sb, "Recovering", ">"); // skip it
  893.                     Log("Ignoring invalid markup '<!"+value+">");
  894.                     return false;
  895.                 }
  896.                 else {
  897.                     string name = this.current.ScanToken(this.sb, SgmlReader.declterm, false);
  898.                     if (name == "DOCTYPE") {
  899.                         ParseDocType();
  900.                         // In SGML DOCTYPE SYSTEM attribute is optional, but in XML it is required,
  901.                         // therefore if there is no SYSTEM literal then add an empty one.
  902.                         if (this.GetAttribute("SYSTEM") == null && this.GetAttribute("PUBLIC") != null) {
  903.                             this.node.AddAttribute("SYSTEM", "", '"', this.folding == CaseFolding.None);
  904.                         }
  905.                         if (stripDocType) {
  906.                             return false;
  907.                         } else {
  908.                             this.node.NodeType = XmlNodeType.DocumentType;
  909.                             return true;
  910.                         }
  911.                     } 
  912.                     else {
  913.                         Log("Invalid declaration '<!{0}...'.  Expecting '<!DOCTYPE' only.", name);
  914.                         this.current.ScanToEnd(null, "Recovering", ">"); // skip it
  915.                         return false;
  916.                     }
  917.                 }
  918.             } 
  919.             else if (ch == '?') {
  920.                 this.current.ReadChar();// consume the '?' character.
  921.                 return ParsePI();
  922.             }
  923.             else if (ch == '/') {
  924.                 return ParseEndTag();
  925.             }
  926.             else {
  927.                 return ParseStartTag(ch);
  928.             }
  929.             return true;
  930.         }
  931.         string ScanName(string terminators) {
  932.             string name = this.current.ScanToken(this.sb, terminators, false);
  933.             switch (this.folding){
  934.                 case CaseFolding.ToUpper:
  935.                     name = name.ToUpper();
  936.                     break;
  937.                 case CaseFolding.ToLower:
  938.                     name = name.ToLower();
  939.                     break;
  940.             }
  941.             return this.nametable.Add(name);
  942.         }
  943.         static string tagterm = " trn=/><";
  944.         static string aterm = " trn='"/>";
  945.         static string avterm = " trn>";
  946.         bool ParseStartTag(char ch) {
  947.             string name = null;
  948.             if (state != State.PseudoStartTag){
  949.                 if (SgmlReader.tagterm.IndexOf(ch)>=0) {
  950.                     this.sb.Length = 0;
  951.                     this.sb.Append('<');
  952.                     this.state = State.PartialText;
  953.                     return false;
  954.                 }
  955.                 name = ScanName(SgmlReader.tagterm);                
  956.             } else {
  957.                 name = this.startTag;
  958.                 state = State.Markup;
  959.             }
  960.             Node n = Push(name, XmlNodeType.Element, null);
  961.             n.IsEmpty = false;
  962.             Validate(n);
  963.             ch = this.current.SkipWhitespace();
  964.             while (ch != Entity.EOF && ch != '>') {
  965.                 if (ch == '/') {
  966.                     n.IsEmpty = true;
  967.                     ch = this.current.ReadChar();
  968.                     if (ch != '>') {
  969.                         Log("Expected empty start tag '/>' sequence instead of '{0}'", ch);
  970.                         this.current.ScanToEnd(null, "Recovering", ">");
  971.                         return false;
  972.                     }
  973.                     break;
  974.                 } 
  975.                 else if (ch == '<') {
  976.                     Log("Start tag '{0}' is missing '>'", name);
  977.                     break;
  978.                 }
  979.                 string aname = ScanName(SgmlReader.aterm);
  980.                 ch = this.current.SkipWhitespace();
  981.                 if (aname == "," || aname == "=" || aname == ":" || aname == ";") {
  982.                     continue;
  983.                 }
  984.                 string value = null;
  985.                 char quote = '';
  986.                 if (ch == '=' || ch == '"' || ch == ''') {
  987.                     if (ch == '=' ){
  988.                         this.current.ReadChar();
  989.                         ch = this.current.SkipWhitespace();
  990.                     }
  991.                     if (ch == ''' || ch == '"') {
  992.                         quote = ch;
  993.                         value = ScanLiteral(this.sb, ch);
  994.                     } 
  995.                     else if (ch != '>') {
  996.                         string term = SgmlReader.avterm;
  997.                         value = this.current.ScanToken(this.sb, term, false);
  998.                     }
  999.                 } 
  1000.                 if (aname.Length > 0) {
  1001.                     Attribute a = n.AddAttribute(aname, value, quote, this.folding == CaseFolding.None);
  1002.                     if (a == null) {
  1003.                         Log("Duplicate attribute '{0}' ignored", aname);
  1004.                     } else {
  1005.                         ValidateAttribute(n, a);
  1006.                     }
  1007.                 }
  1008.                 ch = this.current.SkipWhitespace();
  1009.             }
  1010.             if (ch == Entity.EOF) {
  1011.                 this.current.Error("Unexpected EOF parsing start tag '{0}'", name);
  1012.             } 
  1013.             else if (ch == '>') {
  1014.                 this.current.ReadChar(); // consume '>'
  1015.             }
  1016.             if (this.Depth == 1) {
  1017.                 if (this.rootCount == 1) {
  1018.                     // Hmmm, we found another root level tag, soooo, the only
  1019.                     // thing we can do to keep this a valid XML document is stop
  1020.                     this.state = State.Eof;
  1021.                     return false;
  1022.                 }
  1023.                 this.rootCount++;
  1024.             }
  1025.             ValidateContent(n);
  1026.             return true;
  1027.         }
  1028.         bool ParseEndTag() {
  1029.             this.state = State.EndTag;
  1030.             this.current.ReadChar(); // consume '/' char.
  1031.             string name = this.ScanName(SgmlReader.tagterm);
  1032.             char ch = this.current.SkipWhitespace();
  1033.             if (ch != '>') {
  1034.                 Log("Expected empty start tag '/>' sequence instead of '{0}'", ch);
  1035.                 this.current.ScanToEnd(null, "Recovering", ">");
  1036.             }
  1037.             this.current.ReadChar(); // consume '>'
  1038.             this.endTag = name;
  1039.             // Make sure there's a matching start tag for it.                        
  1040.             bool caseInsensitive = (this.folding == CaseFolding.None);
  1041.             this.node = (Node)this.stack[this.stack.Count-1];
  1042.             for (int i = this.stack.Count-1; i>0; i--) {
  1043.                 Node n = (Node)this.stack[i];
  1044.                 if (caseInsensitive && string.Compare(n.Name, name, true) == 0) {
  1045.                     this.endTag = n.Name;
  1046.                     return true;
  1047.                 } else if ((object)n.Name == (object)name) {
  1048.                     return true;
  1049.                 }
  1050.             }
  1051.             Log("No matching start tag for '</{0}>'", name);
  1052.             this.state = State.Markup;
  1053.             return false;
  1054.         }
  1055.         bool ParseAspNet() {
  1056.             string value = "<%" + this.current.ScanToEnd(this.sb, "AspNet", "%>") + "%>";
  1057.             Push(null, XmlNodeType.CDATA, value);         
  1058.             return true;
  1059.         }
  1060.         bool ParseComment() {
  1061.             char ch = this.current.ReadChar();
  1062.             if (ch != '-') {
  1063.                 Log("Expecting comment '<!--' but found {0}", ch);
  1064.                 this.current.ScanToEnd(null, "Comment", ">");
  1065.                 return false;
  1066.             }
  1067.             string value = this.current.ScanToEnd(this.sb, "Comment", "-->");
  1068.             
  1069.             // Make sure it's a valid comment!
  1070.             int i = value.IndexOf("--");
  1071.             while (i>=0) {
  1072.                 int j = i+2;
  1073.                 while (j<value.Length && value[j]=='-')
  1074.                     j++;
  1075.                 if (i>0) {
  1076.                     value = value.Substring(0, i-1)+"-"+value.Substring(j);
  1077.                 } 
  1078.                 else {
  1079.                     value = "-"+value.Substring(j);
  1080.                 }
  1081.                 i = value.IndexOf("--");
  1082.             }
  1083.             if (value.Length>0 && value[value.Length-1] == '-') {
  1084.                 value += " "; // '-' cannot be last character
  1085.             }
  1086.             Push(null, XmlNodeType.Comment, value);         
  1087.             return true;
  1088.         }
  1089.         static string cdataterm = "trn[<>";
  1090.         bool ParseConditionalBlock(){
  1091.             char ch = current.ReadChar(); // skip '['
  1092.             ch = current.SkipWhitespace();
  1093.             string name = current.ScanToken(sb, cdataterm, false);
  1094.             if (name != "CDATA"){
  1095.                 Log("Expecting CDATA but found '{0}'", name);
  1096.                 current.ScanToEnd(null, "CDATA", ">");
  1097.                 return false;
  1098.             }
  1099.             ch = current.SkipWhitespace();
  1100.             if (ch != '[') {
  1101.                 Log("Expecting '[' but found '{0}'", ch);
  1102.                 current.ScanToEnd(null, "CDATA", ">");
  1103.                 return false;
  1104.             }
  1105.             string value = current.ScanToEnd(sb, "CDATA", "]]>");
  1106.                         
  1107.             Push(null, XmlNodeType.CDATA, value);         
  1108.             return true;
  1109.         }
  1110.         static string dtterm = " trn>";
  1111.         void ParseDocType() {
  1112.             char ch = this.current.SkipWhitespace();
  1113.             string name = this.ScanName(SgmlReader.dtterm);
  1114.             Push(name, XmlNodeType.DocumentType, null);
  1115.             ch = this.current.SkipWhitespace();
  1116.             if (ch != '>') {
  1117.                 string subset = "";
  1118.                 string pubid = "";
  1119.                 string syslit = "";
  1120.                 if (ch != '[') {
  1121.                     string token = this.current.ScanToken(this.sb, SgmlReader.dtterm, false);
  1122.                     if (token == "PUBLIC") {
  1123.                         ch = this.current.SkipWhitespace();
  1124.                         if (ch == '"' || ch == ''') {
  1125.                             pubid = this.current.ScanLiteral(this.sb, ch);
  1126.                             this.node.AddAttribute(token, pubid, ch, this.folding == CaseFolding.None);  
  1127.                         }
  1128.                     } 
  1129.                     else if (token != "SYSTEM") {
  1130.                         Log("Unexpected token in DOCTYPE '{0}'", token);
  1131.                         this.current.ScanToEnd(null, "DOCTYPE", ">");
  1132.                     }
  1133.                     ch = this.current.SkipWhitespace();
  1134.                     if (ch == '"' || ch == ''') {
  1135.                         token = this.nametable.Add("SYSTEM");
  1136.                         syslit = this.current.ScanLiteral(this.sb, ch);
  1137.                         this.node.AddAttribute(token, syslit, ch, this.folding == CaseFolding.None);  
  1138.                     }
  1139.                     ch = this.current.SkipWhitespace();
  1140.                 }
  1141.                 if (ch == '[') {
  1142.                     subset = this.current.ScanToEnd(this.sb, "Internal Subset", "]");
  1143.                     this.node.Value = subset;
  1144.                 }
  1145.                 ch = this.current.SkipWhitespace();
  1146.                 if (ch != '>') {
  1147.                     Log("Expecting end of DOCTYPE tag, but found '{0}'", ch);
  1148.                     this.current.ScanToEnd(null, "DOCTYPE", ">");
  1149.                 }
  1150.                 if (this.dtd == null) {
  1151.                     this.docType = name;
  1152.                     this.pubid = pubid;
  1153.                     this.syslit = syslit;
  1154.                     this.subset = subset;
  1155.                     LazyLoadDtd(this.current.ResolvedUri);
  1156.                 }
  1157.             }           
  1158.             this.current.ReadChar();
  1159.         }
  1160.         static string piterm = " trn?";
  1161.         bool ParsePI() {
  1162.             string name = this.current.ScanToken(this.sb, SgmlReader.piterm, false);
  1163.             string value = null;
  1164.             if (this.current.Lastchar != '?') {
  1165.                 // Notice this is not "?>".  This is because Office generates bogus PI's that end with "/>".
  1166.                 value = this.current.ScanToEnd(this.sb, "Processing Instruction", ">");
  1167.             }
  1168.             else {
  1169.                 // error recovery.
  1170.                 value = this.current.ScanToEnd(this.sb, "Processing Instruction", ">");
  1171.             }
  1172.             // skip xml declarations, since these are generated in the output instead.
  1173.             if (name != "xml"){
  1174.                 Push(nametable.Add(name), XmlNodeType.ProcessingInstruction, value);
  1175.                 return true;
  1176.             }
  1177.             return false;
  1178.         }
  1179.         bool ParseText(char ch, bool newtext) {
  1180.             bool ws = !newtext || this.current.IsWhitespace;
  1181.             if (newtext) this.sb.Length = 0;
  1182.             //this.sb.Append(ch);
  1183.             //ch = this.current.ReadChar();
  1184.             this.state = State.Text;
  1185.             while (ch != Entity.EOF) {
  1186.                 if (ch == '<') {
  1187.                     ch = this.current.ReadChar();
  1188.                     if (ch == '/' || ch == '!' || ch == '?' || Char.IsLetter(ch)) {
  1189.                         // Hit a tag, so return XmlNodeType.Text token
  1190.                         // and remember we partially started a new tag.
  1191.                         this.state = State.PartialTag;
  1192.                         this.partial = ch;
  1193.                         break;
  1194.                     } 
  1195.                     else {
  1196.                         // not a tag, so just proceed.
  1197.                         this.sb.Append('<'); 
  1198.                         this.sb.Append(ch);
  1199.                         ws = false;
  1200.                         ch = this.current.ReadChar();
  1201.                     }
  1202.                 } 
  1203.                 else if (ch == '&') {
  1204.                     ExpandEntity(this.sb, '<');
  1205.                     ws = false;
  1206.                     ch = this.current.Lastchar;
  1207.                 }
  1208.                 else {
  1209.                     if (!this.current.IsWhitespace) ws = false;
  1210.                     this.sb.Append(ch);
  1211.                     ch = this.current.ReadChar();
  1212.                 }
  1213.             }
  1214.             string value = this.sb.ToString();
  1215.             Push(null, XmlNodeType.Text, value);
  1216.             return ws;
  1217.         }
  1218.         // This version is slightly different from Entity.ScanLiteral in that
  1219.         // it also expands entities.
  1220.         public string ScanLiteral(StringBuilder sb, char quote) {
  1221.             sb.Length = 0;
  1222.             char ch = this.current.ReadChar();
  1223.             while (ch != Entity.EOF && ch != quote ) {
  1224.                 if (ch == '&') {
  1225.                     ExpandEntity(this.sb, quote);
  1226.                     ch = this.current.Lastchar;
  1227.                 }               
  1228.                 else {
  1229.                     sb.Append(ch);
  1230.                     ch = this.current.ReadChar();
  1231.                 }
  1232.             }
  1233.             this.current.ReadChar(); // consume end quote.          
  1234.             return sb.ToString();
  1235.         }
  1236.         bool ParseCData() {
  1237.             // Like ParseText(), only it doesn't allow elements in the content.  
  1238.             // It allows comments and processing instructions and text only and
  1239.             // text is not returned as text but CDATA (since it may contain angle brackets).
  1240.             // And initial whitespace is ignored.  It terminates when we hit the
  1241.             // end tag for the current CDATA node (e.g. </style>).
  1242.             bool ws = this.current.IsWhitespace;
  1243.             this.sb.Length = 0;
  1244.             char ch = this.current.Lastchar;
  1245.             if (this.partial != '') {
  1246.                 Pop(); // pop the CDATA
  1247.                 switch (this.partial) {
  1248.                     case '!':
  1249.                         this.partial = ' '; // and pop the comment next time around
  1250.                         return ParseComment();
  1251.                     case '?':
  1252.                         this.partial = ' '; // and pop the PI next time around
  1253.                         return ParsePI();
  1254.                     case '/':
  1255.                         this.state = State.EndTag;
  1256.                         return true;    // we are done!
  1257.                     case ' ':
  1258.                         break; // means we just needed to pop the Comment, PI or CDATA.
  1259.                 }
  1260.             } else {
  1261.                 ch = this.current.ReadChar();
  1262.             }            
  1263.             
  1264.             // if this.partial == '!' then parse the comment and return
  1265.             // if this.partial == '?' then parse the processing instruction and return.            
  1266.             while (ch != Entity.EOF) {
  1267.                 if (ch == '<') {
  1268.                     ch = this.current.ReadChar();
  1269.                     if (ch == '!') {
  1270.                         ch = this.current.ReadChar();
  1271.                         if (ch == '-') {
  1272.                             // return what CDATA we have accumulated so far
  1273.                             // then parse the comment and return to here.
  1274.                             if (ws) {
  1275.                                 this.partial = ' '; // pop comment next time through
  1276.                                 return ParseComment();
  1277.                             } 
  1278.                             else {
  1279.                                 // return what we've accumulated so far then come
  1280.                                 // back in and parse the comment.
  1281.                                 this.partial = '!';
  1282.                                 break; 
  1283.                             }
  1284. #if FIX
  1285.                         } else if (ch == '['){
  1286.                             // We are about to wrap this node as a CDATA block because of it's
  1287.                             // type in the DTD, but since we found a CDATA block in the input
  1288.                             // we have to parse it as a CDATA block, otherwise we will attempt
  1289.                             // to output nested CDATA blocks which of course is illegal.
  1290.                             if (this.ParseConditionalBlock()){
  1291.                                 this.partial = ' ';
  1292.                                 return true;
  1293.                             }
  1294. #endif
  1295.                         } else {
  1296.                             // not a comment, so ignore it and continue on.
  1297.                             this.sb.Append('<');
  1298.                             this.sb.Append('!');
  1299.                             this.sb.Append(ch);
  1300.                             ws = false;
  1301.                         }
  1302.                     } 
  1303.                     else if (ch == '?') {
  1304.                         // processing instruction.
  1305.                         this.current.ReadChar();// consume the '?' character.
  1306.                         if (ws) {
  1307.                             this.partial = ' '; // pop PI next time through
  1308.                             return ParsePI();
  1309.                         } 
  1310.                         else {
  1311.                             this.partial = '?';
  1312.                             break;
  1313.                         }
  1314.                     }
  1315.                     else if (ch == '/') {
  1316.                         // see if this is the end tag for this CDATA node.
  1317.                         string temp = this.sb.ToString();
  1318.                         if (ParseEndTag() && this.endTag == (object)this.node.Name) {
  1319.                             if (ws || temp == "") {
  1320.                                 // we are done!
  1321.                                 return true;
  1322.                             } 
  1323.                             else {
  1324.                                 // return CDATA text then the end tag
  1325.                                 this.partial = '/';
  1326.                                 this.sb.Length = 0; // restore buffer!
  1327.                                 this.sb.Append(temp); 
  1328.                                 this.state = State.CData;
  1329.                                 break;
  1330.                             }
  1331.                         } 
  1332.                         else {
  1333.                             // wrong end tag, so continue on.
  1334.                             this.sb.Length = 0; // restore buffer!
  1335.                             this.sb.Append(temp); 
  1336.                             this.sb.Append("</"+this.endTag+">");
  1337.                             ws = false;
  1338.                         }
  1339.                     }
  1340.                     else {
  1341.                         // must be just part of the CDATA block, so proceed.
  1342.                         this.sb.Append('<'); 
  1343.                         this.sb.Append(ch);
  1344.                         ws = false;
  1345.                     }
  1346.                 } 
  1347.                 else {
  1348.                     if (!this.current.IsWhitespace && ws) ws = false;
  1349.                     this.sb.Append(ch);
  1350.                 }
  1351.                 ch = this.current.ReadChar();
  1352.             }
  1353.             string value = this.sb.ToString();
  1354.             Push(null, XmlNodeType.CDATA, value);
  1355.             if (this.partial == '')
  1356.                 this.partial = ' ';// force it to pop this CDATA next time in.
  1357.             return true;
  1358.         }
  1359.         void ExpandEntity(StringBuilder sb, char terminator) {
  1360.             char ch = this.current.ReadChar();
  1361.             if (ch == '#') {
  1362.                 string charent = this.current.ExpandCharEntity();
  1363.                 sb.Append(charent);
  1364.                 ch = this.current.Lastchar;
  1365.             } 
  1366.             else {
  1367.                 this.name.Length = 0;
  1368.                 while (ch != Entity.EOF && 
  1369.                     (Char.IsLetter(ch) || ch == '_' || ch == '-')) {
  1370.                     this.name.Append(ch);
  1371.                     ch = this.current.ReadChar();
  1372.                 }
  1373.                 string name = this.name.ToString();
  1374.                 if (this.dtd != null && name != "") {
  1375.                     Entity e = (Entity)this.dtd.FindEntity(name);
  1376.                     if (e != null) {
  1377.                         if (e.Internal) {
  1378.                             sb.Append(e.Literal);
  1379.                             if (ch != terminator) 
  1380.                                 ch = this.current.ReadChar();
  1381.                             return;
  1382.                         } 
  1383.                         else {
  1384.                             Entity ex = new Entity(name, e.PublicId, e.Uri, this.current.Proxy);
  1385.                             e.Open(this.current, new Uri(e.Uri));
  1386.                             this.current = ex;
  1387.                             this.current.ReadChar();
  1388.                             return;
  1389.                         }
  1390.                     } 
  1391.                     else {
  1392.                         Log("Undefined entity '{0}'", name);
  1393.                     }
  1394.                 }
  1395.                 // Entity is not defined, so just keep it in with the rest of the
  1396.                 // text.
  1397.                 sb.Append("&");
  1398.                 sb.Append(name);
  1399.                 if (ch != terminator) {
  1400.                     sb.Append(ch);
  1401.                     ch = this.current.ReadChar();
  1402.                 }
  1403.             }
  1404.         }
  1405.         public override bool EOF { 
  1406.             get {
  1407.                 return this.state == State.Eof;
  1408.             }
  1409.         }
  1410.         public override void Close() {
  1411.             if (this.current != null) {
  1412.                 this.current.Close();
  1413.                 this.current = null;
  1414.             }
  1415.             if (this.log != null) {
  1416.                 this.log.Close();
  1417.                 this.log = null;
  1418.             }
  1419.         }
  1420.         public override ReadState ReadState { 
  1421.             get {
  1422.                 if (this.state == State.Initial) return ReadState.Initial;
  1423.                 else if (this.state == State.Eof) return ReadState.EndOfFile;
  1424.                 return ReadState.Interactive;
  1425.             }
  1426.         }
  1427.         public override string ReadString() {
  1428.             if (this.node.NodeType == XmlNodeType.Element) {
  1429.                 this.sb.Length = 0;
  1430.                 while (Read()) {
  1431.                     switch (this.NodeType) {
  1432.                         case XmlNodeType.CDATA:
  1433.                         case XmlNodeType.SignificantWhitespace:
  1434.                         case XmlNodeType.Whitespace:
  1435.                         case XmlNodeType.Text:
  1436.                             this.sb.Append(this.node.Value);
  1437.                             break;
  1438.                         default:
  1439.                             return this.sb.ToString();
  1440.                     }
  1441.                 }
  1442.                 return this.sb.ToString();
  1443.             }
  1444.             return this.node.Value;
  1445.         }
  1446.         public override string ReadInnerXml() {
  1447.             StringWriter sw = new StringWriter();
  1448.             XmlTextWriter xw = new XmlTextWriter(sw);
  1449.             xw.Formatting = Formatting.Indented;
  1450.             switch (this.NodeType) {
  1451.                 case XmlNodeType.Element:
  1452.                     Read();
  1453.                     while (!this.EOF && this.NodeType != XmlNodeType.EndElement) {
  1454.                         xw.WriteNode(this, true);
  1455.                     }
  1456.                     Read(); // consume the end tag
  1457.                     break;
  1458.                 case XmlNodeType.Attribute:
  1459.                     sw.Write(this.Value);
  1460.                     break;
  1461.                 default:
  1462.                     // return empty string according to XmlReader spec.
  1463.                     break;
  1464.             }
  1465.             xw.Close();
  1466.             return sw.ToString();
  1467.         }
  1468.         public override string ReadOuterXml() {
  1469.             StringWriter sw = new StringWriter();
  1470.             XmlTextWriter xw = new XmlTextWriter(sw);
  1471.             xw.Formatting = Formatting.Indented;
  1472.             xw.WriteNode(this, true);
  1473.             xw.Close();
  1474.             return sw.ToString();
  1475.         }
  1476.         public override XmlNameTable NameTable { 
  1477.             get {
  1478.                 return this.nametable;
  1479.             }
  1480.         }
  1481.         public override string LookupNamespace(string prefix) {           
  1482.             return null;// there are no namespaces in SGML.
  1483.         }
  1484.         public override void ResolveEntity() {
  1485.             // We never return any entity reference nodes, so this should never be called.
  1486.             throw new InvalidOperationException("Not on an entity reference.");
  1487.         }
  1488.         public override bool ReadAttributeValue() {
  1489.             if (this.state == State.Attr) {
  1490.                 this.state = State.AttrValue;
  1491.                 return true;
  1492.             } 
  1493.             else if (this.state == State.AttrValue) {
  1494.                 return false;
  1495.             }
  1496.             throw new InvalidOperationException("Not on an attribute.");
  1497.         }   
  1498.         void Validate(Node node) {
  1499.             if (this.dtd != null) {
  1500.                 ElementDecl e = this.dtd.FindElement(node.Name);
  1501.                 if (e != null) {
  1502.                     node.DtdType = e;
  1503.                     if (e.ContentModel.DeclaredContent == DeclaredContent.EMPTY) 
  1504.                         node.IsEmpty = true;
  1505.                 }
  1506.             }
  1507.         }
  1508.         void ValidateAttribute(Node node, Attribute a) {
  1509.             ElementDecl e = node.DtdType;
  1510.             if (e != null) {
  1511.                 AttDef ad = e.FindAttribute(a.Name);
  1512.                 if (ad != null) {
  1513.                     a.DtdType = ad;
  1514.                 }
  1515.             }
  1516.         }   
  1517.         void ValidateContent(Node node) {
  1518.             if (this.dtd != null) {
  1519.                 // See if this element is allowed inside the current element.
  1520.                 // If it isn't, then auto-close elements until we find one
  1521.                 // that it is allowed to be in.                                  
  1522.                 string name = this.nametable.Add(node.Name.ToUpper()); // DTD is in upper case
  1523.                 int i = 0;
  1524.                 int top = this.stack.Count-2;
  1525.                 if (node.DtdType != null) { 
  1526.                     // it is a known element, let's see if it's allowed in the
  1527.                     // current context.
  1528.                     for (i = top; i>0; i--) {
  1529.                         Node n = (Node)this.stack[i];
  1530.                         if (n.IsEmpty) 
  1531.                             continue; // we'll have to pop this one
  1532.                         ElementDecl f = n.DtdType;
  1533.                         if (f != null) {
  1534.                             if (f.Name == this.dtd.Name)
  1535.                                 break; // can't pop the root element.
  1536.                             if (f.CanContain(name, this.dtd)) {
  1537.                                 break;
  1538.                             } 
  1539.                             else if (!f.EndTagOptional) {
  1540.                                 // If the end tag is not optional then we can't
  1541.                                 // auto-close it.  We'll just have to live with the
  1542.                                 // junk we've found and move on.
  1543.                                 break;
  1544.                             }
  1545.                         } 
  1546.                         else {
  1547.                             // Since we don't understand this tag anyway,
  1548.                             // we might as well allow this content!
  1549.                             break;
  1550.                         }
  1551.                     }
  1552.                 }
  1553.                 if (i == 0) {
  1554.                     // Tag was not found or is not allowed anywhere, ignore it and 
  1555.                     // continue on.
  1556.                 }
  1557.                 else if (i < top) {
  1558.                     Node n = (Node)this.stack[top];
  1559.                     if (i == top - 1 && name == n.Name) {
  1560.                         // e.g. p not allowed inside p, not an interesting error.
  1561.                     } else {
  1562.                         string closing = "";
  1563.                         for (int k = top; k >= i+1; k--) {
  1564.                             if (closing != "") closing += ",";
  1565.                             Node n2 = (Node)this.stack[k];
  1566.                             closing += "<"+n2.Name+">";
  1567.                         }
  1568.                         Log("Element '{0}' not allowed inside '{1}', closing {2}.", 
  1569.                             name, n.Name, closing);
  1570.                     }
  1571.                     this.state = State.AutoClose;
  1572.                     this.newnode = node;
  1573.                     Pop(); // save this new node until we pop the others
  1574.                     this.poptodepth = i+1;
  1575.                 }
  1576.             }
  1577.         }
  1578.     }
  1579. }