xml/soap/webservice

开发平台：
C/C++

DTDScanner.cpp：源码内容
							/*
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
/*
 * $Log: DTDScanner.cpp,v $
 * Revision 1.22  2001/12/06 17:51:18  tng
 * Performance Enhancement. The ContentSpecNode constructor always copied the QName
 * that was passed to it.  Added a second constructor that allows the QName to be just assigned, not copied.
 * That was because there are some cases in which a temporary QName was constructed, passed to ContentSpecNode, and then deleted.
 * There were examples of that in TraverseSchema and DTDScanner.
 * By Henry Zongaro.
 *
 * Revision 1.21  2001/11/13 13:27:28  tng
 * Move root element check to XMLScanner.
 *
 * Revision 1.20  2001/09/05 20:49:10  knoaman
 * Fix for complexTypes with mixed content model.
 *
 * Revision 1.19  2001/08/02 16:54:39  tng
 * Reset some Scanner flags in scanReset().
 *
 * Revision 1.18  2001/07/13 16:57:11  tng
 * ScanId fix.
 *
 * Revision 1.17  2001/07/12 20:10:18  tng
 * Partial Markup in Parameter Entity is validity constraint and thus should be just error, not fatal error.
 *
 * Revision 1.16  2001/07/10 21:09:39  tng
 * Give proper error messsage when scanning external id.
 *
 * Revision 1.15  2001/07/10 20:56:17  tng
 * Should check the first char of PI Target Name.
 *
 * Revision 1.14  2001/07/09 13:42:20  tng
 * Partial Markup in Parameter Entity is validity constraint and thus should be just error, not fatal error.
 *
 * Revision 1.13  2001/07/05 14:05:29  tng
 * Encoding String must present for external entity text decl.
 *
 * Revision 1.12  2001/07/05 13:12:19  tng
 * Standalone checking is validity constraint and thus should be just error, not fatal error:
 *
 * Revision 1.11  2001/06/25 14:39:54  knoaman
 * Fix bug #965 - submitted by Matt Lovett
 *
 * Revision 1.10  2001/06/22 12:42:33  tng
 * [Bug 2257] 1.5 thinks a <?xml-stylesheet ...> tag is a <?xml ...> tag
 *
 * Revision 1.9  2001/06/21 14:25:53  knoaman
 * Fix for bug 1946
 *
 * Revision 1.8  2001/06/04 13:25:50  tng
 * the start tag "<?xml" could be followed by (#x20 | #x9 | #xD | #xA)+.  Fixed by Pei Yong Zhang.
 *
 * Revision 1.7  2001/05/28 20:54:06  tng
 * Schema: allocate a fDTDValidator, fSchemaValidator explicitly to avoid wrong cast
 *
 * Revision 1.6  2001/05/11 13:27:09  tng
 * Copyright update.
 *
 * Revision 1.5  2001/05/03 20:34:36  tng
 * Schema: SchemaValidator update
 *
 * Revision 1.4  2001/04/23 18:54:35  tng
 * Reuse grammar should allow users to use any stored element decl as root.  Fixed by Erik Rydgren.
 *
 * Revision 1.3  2001/04/19 18:17:21  tng
 * Schema: SchemaValidator update, and use QName in Content Model
 *
 * Revision 1.2  2001/03/30 16:35:17  tng
 * Schema: Whitespace normalization.
 *
 * Revision 1.1  2001/03/21 21:56:20  tng
 * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
 *
 */
// ---------------------------------------------------------------------------
//  Includes
// ---------------------------------------------------------------------------
#include <util/BinMemInputStream.hpp>
#include <util/FlagJanitor.hpp>
#include <util/Janitor.hpp>
#include <util/XMLUniDefs.hpp>
#include <util/UnexpectedEOFException.hpp>
#include <sax/InputSource.hpp>
#include <framework/XMLDocumentHandler.hpp>
#include <framework/XMLEntityHandler.hpp>
#include <internal/EndOfEntityException.hpp>
#include <internal/XMLScanner.hpp>
#include <validators/common/ContentSpecNode.hpp>
#include <validators/common/MixedContentModel.hpp>
#include <validators/DTD/DTDEntityDecl.hpp>
#include <validators/DTD/DocTypeHandler.hpp>
#include <validators/DTD/DTDScanner.hpp>
// ---------------------------------------------------------------------------
//  Local methods
// ---------------------------------------------------------------------------
//
//  This method automates the grunt work of looking at a char and see if its
//  a repetition suffix. If so, it creates a new correct rep node and wraps
//  the pass node in it. Otherwise, it returns the previous node.
//
static ContentSpecNode*
makeRepNode(const XMLCh testCh, ContentSpecNode* const prevNode)
{
    if (testCh == chQuestion)
    {
        return new ContentSpecNode
        (
            ContentSpecNode::ZeroOrOne
            , prevNode
            , 0
        );
    }
     else if (testCh == chPlus)
    {
        return new ContentSpecNode
        (
            ContentSpecNode::OneOrMore
            , prevNode
            , 0
        );
    }
     else if (testCh == chAsterisk)
    {
        return new ContentSpecNode
        (
            ContentSpecNode::ZeroOrMore
            , prevNode
            , 0
        );
    }
    // Just return the incoming node
    return prevNode;
}
// ---------------------------------------------------------------------------
//  DTDValidator: Constructors and Destructor
// ---------------------------------------------------------------------------
DTDScanner::DTDScanner(DTDGrammar* dtdGrammar, NameIdPool<DTDEntityDecl>* entityDeclPool, DocTypeHandler* const    docTypeHandler) :
    fDocTypeHandler(docTypeHandler)
    , fDumAttDef(0)
    , fDumElemDecl(0)
    , fDumEntityDecl(0)
    , fInternalSubset(false)
    , fNextAttrId(1)
    , fDTDGrammar(dtdGrammar)
    , fPEntityDeclPool(0)
    , fEntityDeclPool(entityDeclPool)
    , fDocTypeReaderId(0)
{
    fPEntityDeclPool = new NameIdPool<DTDEntityDecl>(109);
}
DTDScanner::~DTDScanner()
{
    delete fDumAttDef;
    delete fDumElemDecl;
    delete fDumEntityDecl;
    delete fPEntityDeclPool;
}
// ---------------------------------------------------------------------------
//  DTDScanner: Private scanning methods
// ---------------------------------------------------------------------------
bool DTDScanner::checkForPERef(const  bool    spaceRequired
                                , const bool    inLiteral
                                , const bool    inMarkup
                                , const bool    throwAtEndExt)
{
    bool gotSpace = false;
    //
    //  See if we have any spaces up front. If so, then skip them and set
    //  the gotSpaces flag.
    //
    if (fReaderMgr->skippedSpace())
    {
        fReaderMgr->skipPastSpaces();
        gotSpace = true;
    }
    // If the next char is a percent, then expand the PERef
    if (!fReaderMgr->skippedChar(chPercent))
       return gotSpace;
    while (true)
    {
       if (!expandPERef(false, inLiteral, inMarkup, throwAtEndExt))
          fScanner->emitError(XMLErrs::ExpectedEntityRefName);
       // And skip any more spaces in the expanded value
       if (fReaderMgr->skippedSpace())
       {
          fReaderMgr->skipPastSpaces();
          gotSpace = true;
       }
       if (!fReaderMgr->skippedChar(chPercent))
          break;
    }
    return gotSpace;
}
bool DTDScanner::expandPERef( const   bool    scanExternal
                                , const bool    inLiteral
                                , const bool    inMarkup
                                , const bool    throwEndOfExt)
{
    fScanner->setHasNoDTD(false);
    XMLBufBid bbName(fBufMgr);
    //
    //  If we are in the internal subset and in markup, then this is
    //  an error but we go ahead and do it anyway.
    //
    if (fInternalSubset && inMarkup)
        fScanner->emitError(XMLErrs::PERefInMarkupInIntSubset);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedPEName);
        // Skip the semicolon if that's what we ended up on
        fReaderMgr->skippedChar(chSemiColon);
        return false;
    }
    // If no terminating semicolon, emit an error but try to keep going
    if (!fReaderMgr->skippedChar(chSemiColon))
        fScanner->emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
    //
    //  Look it up in the PE decl pool and see if it exists. If not, just
    //  emit an error and continue.
    //
    XMLEntityDecl* decl = fPEntityDeclPool->getByKey(bbName.getRawBuffer());
    if (!decl)
    {
        // XML 1.0 Section 4.1
        if (fScanner->getStandalone()) {
            // no need to check fScanner->fHasNoDTD which is for sure false
            // since we are in expandPERef already
            fScanner->emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
        }
        else {
            if (fScanner->getDoValidation())
                fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
        }
        return false;
    }
    //
    //  If we are a standalone document, then it has to have been declared
    //  in the internal subset. Keep going though.
    //
    if (fScanner->getDoValidation() && fScanner->getStandalone() && !decl->getDeclaredInIntSubset())
        fScanner->getValidator()->emitError(XMLValid::IllegalRefInStandalone, bbName.getRawBuffer());
    //
    //  Okee dokee, we found it. So create either a memory stream with
    //  the entity value contents, or a file stream if its an external
    //  entity.
    //
    if (decl->isExternal())
    {
        // And now create a reader to read this entity
        InputSource* srcUsed;
        XMLReader* reader = fReaderMgr->createReader
        (
            decl->getSystemId()
            , decl->getPublicId()
            , false
            , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_PE
            , XMLReader::Source_External
            , srcUsed
        );
        // Put a janitor on the source so its cleaned up on exit
        Janitor<InputSource> janSrc(srcUsed);
        // If the creation failed then throw an exception
        if (!reader)
            ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed->getSystemId());
        // Set the 'throw at end' flag, to the one we were given
        reader->setThrowAtEnd(throwEndOfExt);
        //
        //  Push the reader. If its a recursive expansion, then emit an error
        //  and return an failure.
        //
        if (!fReaderMgr->pushReader(reader, decl))
        {
            fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());
            return false;
        }
        //
        //  If the caller wants us to scan the external entity, then lets
        //  do that now.
        //
        if (scanExternal)
        {
            XMLEntityHandler* entHandler = fScanner->getEntityHandler();
            // If we have an entity handler, tell it we are starting this entity
            if (entHandler)
                entHandler->startInputSource(*srcUsed);
            //
            //  Scan the external entity now. The parameter tells it that
            //  it is not in an include section. Get the current reader
            //  level so we can catch partial markup errors and be sure
            //  to get back to here if we get an exception out of the
            //  ext subset scan.
            //
            const unsigned int readerNum = fReaderMgr->getCurrentReaderNum();
            try
            {
                scanExtSubsetDecl(false);
            }
            catch(...)
            {
                // Pop the reader back to the original level
                fReaderMgr->cleanStackBackTo(readerNum);
                // End the input source, even though its not happy
                if (entHandler)
                    entHandler->endInputSource(*srcUsed);
                throw;
            }
            // If we have an entity handler, tell it we are ending this entity
            if (entHandler)
                entHandler->endInputSource(*srcUsed);
        }
    }
     else
    {
        // Create a reader over a memory stream over the entity value
        XMLReader* valueReader = fReaderMgr->createIntEntReader
        (
            decl->getName()
            , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_PE
            , decl->getValue()
            , decl->getValueLen()
            , false
        );
        //
        //  Trt to push the entity reader onto the reader manager stack,
        //  where it will become the subsequent input. If it fails, that
        //  means the entity is recursive, so issue an error. The reader
        //  will have just been discarded, but we just keep going.
        //
        if (!fReaderMgr->pushReader(valueReader, decl))
            fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());
    }
    return true;
}
bool DTDScanner::getQuotedString(XMLBuffer& toFill)
{
    // Reset the target buffer
    toFill.reset();
    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr->skipIfQuote(quoteCh))
        return false;
    while (true)
    {
        // Get another char
        const XMLCh nextCh = fReaderMgr->getNextChar();
        // See if it matches the starting quote char
        if (nextCh == quoteCh)
            break;
        //
        //  We should never get either an end of file null char here. If we
        //  do, just fail. It will be handled more gracefully in the higher
        //  level code that called us.
        //
        if (!nextCh)
            return false;
        // Else add it to the buffer
        toFill.append(nextCh);
    }
    return true;
}
XMLAttDef*
DTDScanner::scanAttDef(DTDElementDecl& parentElem, XMLBuffer& bufToUse)
{
    // Check for PE ref or optional whitespace
    checkForPERef(false, false, true);
    // Get the name of the attribute
    if (!fReaderMgr->getName(bufToUse))
    {
        fScanner->emitError(XMLErrs::ExpectedAttrName);
        return 0;
    }
    //
    //  Look up this attribute in the parent element's attribute list. If
    //  it already exists, then use the dummy.
    //
    DTDAttDef* decl = parentElem.getAttDef(bufToUse.getRawBuffer());
    if (decl)
    {
        // It already exists, so put out a warning
        fScanner->emitError
        (
            XMLErrs::AttListAlreadyExists
            , bufToUse.getRawBuffer()
            , parentElem.getFullName()
        );
        // Use the dummy decl to parse into and set its name to the name we got
        if (!fDumAttDef)
        {
            fDumAttDef = new DTDAttDef;
            fDumAttDef->setId(fNextAttrId++);
        }
        fDumAttDef->setName(bufToUse.getRawBuffer());
        decl = fDumAttDef;
    }
     else
    {
        //
        //  It does not already exist so create a new one, give it the next
        //  available unique id, and add it
        //
        decl = new DTDAttDef(bufToUse.getRawBuffer());
        decl->setId(fNextAttrId++);
        decl->setExternalAttDeclaration(isReadingExternalEntity());
        parentElem.addAttDef(decl);
    }
    // Set a flag to indicate whether we are doing a dummy parse
    const bool isIgnored = (decl == fDumAttDef);
    // Space is required here, so check for PE ref, and require space
    if (!checkForPERef(true, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    //
    //  Next has to be one of the attribute type strings. This tells us what
    //  is to follow.
    //
    if (fReaderMgr->skippedString(XMLUni::fgCDATAString))
    {
        decl->setType(XMLAttDef::CData);
    }
     else if (fReaderMgr->skippedString(XMLUni::fgIDString))
    {
        if (!fReaderMgr->skippedString(XMLUni::fgRefString))
            decl->setType(XMLAttDef::ID);
        else if (!fReaderMgr->skippedChar(chLatin_S))
            decl->setType(XMLAttDef::IDRef);
        else
            decl->setType(XMLAttDef::IDRefs);
    }
     else if (fReaderMgr->skippedString(XMLUni::fgEntitString))
    {
        if (fReaderMgr->skippedChar(chLatin_Y))
        {
            decl->setType(XMLAttDef::Entity);
        }
         else if (fReaderMgr->skippedString(XMLUni::fgIESString))
        {
            decl->setType(XMLAttDef::Entities);
        }
         else
        {
            fScanner->emitError
            (
                XMLErrs::ExpectedAttributeType
                , decl->getFullName()
                , parentElem.getFullName()
            );
            return 0;
        }
    }
     else if (fReaderMgr->skippedString(XMLUni::fgNmTokenString))
    {
        if (fReaderMgr->skippedChar(chLatin_S))
            decl->setType(XMLAttDef::NmTokens);
        else
            decl->setType(XMLAttDef::NmToken);
    }
     else if (fReaderMgr->skippedString(XMLUni::fgNotationString))
    {
        // Check for PE ref and require space
        if (!checkForPERef(true, false, true))
            fScanner->emitError(XMLErrs::ExpectedWhitespace);
        decl->setType(XMLAttDef::Notation);
        if (!scanEnumeration(*decl, bufToUse, true))
            return 0;
        // Set the value as the enumeration for this decl
        decl->setEnumeration(bufToUse.getRawBuffer());
    }
     else if (fReaderMgr->skippedChar(chOpenParen))
    {
        decl->setType(XMLAttDef::Enumeration);
        if (!scanEnumeration(*decl, bufToUse, false))
            return 0;
        // Set the value as the enumeration for this decl
        decl->setEnumeration(bufToUse.getRawBuffer());
    }
     else
    {
        fScanner->emitError
        (
            XMLErrs::ExpectedAttributeType
            , decl->getFullName()
            , parentElem.getFullName()
        );
        return 0;
    }
    // Space is required here, so check for PE ref, and require space
    if (!checkForPERef(true, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    // And then scan for the optional default value declaration
    scanDefaultDecl(*decl);
    // If validating, then do a couple of validation constraints
    if (fScanner->getDoValidation())
    {
        if (decl->getType() == XMLAttDef::ID)
        {
            if ((decl->getDefaultType() != XMLAttDef::Implied)
            &&  (decl->getDefaultType() != XMLAttDef::Required))
            {
                fScanner->getValidator()->emitError(XMLValid::BadIDAttrDefType, decl->getFullName());
            }
        }
        // if attdef is xml:space, check correct enumeration (default|preserve)
        const XMLCh fgXMLSpace[] = { chLatin_x, chLatin_m, chLatin_l, chColon, chLatin_s, chLatin_p, chLatin_a, chLatin_c, chLatin_e, chNull };
        if (!XMLString::compareString(decl->getFullName(),fgXMLSpace)) {
            const XMLCh fgPreserve[] = { chLatin_p, chLatin_r, chLatin_e, chLatin_s, chLatin_e, chLatin_r, chLatin_v, chLatin_e, chNull };
            const XMLCh fgDefault[] = { chLatin_d, chLatin_e, chLatin_f, chLatin_a, chLatin_u, chLatin_l, chLatin_t, chNull };
            bool ok = false;
            if (decl->getType() == XMLAttDef::Enumeration) {
                RefVectorOf<XMLCh>* enumVector = XMLString::tokenizeString(decl->getEnumeration());
                int size = enumVector->size();
                ok = (size == 1 &&
                     (!XMLString::compareString(enumVector->elementAt(0), fgDefault) ||
                      !XMLString::compareString(enumVector->elementAt(0), fgPreserve))) ||
                     (size == 2 &&
                     (!XMLString::compareString(enumVector->elementAt(0), fgDefault) &&
                      !XMLString::compareString(enumVector->elementAt(1), fgPreserve))) ||
                     (size == 2 &&
                     (!XMLString::compareString(enumVector->elementAt(1), fgDefault) &&
                      !XMLString::compareString(enumVector->elementAt(0), fgPreserve)));
                delete enumVector;
            }
            if (!ok)
                fScanner->getValidator()->emitError(XMLValid::IllegalXMLSpace);
        }
    }
    // If we have a doc type handler, tell it about this attdef.
    if (fDocTypeHandler)
        fDocTypeHandler->attDef(parentElem, *decl, isIgnored);
    return decl;
}
void DTDScanner::scanAttListDecl()
{
    // Space is required here, so check for a PE ref
    if (!checkForPERef(true, false, true))
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    //
    //  Next should be the name of the element it belongs to, so get a buffer
    //  and get the name into it.
    //
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedElementName);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    //
    //  Find this element's declaration. If it has not been declared yet,
    //  we will force one into the list, but not mark it as declared.
    //
    DTDElementDecl* elemDecl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
    if (!elemDecl)
    {
        //
        //  Lets fault in a declaration and add it to the pool. We mark
        //  it having been created because of an attlist. Later, if its
        //  declared, this will be updated.
        //
        elemDecl = new DTDElementDecl(bbName.getRawBuffer(), fEmptyNamespaceId);
        elemDecl->setCreateReason(XMLElementDecl::AttList);
        elemDecl->setExternalElemDeclaration(isReadingExternalEntity());
        fDTDGrammar->putElemDecl((XMLElementDecl*) elemDecl);
    }
    // If we have a doc type handler, tell it the att list is starting
    if (fDocTypeHandler)
        fDocTypeHandler->startAttList(*elemDecl);
    //
    //  Now we loop until we are done with all of the attributes in this
    //  list. We need a buffer to use for local processing.
    //
    XMLBufBid   bbTmp(fBufMgr);
    XMLBuffer&  tmpBuf = bbTmp.getBuffer();
    bool        seenAnId = false;
    while (true)
    {
        // Get the next char out and see what it tells us to do
        const XMLCh nextCh = fReaderMgr->peekNextChar();
        // Watch for EOF
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        if (nextCh == chCloseAngle)
        {
            // We are done with this attribute list
            fReaderMgr->getNextChar();
            break;
        }
         else if (XMLReader::isWhitespace(nextCh))
        {
            //
            //  If advanced callbacks are enabled and we have a doc
            //  type handler, then gather up the white space and call
            //  back on the doctype handler. Otherwise, just skip
            //  whitespace.
            //
            if (fDocTypeHandler)
            {
                fReaderMgr->getSpaces(tmpBuf);
                fDocTypeHandler->doctypeWhitespace
                (
                    tmpBuf.getRawBuffer()
                    , tmpBuf.getLen()
                );
            }
             else
            {
                fReaderMgr->skipPastSpaces();
            }
        }
         else if (nextCh == chPercent)
        {
            // Eat the percent and expand the ref
            fReaderMgr->getNextChar();
            expandPERef(false, false, true);
        }
         else
        {
            //
            //  It must be an attribute name, so scan it. We let
            //  it use our local buffer for its name scanning.
            //
            XMLAttDef* attDef = scanAttDef(*elemDecl, tmpBuf);
            if (!attDef)
            {
                fReaderMgr->skipPastChar(chCloseAngle);
                break;
            }
            //
            //  If we are validating and its an ID type, then we have to
            //  make sure that we have not seen an id attribute yet. Set
            //  the flag to say that we've seen one now also.
            //
            if (fScanner->getDoValidation())
            {
                if (attDef->getType() == XMLAttDef::ID)
                {
                    if (seenAnId)
                        fScanner->getValidator()->emitError(XMLValid::MultipleIdAttrs, elemDecl->getFullName());
                    seenAnId = true;
                }
            }
        }
    }
    // If we have a doc type handler, tell it the att list is ending
    if (fDocTypeHandler)
        fDocTypeHandler->endAttList(*elemDecl);
}
//
//  This method is called to scan the value of an attribute in content. This
//  involves some normalization and replacement of general entity and
//  character references.
//
//  End of entity's must be dealt with here. During DTD scan, they can come
//  from external entities. During content, they can come from any entity.
//  We just eat the end of entity and continue with our scan until we come
//  to the closing quote. If an unterminated value causes us to go through
//  subsequent entities, that will cause errors back in the calling code,
//  but there's little we can do about it here.
//
bool DTDScanner::scanAttValue(const   XMLCh* const        attrName
                                ,       XMLBuffer&          toFill
                                , const XMLAttDef::AttTypes type)
{
    enum States
    {
        InWhitespace
        , InContent
    };
    // Reset the target buffer
    toFill.reset();
    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr->skipIfQuote(quoteCh))
        return false;
    //
    //  We have to get the current reader because we have to ignore closing
    //  quotes until we hit the same reader again.
    //
    const unsigned int curReader = fReaderMgr->getCurrentReaderNum();
    //
    //  Loop until we get the attribute value. Note that we use a double
    //  loop here to avoid the setup/teardown overhead of the exception
    //  handler on every round.
    //
    XMLCh   nextCh;
    XMLCh   secondCh = 0;
    States  curState = InContent;
    bool    firstNonWS = false;
    bool    gotLeadingSurrogate = false;
    bool    escaped;
    while (true)
    {
    try
    {
        while(true)
        {
            // Get another char. Use second char from prevous is its there
            if (secondCh)
            {
                nextCh = secondCh;
                secondCh = 0;
            }
             else
            {
                nextCh = fReaderMgr->getNextChar();
            }
            if (!nextCh)
                ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
            // Check for our ending quote in the same entity
            if (nextCh == quoteCh)
            {
                if (curReader == fReaderMgr->getCurrentReaderNum())
                    return true;
                // Watch for spillover into a previous entity
                if (curReader > fReaderMgr->getCurrentReaderNum())
                {
                    fScanner->emitError(XMLErrs::PartialMarkupInEntity);
                    return false;
                }
            }
            //
            //  Check for an entity ref now, before we let it affect our
            //  whitespace normalization logic below. We ignore the empty flag
            //  in this one.
            //
            escaped = false;
            if (nextCh == chAmpersand)
            {
                if (scanEntityRef(nextCh, secondCh, escaped) != EntityExp_Returned)
                {
                    gotLeadingSurrogate = false;
                    continue;
                }
            }
            // Check for correct surrogate pairs
            if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
            {
                if (gotLeadingSurrogate)
                    fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                else
                    gotLeadingSurrogate = true;
            }
             else
            {
                if (gotLeadingSurrogate)
                {
                    if ((nextCh < 0xDC00) && (nextCh > 0xDFFF))
                        fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                }
                gotLeadingSurrogate = false;
                // Its got to at least be a valid XML character
                if (!XMLReader::isXMLChar(nextCh))
                {
                    XMLCh tmpBuf[9];
                    XMLString::binToText
                    (
                        nextCh
                        , tmpBuf
                        , 8
                        , 16
                    );
                    fScanner->emitError
                    (
                        XMLErrs::InvalidCharacter
                        , attrName
                        , tmpBuf
                    );
                }
            }
            //
            //  If its not escaped, then make sure its not a < character, which
            //  is not allowed in attribute values.
            //
            if (!escaped && (nextCh == chOpenAngle))
                fScanner->emitError(XMLErrs::BracketInAttrValue, attrName);
            //
            //  If the attribute is a CDATA type we do simple replacement of
            //  tabs and new lines with spaces, if the character is not escaped
            //  by way of a char ref.
            //
            //  Otherwise, we do the standard non-CDATA normalization of
            //  compressing whitespace to single spaces and getting rid of
            //  leading and trailing whitespace.
            //
            if (type == XMLAttDef::CData)
            {
                if (!escaped)
                {
                    if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
                        nextCh = chSpace;
                }
            }
             else
            {
                if (curState == InWhitespace)
                {
                    if (!XMLReader::isWhitespace(nextCh))
                    {
                        if (firstNonWS)
                            toFill.append(chSpace);
                        curState = InContent;
                        firstNonWS = true;
                    }
                     else
                    {
                        continue;
                    }
                }
                 else if (curState == InContent)
                {
                    if (XMLReader::isWhitespace(nextCh))
                    {
                        curState = InWhitespace;
                        continue;
                    }
                    firstNonWS = true;
                }
            }
            // Else add it to the buffer
            toFill.append(nextCh);
        }
    }
    catch(const EndOfEntityException&)
    {
        // Just eat it and continue.
        gotLeadingSurrogate = false;
        escaped = false;
    }
    }
    return true;
}
bool DTDScanner::scanCharRef(XMLCh& first, XMLCh& second)
{
    bool gotOne = false;
    unsigned int value = 0;
    //
    //  Set the radix. Its supposed to be a lower case x if hex. But, in
    //  order to recover well, we check for an upper and put out an error
    //  for that.
    //
    unsigned int radix = 10;
    if (fReaderMgr->skippedChar(chLatin_x))
    {
        radix = 16;
    }
     else if (fReaderMgr->skippedChar(chLatin_X))
    {
        fScanner->emitError(XMLErrs::HexRadixMustBeLowerCase);
        radix = 16;
    }
    while (true)
    {
        const XMLCh nextCh = fReaderMgr->peekNextChar();
        // Watch for EOF
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        // Break out on the terminating semicolon
        if (nextCh == chSemiColon)
        {
            fReaderMgr->getNextChar();
            break;
        }
        //
        //  Convert this char to a binary value, or bail out if its not
        //  one.
        //
        unsigned int nextVal;
        if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))
            nextVal = (unsigned int)(nextCh - chDigit_0);
        else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))
            nextVal= (unsigned int)(10 + (nextCh - chLatin_A));
        else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))
            nextVal = (unsigned int)(10 + (nextCh - chLatin_a));
        else
        {
            //
            //  If we got at least a sigit, then do an unterminated ref
            //  error. Else, do an expected a numerical ref thing.
            //
            if (gotOne)
                fScanner->emitError(XMLErrs::UnterminatedCharRef);
            else
                fScanner->emitError(XMLErrs::ExpectedNumericalCharRef);
            return false;
        }
        //
        //  Make sure its valid for the radix. If not, then just eat the
        //  digit and go on after issueing an error. Else, update the
        //  running value with this new digit.
        //
        if (nextVal >= radix)
        {
            XMLCh tmpStr[2];
            tmpStr[0] = nextCh;
            tmpStr[1] = chNull;
            fScanner->emitError(XMLErrs::BadDigitForRadix, tmpStr);
        }
         else
        {
            value = (value * radix) + nextVal;
        }
        // Indicate that we got at least one good digit
        gotOne = true;
        // Eat the char we just processed
        fReaderMgr->getNextChar();
    }
    // Return the char (or chars)
    if (value >= 0x10000)
    {
        value -= 0x10000;
        first = XMLCh((value >> 10) + 0xD800);
        second = XMLCh((value & 0x3FF) + 0xDC00);
    }
     else
    {
        first = XMLCh(value);
        second = 0;
    }
    return true;
}
ContentSpecNode*
DTDScanner::scanChildren(const DTDElementDecl& elemDecl, XMLBuffer& bufToUse)
{
    // Check for a PE ref here, but don't require spaces
    checkForPERef(false, false, true);
    // We have to check entity nesting here
    unsigned int curReader;
    //
    //  We know that the caller just saw an opening parenthesis, so we need
    //  to parse until we hit the end of it, recursing for other nested
    //  parentheses we see.
    //
    //  We have to check for one up front, since it could be something like
    //  (((a)*)) etc...
    //
    ContentSpecNode* curNode = 0;
    if (fReaderMgr->skippedChar(chOpenParen))
    {
        curReader = fReaderMgr->getCurrentReaderNum();
        // Lets call ourself and get back the resulting node
        curNode = scanChildren(elemDecl, bufToUse);
        // If that failed, no need to go further, return failure
        if (!curNode)
            return 0;
        if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
            fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
    }
     else
    {
        // Not a nested paren, so it must be a leaf node
        if (!fReaderMgr->getName(bufToUse))
        {
            fScanner->emitError(XMLErrs::ExpectedElementName);
            return 0;
        }
        //
        //  Create a leaf node for it. If we can find the element id for
        //  this element, then use it. Else, we have to fault in an element
        //  decl, marked as created because of being in a content model.
        //
        XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
        if (!decl)
        {
            decl = new DTDElementDecl(bufToUse.getRawBuffer(), fEmptyNamespaceId);
            decl->setCreateReason(XMLElementDecl::InContentModel);
            decl->setExternalElemDeclaration(isReadingExternalEntity());
            fDTDGrammar->putElemDecl(decl);
        }
        curNode = new ContentSpecNode(decl->getElementName());
        // Check for a PE ref here, but don't require spaces
        const bool gotSpaces = checkForPERef(false, false, true);
        // Check for a repetition character after the leaf
        const XMLCh repCh = fReaderMgr->peekNextChar();
        ContentSpecNode* tmpNode = makeRepNode(repCh, curNode);
        if (tmpNode != curNode)
        {
            if (gotSpaces)
                fScanner->emitError(XMLErrs::UnexpectedWhitespace);
            fReaderMgr->getNextChar();
            curNode = tmpNode;
        }
    }
    // Check for a PE ref here, but don't require spaces
    checkForPERef(false, false, true);
    //
    //  Ok, the next character tells us what kind of content this particular
    //  model this particular parentesized section is. Its either a choice if
    //  we see ',', a sequence if we see '|', or a single leaf node if we see
    //  a closing paren.
    //
    const XMLCh opCh = fReaderMgr->peekNextChar();
    if ((opCh != chComma)
    &&  (opCh != chPipe)
    &&  (opCh != chCloseParen))
    {
        // Not a legal char, so delete our node and return failure
        fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf);
        delete curNode;
        return 0;
    }
    //
    //  Create the head node of the correct type. We need this to remember
    //  the top of the local tree. If it was a single subexpr, then just
    //  set the head node to the current node. For the others, we'll build
    //  the tree off the second child as we move across.
    //
    ContentSpecNode* headNode = 0;
    ContentSpecNode::NodeTypes curType;
    if (opCh == chComma)
    {
        curType = ContentSpecNode::Sequence;
        headNode = new ContentSpecNode(curType, curNode, 0);
        curNode = headNode;
    }
     else if (opCh == chPipe)
    {
        curType = ContentSpecNode::Choice;
        headNode = new ContentSpecNode(curType, curNode, 0);
        curNode = headNode;
    }
     else
    {
        headNode = curNode;
        fReaderMgr->getNextChar();
    }
    //
    //  If it was a sequence or choice, we just loop until we get to the
    //  end of our section, adding each new leaf or sub expression to the
    //  right child of the current node, and making that new node the current
    //  node.
    //
    if ((opCh == chComma) || (opCh == chPipe))
    {
        ContentSpecNode* lastNode = 0;
        while (true)
        {
            //
            //  The next thing must either be another | or , character followed
            //  by another leaf or subexpression, or a closing parenthesis, or a
            //  PE ref.
            //
            if (fReaderMgr->lookingAtChar(chPercent))
            {
                checkForPERef(false, false, true);
            }
             else if (fReaderMgr->skippedSpace())
            {
                // Just skip whitespace
                fReaderMgr->skipPastSpaces();
            }
             else if (fReaderMgr->skippedChar(chCloseParen))
            {
                //
                //  We've hit the end of this section, so break out. But, we
                //  need to see if we left a partial sequence of choice node
                //  without a second node. If so, we have to undo that and
                //  put its left child into the right node of the previous
                //  node.
                //
                if ((curNode->getType() == ContentSpecNode::Choice)
                ||  (curNode->getType() == ContentSpecNode::Sequence))
                {
                    if (!curNode->getSecond())
                    {
                        ContentSpecNode* saveFirst = curNode->orphanFirst();
                        lastNode->setSecond(saveFirst);
                        curNode = lastNode;
                    }
                }
                break;
            }
             else if (fReaderMgr->skippedChar(opCh))
            {
                // Check for a PE ref here, but don't require spaces
                checkForPERef(false, false, true);
                if (fReaderMgr->skippedChar(chOpenParen))
                {
                    curReader = fReaderMgr->getCurrentReaderNum();
                    // Recurse to handle this new guy
                    ContentSpecNode* subNode = scanChildren(elemDecl, bufToUse);
                    // If it failed, we are done, clean up here and return failure
                    if (!subNode)
                    {
                        delete headNode;
                        return 0;
                    }
                    if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
                        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
                    // Else patch it in and make it the new current
                    ContentSpecNode* newCur = new ContentSpecNode
                    (
                        curType
                        , subNode
                        , 0
                    );
                    curNode->setSecond(newCur);
                    lastNode = curNode;
                    curNode = newCur;
                }
                 else
                {
                    //
                    //  Got to be a leaf node, so get a name. If we cannot get
                    //  one, then clean up and get outa here.
                    //
                    if (!fReaderMgr->getName(bufToUse))
                    {
                        delete headNode;
                        fScanner->emitError(XMLErrs::ExpectedElementName);
                        return 0;
                    }
                    //
                    //  Create a leaf node for it. If we can find the element
                    //  id for this element, then use it. Else, we have to
                    //  fault in an element decl, marked as created because
                    //  of being in a content model.
                    //
                    XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
                    if (!decl)
                    {
                        decl = new DTDElementDecl(bufToUse.getRawBuffer(), fEmptyNamespaceId);
                        decl->setCreateReason(XMLElementDecl::InContentModel);
                        decl->setExternalElemDeclaration(isReadingExternalEntity());
                        fDTDGrammar->putElemDecl(decl);
                    }
                    ContentSpecNode* tmpLeaf = new ContentSpecNode(decl->getElementName());
                    // Check for a repetition character after the leaf
                    const XMLCh repCh = fReaderMgr->peekNextChar();
                    ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf);
                    if (tmpLeaf != tmpLeaf2)
                        fReaderMgr->getNextChar();
                    //
                    //  Create a new sequence or choice node, with the leaf
                    //  (or rep surrounding it) we just got as its first node.
                    //  Make the new node the second node of the current node,
                    //  and then make it the current node.
                    //
                    ContentSpecNode* newCur = new ContentSpecNode
                    (
                        curType
                        , tmpLeaf2
                        , 0
                    );
                    curNode->setSecond(newCur);
                    lastNode = curNode;
                    curNode = newCur;
                }
            }
             else
            {
                // Cannot be valid
                if (opCh == chComma)
                {
                    fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen);
                }
                 else
                {
                    fScanner->emitError
                    (
                        XMLErrs::ExpectedSeqOrCloseParen
                        , elemDecl.getFullName()
                    );
                }
                delete headNode;
                return 0;
            }
        }
    }
    //
    //  We saw the terminating parenthesis so lets check for any repetition
    //  character, and create a node for that, making the head node the child
    //  of it.
    //
    XMLCh repCh = fReaderMgr->peekNextChar();
    ContentSpecNode* retNode = makeRepNode(repCh, headNode);
    if (retNode != headNode)
        fReaderMgr->getNextChar();
    return retNode;
}
//
//  We get here after the '<!--' part of the comment. We scan past the
//  terminating '-->' It will calls the appropriate handler with the comment
//  text, if one is provided. A comment can be in either the document or
//  the DTD, so the fInDocument flag is used to know which handler to send
//  it to.
//
void DTDScanner::scanComment()
{
    enum States
    {
        InText
        , OneDash
        , TwoDashes
    };
    // Get a buffer for this
    XMLBufBid bbComment(fBufMgr);
    //
    //  Get the comment text into a temp buffer. Be sure to use temp buffer
    //  two here, since its to be used for stuff that is potentially longer
    //  than just a name.
    //
    States curState = InText;
    while (true)
    {
        // Get the next character
        const XMLCh nextCh = fReaderMgr->getNextChar();
        //  Watch for an end of file
        if (!nextCh)
        {
            fScanner->emitError(XMLErrs::UnterminatedComment);
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        }
        // Make sure its a valid XML character
        if (!XMLReader::isXMLChar(nextCh))
        {
            XMLCh tmpBuf[9];
            XMLString::binToText
            (
                nextCh
                , tmpBuf
                , 8
                , 16
            );
            fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
        }
        if (curState == InText)
        {
            // If its a dash, go to OneDash state. Otherwise take as text
            if (nextCh == chDash)
                curState = OneDash;
            else
                bbComment.append(nextCh);
        }
         else if (curState == OneDash)
        {
            //
            //  If its another dash, then we change to the two dashes states.
            //  Otherwise, we have to put in the deficit dash and the new
            //  character and go back to InText.
            //
            if (nextCh == chDash)
            {
                curState = TwoDashes;
            }
             else
            {
                bbComment.append(chDash);
                bbComment.append(nextCh);
                curState = InText;
            }
        }
         else if (curState == TwoDashes)
        {
            // The next character must be the closing bracket
            if (nextCh != chCloseAngle)
            {
                fScanner->emitError(XMLErrs::IllegalSequenceInComment);
                fReaderMgr->skipPastChar(chCloseAngle);
                return;
            }
            break;
        }
    }
    // If there is a doc type handler, then pass on the comment stuff
    if (fDocTypeHandler)
        fDocTypeHandler->doctypeComment(bbComment.getRawBuffer());
}
bool DTDScanner::scanContentSpec(DTDElementDecl& toFill)
{
    //
    //  Check for for a couple of the predefined content type strings. If
    //  its not one of these, its got to be a parenthesized reg ex type
    //  expression.
    //
    if (fReaderMgr->skippedString(XMLUni::fgEmptyString))
    {
        toFill.setModelType(DTDElementDecl::Empty);
        return true;
    }
    if (fReaderMgr->skippedString(XMLUni::fgAnyString))
    {
        toFill.setModelType(DTDElementDecl::Any);
        return true;
    }
    // Its got to be a parenthesized regular expression
    if (!fReaderMgr->skippedChar(chOpenParen))
    {
        fScanner->emitError
        (
            XMLErrs::ExpectedContentSpecExpr
            , toFill.getFullName()
        );
        return false;
    }
    // Get the current reader id, so we can test for partial markup
    const unsigned int curReader = fReaderMgr->getCurrentReaderNum();
    // We could have a PE ref here, but don't require space
    checkForPERef(false, false, true);
    //
    //  Now we look for a PCDATA string. If its PCDATA, then it must be a
    //  MIXED model. Otherwise, it must be a regular list of children in
    //  a regular expression perhaps.
    //
    bool status;
    if (fReaderMgr->skippedString(XMLUni::fgPCDATAString))
    {
        // Set the model to mixed
        toFill.setModelType(DTDElementDecl::Mixed_Simple);
        status = scanMixed(toFill);
        //
        //  If we are validating we have to check that there are no multiple
        //  uses of any child elements.
        //
        if (fScanner->getDoValidation())
        {
            if (((const MixedContentModel*)toFill.getContentModel())->hasDups())
                fScanner->getValidator()->emitError(XMLValid::RepElemInMixed);
        }
    }
     else
    {
        //
        //  We have to do a recursive scan of the content model. Create a
        //  buffer for it to use, for efficiency. It returns the top ofthe
        //  content spec node tree, which we set if successful.
        //
        toFill.setModelType(DTDElementDecl::Children);
        XMLBufBid bbTmp(fBufMgr);
        ContentSpecNode* resNode = scanChildren(toFill, bbTmp.getBuffer());
        status = (resNode != 0);
        if (status)
            toFill.setContentSpec(resNode);
    }
    // Make sure we are on the same reader as where we started
    if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
    return status;
}
void DTDScanner::scanDefaultDecl(DTDAttDef& toFill)
{
    if (fReaderMgr->skippedString(XMLUni::fgRequiredString))
    {
        toFill.setDefaultType(XMLAttDef::Required);
        return;
    }
    if (fReaderMgr->skippedString(XMLUni::fgImpliedString))
    {
        toFill.setDefaultType(XMLAttDef::Implied);
        return;
    }
    if (fReaderMgr->skippedString(XMLUni::fgFixedString))
    {
        //
        //  There must be space before the fixed value. If there is not, then
        //  emit an error but keep going.
        //
        if (!fReaderMgr->skippedSpace())
            fScanner->emitError(XMLErrs::ExpectedWhitespace);
        else
            fReaderMgr->skipPastSpaces();
        toFill.setDefaultType(XMLAttDef::Fixed);
    }
     else
    {
        toFill.setDefaultType(XMLAttDef::Default);
    }
    //
    //  If we got here, its fixed or default, so we need to get a value.
    //  If we don't, then emit an error but just set the default value to
    //  an empty string and try to keep going.
    //
    XMLBufBid bbValue(fBufMgr);
    if (!scanAttValue(toFill.getFullName(), bbValue.getBuffer(), toFill.getType()))
        fScanner->emitError(XMLErrs::ExpectedDefAttrDecl);
    toFill.setValue(bbValue.getRawBuffer());
}
//
//  This method handles the high level logic of scanning the DOCType
//  declaration. This kicks off both the scanning of the internal subset and
//  the scanning of the external subset, if any.
//
//  When we get here the '<!DOCTYPE' part has already been scanned, which is
//  what told us that we had a doc type decl to parse.
//
void DTDScanner::scanDocTypeDecl(const bool reuseGrammar)
{
    // There must be some space after DOCTYPE
    if (!fReaderMgr->skipPastSpaces())
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        // Just skip the Doctype declaration and return
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // Get a buffer for the root element
    XMLBufBid bbRootName(fBufMgr);
    //
    //  Get a name from the input, which should be the name of the root
    //  element of the upcoming content.
    //
    fReaderMgr->getName(bbRootName.getBuffer());
    if (bbRootName.isEmpty())
    {
        fScanner->emitError(XMLErrs::NoRootElemInDOCTYPE);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    //
    //  Store the root element name for later check
    //
    fScanner->setRootElemName(bbRootName.getRawBuffer());
    //
    //  This element obviously is not going to exist in the element decl
    //  pool yet, but we need to call docTypeDecl. So force it into
    //  the element decl pool, marked as being there because it was in
    //  the DOCTYPE. Later, when its declared, the status will be updated.
    //
    //  Only do this if we are not reusing the validator! If we are reusing,
    //  then look it up instead. It has to exist!
    //
    DTDElementDecl* rootDecl;
    Janitor<DTDElementDecl> janSrc(0);
    if (reuseGrammar)
    {
        Grammar* fGrammar = fDTDGrammar;
        if (fGrammar->getGrammarType() == Grammar::DTDGrammarType) {
            rootDecl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbRootName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
            if (rootDecl)
                fDTDGrammar->setRootElemId(rootDecl->getId());
            else {
                rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
                rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
                rootDecl->setExternalElemDeclaration(isReadingExternalEntity());
                fDTDGrammar->setRootElemId(fDTDGrammar->putElemDecl(rootDecl));
            }
        }
        else {
            rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
            rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
            rootDecl->setExternalElemDeclaration(isReadingExternalEntity());
            janSrc.reset(rootDecl);
        }
    }
     else
    {
        rootDecl = new DTDElementDecl(bbRootName.getRawBuffer(), fEmptyNamespaceId);
        rootDecl->setCreateReason(DTDElementDecl::AsRootElem);
        rootDecl->setExternalElemDeclaration(isReadingExternalEntity());
        fDTDGrammar->setRootElemId(fDTDGrammar->putElemDecl(rootDecl));
    }
    // Skip any spaces after the name
    fReaderMgr->skipPastSpaces();
    //
    //  And now if we are looking at a >, then we are done. It is not
    //  required to have an internal or external subset, though why you
    //  would not escapes me.
    //
    if (fReaderMgr->skippedChar(chCloseAngle)) {
        //
        //  If we have a doc type handler and advanced callbacks are enabled,
        //  call the doctype event.
        //
        if (fDocTypeHandler)
            fDocTypeHandler->doctypeDecl(*rootDecl, 0, 0, false);
        return;
    }
    // either internal/external subset
    if(!reuseGrammar) {
        if (fScanner->getValidationScheme() == XMLScanner::Val_Auto)
            fScanner->setDoValidation(true, false);
    }
    bool    hasIntSubset = false;
    bool    hasExtSubset = false;
    XMLCh*  sysId = 0;
    XMLCh*  pubId = 0;
    //
    //  If the next character is '[' then we have no external subset cause
    //  there is no system id, just the opening character of the internal
    //  subset. Else, has to be an id.
    //
    // Just look at the next char, don't eat it.
    if (fReaderMgr->peekNextChar() == chOpenSquare)
    {
        hasIntSubset = true;
    }
     else
    {
        // Indicate we have an external subset
        hasExtSubset = true;
        fScanner->setHasNoDTD(false);
        // Get buffers for the ids
        XMLBufBid bbPubId(fBufMgr);
        XMLBufBid bbSysId(fBufMgr);
        // Get the external subset id
        if (!scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), IDType_External))
        {
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        // Get copies of the ids we got
        pubId = XMLString::replicate(bbPubId.getRawBuffer());
        sysId = XMLString::replicate(bbSysId.getRawBuffer());
        // Skip spaces and check again for the opening of an internal subset
        fReaderMgr->skipPastSpaces();
        // Just look at the next char, don't eat it.
        if (fReaderMgr->peekNextChar() == chOpenSquare) {
            hasIntSubset = true;
        }
    }
    // Insure that the ids get cleaned up, if they got allocated
    ArrayJanitor<XMLCh> janSysId(sysId);
    ArrayJanitor<XMLCh> janPubId(pubId);
    //
    //  If we have a doc type handler and advanced callbacks are enabled,
    //  call the doctype event.
    //
    if (fDocTypeHandler)
        fDocTypeHandler->doctypeDecl(*rootDecl, pubId, sysId, hasIntSubset);
    //
    //  Ok, if we had an internal subset, we are just past the [ character
    //  and need to parse that first.
    //
    if (hasIntSubset)
    {
        // Eat the opening square bracket
        fReaderMgr->getNextChar();
        // We can't have any internal subset if we are reusing the validator
        if (reuseGrammar)
            ThrowXML(RuntimeException, XMLExcepts::Val_CantHaveIntSS);
        // Indicate we are in the internal subset now
        FlagJanitor<bool> janContentFlag(&fInternalSubset, true);
        //
        //  And try to scan the internal subset. If we fail, try to recover
        //  by skipping forward tot he close angle and returning.
        //
        if (!scanInternalSubset())
        {
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        //
        //  Do a sanity check that some expanded PE did not propogate out of
        //  the doctype. This could happen if it was terminated early by bad
        //  syntax.
        //
        if (fReaderMgr->getReaderDepth() > 1)
        {
            fScanner->emitError(XMLErrs::PEPropogated);
            // Ask the reader manager to pop back down to the main level
            fReaderMgr->cleanStackBackTo(1);
        }
        fReaderMgr->skipPastSpaces();
    }
    // And that should leave us at the closing > of the DOCTYPE line
    if (!fReaderMgr->skippedChar(chCloseAngle))
    {
        //
        //  Do a special check for the common scenario of an extra ] char at
        //  the end. This is easy to recover from.
        //
        if (fReaderMgr->skippedChar(chCloseSquare)
        &&  fReaderMgr->skippedChar(chCloseAngle))
        {
            fScanner->emitError(XMLErrs::ExtraCloseSquare);
        }
         else
        {
            fScanner->emitError(XMLErrs::UnterminatedDOCTYPE);
            fReaderMgr->skipPastChar(chCloseAngle);
        }
    }
    //
    //  If we had an external subset, then we need to deal with that one
    //  next. If we are reusing the validator, then don't scan it.
    //
    if (hasExtSubset && !reuseGrammar)
    {
        // Indicate we are in the external subset now
        FlagJanitor<bool> janContentFlag(&fInternalSubset, false);
        // And now create a reader to read this entity
        InputSource* srcUsed;
        XMLReader* reader = fReaderMgr->createReader
        (
            sysId
            , pubId
            , false
            , XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_General
            , XMLReader::Source_External
            , srcUsed
        );
        // Put a janitor on the input source
        Janitor<InputSource> janSrc(srcUsed);
        //
        //  If it failed then throw an exception
        //
        if (!reader)
            ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenDTD, srcUsed->getSystemId());
        //
        //  In order to make the processing work consistently, we have to
        //  make this look like an external entity. So create an entity
        //  decl and fill it in and push it with the reader, as happens
        //  with an external entity. Put a janitor on it to insure it gets
        //  cleaned up. The reader manager does not adopt them.
        //
        const XMLCh gDTDStr[] = { chLatin_D, chLatin_T, chLatin_D , chNull };
        DTDEntityDecl* declDTD = new DTDEntityDecl(gDTDStr);
        declDTD->setSystemId(sysId);
        Janitor<DTDEntityDecl> janDecl(declDTD);
        // Mark this one as a throw at end
        reader->setThrowAtEnd(true);
        // And push it onto the stack, with its pseudo name
        fReaderMgr->pushReader(reader, declDTD);
        // Tell it its not in an include section
        scanExtSubsetDecl(false);
    }
}
//
//  This is called after seeing '<!ELEMENT' which indicates that an element
//  markup is starting. This guy scans the rest of it and adds it to the
//  element decl pool if it has not already been declared.
//
void DTDScanner::scanElementDecl()
{
    //
    //  Space is legal (required actually) here so check for a PE ref. If
    //  we don't get our whitespace, then issue and error, but try to keep
    //  going.
    //
    if (!checkForPERef(true, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    // Get a buffer for the element name and scan in the name
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedElementName);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // Look this guy up in the element decl pool
    DTDElementDecl* decl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
    //
    //  If it does not exist, then we need to create it. If it does and
    //  its marked as declared, then that's an error, but we still need to
    //  scan over the content model so use the dummy declaration that the
    //  parsing code can fill in.
    //
    if (decl)
    {
        if (decl->isDeclared())
        {
            if (fScanner->getDoValidation())
                fScanner->getValidator()->emitError(XMLValid::ElementAlreadyExists, bbName.getRawBuffer());
            if (!fDumElemDecl)
                fDumElemDecl = new DTDElementDecl(bbName.getRawBuffer(), fEmptyNamespaceId);
            else
                fDumElemDecl->setElementName(bbName.getRawBuffer(),fEmptyNamespaceId);
        }
    }
     else
    {
        //
        //  Create the new empty declaration to fill in and put it into
        //  the decl pool.
        //
        decl = new DTDElementDecl(bbName.getRawBuffer(), fEmptyNamespaceId);
        fDTDGrammar->putElemDecl(decl);
    }
    // Set a flag for whether we will ignore this one
    const bool isIgnored = (decl == fDumElemDecl);
    // Mark this one if being externally declared
    decl->setExternalElemDeclaration(isReadingExternalEntity());
    // Mark this one as being declared
    decl->setCreateReason(XMLElementDecl::Declared);
    // Another check for a PE ref, with at least required whitespace
    if (!checkForPERef(true, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    // And now scan the content model for this guy.
    if (!scanContentSpec(*decl))
    {
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // Another check for a PE ref, but we don't require whitespace here
    checkForPERef(false, false, true);
    // And we should have the ending angle bracket
    if (!fReaderMgr->skippedChar(chCloseAngle))
    {
        fScanner->emitError(XMLErrs::UnterminatedElementDecl, bbName.getRawBuffer());
        fReaderMgr->skipPastChar(chCloseAngle);
    }
    //
    //  If we have a DTD handler tell it about the new element decl. We
    //  tell it if its one that can be ignored, cause its an override of a
    //  previously existing decl. If it is being ignored, only call back
    //  if advanced callbacks are enabled.
    //
    if (fDocTypeHandler)
        fDocTypeHandler->elementDecl(*decl, isIgnored);
}
//
//  This method will process a general or parameter entity reference. The
//  entity name and entity text will be stored in the entity pool. The value
//  of the entity will be scanned for any other parameter entity or char
//  references which will be expanded. So the stored value can only have
//  general entity references when done.
//
void DTDScanner::scanEntityDecl()
{
    //
    //  Space is required here, but we cannot check for a PE Ref since
    //  there could be a legal (no-ref) percent sign here. Since any
    //  entity that ended here would be illegal, we just skip spaces
    //  and then check for a percent.
    //
    if (!fReaderMgr->lookingAtSpace())
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    else
        fReaderMgr->skipPastSpaces();
    const bool isPEDecl = fReaderMgr->skippedChar(chPercent);
    //
    //  If a PE decl, then eat the percent and check for spaces or a
    //  PE ref on the other side of it. At least spaces are required.
    //
    if (isPEDecl)
    {
        if (!checkForPERef(true, false, true))
            fScanner->emitError(XMLErrs::ExpectedWhitespace);
    }
    //
    //  Now lets get a name, which should be the name of the entity. We
    //  have to get a buffer for this.
    //
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedPEName);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // If namespaces are enabled, then no colons allowed
    if (fScanner->getDoNamespaces())
    {
        if (XMLString::indexOf(bbName.getRawBuffer(), chColon) != -1)
            fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
    }
    //
    //  See if this entity already exists. If so, then the existing one
    //  takes precendence. So we use the local dummy decl to parse into
    //  and just ignore the results.
    //
    DTDEntityDecl* entityDecl;
    if (isPEDecl)
        entityDecl = fPEntityDeclPool->getByKey(bbName.getRawBuffer());
    else
        entityDecl = fEntityDeclPool->getByKey(bbName.getRawBuffer());
    if (entityDecl)
    {
        if (!fDumEntityDecl)
            fDumEntityDecl = new DTDEntityDecl;
        fDumEntityDecl->setName(bbName.getRawBuffer());
        entityDecl = fDumEntityDecl;
    }
     else
    {
        // Its not in existence already, then create an entity decl for it
        entityDecl = new DTDEntityDecl(bbName.getRawBuffer());
        //
        //  Set the declaration location. The parameter indicates whether its
        //  declared in the content/internal subset, so we know whether or not
        //  its in the external subset.
        //
        entityDecl->setDeclaredInIntSubset(fInternalSubset);
        // Add it to the appropriate entity decl pool
        if (isPEDecl)
            fPEntityDeclPool->put(entityDecl);
         else
            fEntityDeclPool->put(entityDecl);
    }
    // Set a flag that indicates whether we are ignoring this one
    const bool isIgnored = (entityDecl == fDumEntityDecl);
    // Set the PE flag on it
    entityDecl->setIsParameter(isPEDecl);
    //
    //  Space is legal (required actually) here so check for a PE ref. If
    //  we don't get our whitespace, then issue an error, but try to keep
    //  going.
    //
    if (!checkForPERef(true, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    // save the hasNoDTD status for Entity Constraint Checking
    bool hasNoDTD = fScanner->getHasNoDTD();
    if (hasNoDTD && isPEDecl)
        fScanner->setHasNoDTD(false);
    // According to the type call the value scanning method
    if (!scanEntityDef(*entityDecl, isPEDecl))
    {
        fReaderMgr->skipPastChar(chCloseAngle);
        fScanner->setHasNoDTD(true);
        fScanner->emitError(XMLErrs::ExpectedEntityValue);
        return;
    }
    if (hasNoDTD)
        fScanner->setHasNoDTD(true);
    // Space is legal (but not required) here so check for a PE ref
    checkForPERef(false, false, true);
    // And then we have to have the closing angle bracket
    if (!fReaderMgr->skippedChar(chCloseAngle))
    {
        fScanner->emitError(XMLErrs::UnterminatedEntityDecl, entityDecl->getName());
        fReaderMgr->skipPastChar(chCloseAngle);
    }
    //
    //  If we have a doc type handler, then call it. But only call it for
    //  ignored elements if advanced callbacks are enabled.
    //
    if (fDocTypeHandler)
        fDocTypeHandler->entityDecl(*entityDecl, isPEDecl, isIgnored);
}
//
//  This method will scan a general/character entity ref. It will either
//  expand a char ref and return the value directly, or it will expand
//  a general entity and a reader for it onto the reader stack.
//
//  The return value indicates whether the value was returned directly or
//  pushed as a reader or it failed.
//
//  The escaped flag tells the caller whether the returnd parameter resulted
//  from a character reference, which escapes the character in some cases. It
//  only makes any difference if the return indicates the value was returned
//  directly.
//
//  NOTE: This is only called when scanning attribute values, so we always
//  expand general entities.
//
DTDScanner::EntityExpRes
DTDScanner::scanEntityRef(XMLCh& firstCh, XMLCh& secondCh, bool& escaped)
{
    // Assume no escape and no second char
    escaped = false;
    secondCh = 0;
    // We have to insure its all done in a single entity
    const unsigned int curReader = fReaderMgr->getCurrentReaderNum();
    //
    //  If the next char is a pound, then its a character reference and we
    //  need to expand it always.
    //
    if (fReaderMgr->skippedChar(chPound))
    {
        //
        //  Its a character reference, so scan it and get back the numeric
        //  value it represents. If it fails, just return immediately.
        //
        if (!scanCharRef(firstCh, secondCh))
            return EntityExp_Failed;
        if (curReader != fReaderMgr->getCurrentReaderNum())
            fScanner->emitError(XMLErrs::PartialMarkupInEntity);
        // Its now escaped since it was a char ref
        escaped = true;
        return EntityExp_Returned;
    }
    // Get the name of the general entity
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedEntityRefName);
        return EntityExp_Failed;
    }
    //
    //  Next char must be a semi-colon. But if its not, just emit
    //  an error and try to continue.
    //
    if (!fReaderMgr->skippedChar(chSemiColon))
        fScanner->emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());
    // Make sure it was all in one entity reader
    if (curReader != fReaderMgr->getCurrentReaderNum())
        fScanner->emitError(XMLErrs::PartialMarkupInEntity);
    // Look it up the name the general entity pool
    XMLEntityDecl* decl = fEntityDeclPool->getByKey(bbName.getRawBuffer());
    // If it does not exist, then obviously an error
    if (!decl)
    {
        // XML 1.0 Section 4.1
        if (fScanner->getStandalone() || fScanner->getHasNoDTD()) {
            fScanner->emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
        }
        else {
            if (fScanner->getDoValidation())
                fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
        }
        return EntityExp_Failed;
    }
    //
    //  If we are a standalone document, then it has to have been declared
    //  in the internal subset. Keep going though.
    //
    if (fScanner->getDoValidation() && fScanner->getStandalone() && !decl->getDeclaredInIntSubset())
        fScanner->getValidator()->emitError(XMLValid::IllegalRefInStandalone, bbName.getRawBuffer());
    //
    //  If its a special char reference, then its escaped and we can return
    //  it directly.
    //
    if (decl->getIsSpecialChar())
    {
        firstCh = decl->getValue()[0];
        escaped = true;
        return EntityExp_Returned;
    }
    if (decl->isExternal())
    {
        // If its unparsed, then its not valid here
        // XML 1.0 Section 4.4.4 the appearance of a reference to an unparsed entity is forbidden.
        if (decl->isUnparsed())
        {
            fScanner->emitError(XMLErrs::NoUnparsedEntityRefs, bbName.getRawBuffer());
            return EntityExp_Failed;
        }
        // We are in an attribute value, so not valid.
        // XML 1.0 Section 4.4.4 a reference to an external entity in an attribute value is forbidden.
        fScanner->emitError(XMLErrs::NoExtRefsInAttValue);
        // And now create a reader to read this entity
        InputSource* srcUsed;
        XMLReader* reader = fReaderMgr->createReader
        (
            decl->getSystemId()
            , decl->getPublicId()
            , false
            , XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_General
            , XMLReader::Source_External
            , srcUsed
        );
        // Put a janitor on the source so it gets cleaned up on exit
        Janitor<InputSource> janSrc(srcUsed);
        //
        //  If the creation failed then throw an exception
        //
        if (!reader)
            ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed->getSystemId());
        //
        //  Push the reader. If its a recursive expansion, then emit an error
        //  and return an failure.
        //
        if (!fReaderMgr->pushReader(reader, decl))
        {
            fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());
            return EntityExp_Failed;
        }
        // If it starts with the XML string, then parse a text decl
        if (fScanner->checkXMLDecl(true))
            scanTextDecl();
    }
     else
    {
        //
        //  Create a reader over a memory stream over the entity value
        //  We force it to assume UTF-16 by passing in an encoding
        //  string. This way it won't both trying to predecode the
        //  first line, looking for an XML/TextDecl.
        //
        XMLReader* valueReader = fReaderMgr->createIntEntReader
        (
            decl->getName()
            , XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_General
            , decl->getValue()
            , decl->getValueLen()
            , false
        );
        //
        //  Trt to push the entity reader onto the reader manager stack,
        //  where it will become the subsequent input. If it fails, that
        //  means the entity is recursive, so issue an error. The reader
        //  will have just been discarded, but we just keep going.
        //
        if (!fReaderMgr->pushReader(valueReader, decl))
            fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());
    }
    return EntityExp_Pushed;
}
//
//  This method will scan a quoted literal of an entity value. It has to
//  deal with replacement of PE references; however, since this is a DTD
//  scanner, all such entity literals are in entity decls and therefore
//  general entities are not expanded.
//
bool DTDScanner::scanEntityLiteral(XMLBuffer& toFill, const bool isPE)
{
    toFill.reset();
    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr->skipIfQuote(quoteCh))
        return false;
    // Get a buffer for pulling in entity names when we see GE refs
    XMLBufBid bbName(fBufMgr);
    XMLBuffer& nameBuf = bbName.getBuffer();
    // Remember the current reader
    const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
    //
    //  Loop until we see the ending quote character, handling any references
    //  in the process.
    //
    XMLCh   nextCh;
    XMLCh   secondCh = 0;
    bool    gotLeadingSurrogate = false;
    while (true)
    {
        // Get the second char if we have one, else get another
        if (secondCh)
        {
            nextCh = secondCh;
            secondCh = 0;
        }
         else
        {
            nextCh = fReaderMgr->getNextChar();
        }
        //
        //  Watch specifically for EOF and issue a more meaningful error
        //  if that occurs (since an unterminated quoted char can cause
        //  this easily.)
        //
        if (!nextCh)
        {
            fScanner->emitError(XMLErrs::UnterminatedEntityLiteral);
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        }
        //
        //  Break out on our terminating quote char when we are back in the
        //  same reader. Otherwise, we might trigger on a nested quote char
        //  in an expanded entity.
        //
        if ((nextCh == quoteCh)
        &&  (fReaderMgr->getCurrentReaderNum() == orgReader))
        {
            break;
        }
        if (nextCh == chPercent)
        {
            //
            //  Put the PE's value on the reader stack and then jump back
            //  to the top to start processing it. The parameter indicates
            //  that it should not scan the reference's content as an external
            //  subset.
            //
            expandPERef(false, true, true);
            continue;
        }
        //
        //  Ok, now that all the other special stuff is checked, we can
        //  look for a general entity. In here, we cannot have a naked &
        //  and will only expand numerical char refs or the intrinsic char
        //  refs. Others will be left alone.
        //
        if (nextCh == chAmpersand)
        {
            //
            //  Here, we only expand numeric char refs, but not any general
            //  entities. However, the stupid XML spec requires that we check
            //  and make sure it does refer to a general entity if its not
            //  a char ref (i.e. no naked '&' chars.)
            //
            if (fReaderMgr->skippedChar(chPound))
            {
                // If it failed, then just jump back to the top and try to pick up
                if (!scanCharRef(nextCh, secondCh))
                {
                    gotLeadingSurrogate = false;
                    continue;
                }
            }
             else
            {
                if (!fReaderMgr->getName(nameBuf))
                {
                    fScanner->emitError(XMLErrs::ExpectedEntityRefName);
                }
                 else
                {
                    //
                    //  Since we are not expanding any of this, we have to
                    //  put the amp and name into the target buffer as data.
                    //
                    toFill.append(chAmpersand);
                    toFill.append(nameBuf.getRawBuffer());
                    // Make sure we skipped a trailing semicolon
                    if (!fReaderMgr->skippedChar(chSemiColon))
                    {
                        fScanner->emitError
                        (
                            XMLErrs::UnterminatedEntityRef
                            , nameBuf.getRawBuffer()
                        );
                    }
                    // And make the new character the semicolon
                    nextCh = chSemiColon;
                }
                // Either way here we reset the surrogate flag
                gotLeadingSurrogate = false;
            }
        }
        if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
        {
            if (gotLeadingSurrogate)
                fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
            else
                gotLeadingSurrogate = true;
        }
         else
        {
            if (gotLeadingSurrogate)
            {
                if ((nextCh < 0xDC00) && (nextCh > 0xDFFF))
                    fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
            }
             else if (!XMLReader::isXMLChar(nextCh))
            {
                XMLCh tmpBuf[9];
                XMLString::binToText
                (
                    nextCh
                    , tmpBuf
                    , 8
                    , 16
                );
                fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                fReaderMgr->skipPastChar(quoteCh);
                return false;
            }
            gotLeadingSurrogate = false;
        }
        // Looks ok, so add it to the literal
        toFill.append(nextCh);
    }
    //
    //  If we got here and did not get back to the original reader level,
    //  then we propogated some entity out of the literal, so issue an
    //  error, but don't fail.
    //
    if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
    return true;
}
//
//  This method is called after the entity name has been scanned, and any
//  PE referenced following the name is handled. The passed decl will be
//  filled in with the info scanned.
//
bool DTDScanner::scanEntityDef(DTDEntityDecl& decl, const bool isPEDecl)
{
    // Its got to be an entity literal
    if (fReaderMgr->lookingAtChar(chSingleQuote)
    ||  fReaderMgr->lookingAtChar(chDoubleQuote))
    {
        // Get a buffer for the literal
        XMLBufBid bbValue(fBufMgr);
        if (!scanEntityLiteral(bbValue.getBuffer(), isPEDecl))
            return false;
        // Set it on the entity decl
        decl.setValue(bbValue.getRawBuffer());
        return true;
    }
    //
    //  Its got to be an external entity, so there must be an external id.
    //  Get buffers for them and scan an external id into them.
    //
    XMLBufBid bbPubId(fBufMgr);
    XMLBufBid bbSysId(fBufMgr);
    if (!scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), IDType_External))
        return false;
    // Fill in the id fields of the decl with the info we got
    decl.setPublicId(bbPubId.getRawBuffer());
    decl.setSystemId(bbSysId.getRawBuffer());
    // If its a PE decl, we are done
    bool gotSpaces = checkForPERef(false, false, true);
    if (isPEDecl)
    {
        //
        //  Check for a common error here. NDATA is not allowed for PEs
        //  so check for the NDATA string. If found give a nice meaningful
        //  error and continue parsing to eat the NDATA text.
        //
        if (gotSpaces)
        {
            if (fReaderMgr->skippedString(XMLUni::fgNDATAString))
                fScanner->emitError(XMLErrs::NDATANotValidForPE);
        }
         else
        {
            return true;
        }
    }
    // If looking at close angle now, we are done
    if (fReaderMgr->lookingAtChar(chCloseAngle))
        return true;
    // Else we had to have seem the whitespace
    if (!gotSpaces)
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    // We now have to see a notation data string
    if (!fReaderMgr->skippedString(XMLUni::fgNDATAString))
        fScanner->emitError(XMLErrs::ExpectedNDATA);
    // Space is required here, but try to go on if not
    if (!checkForPERef(false, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
    // Get a name
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedNotationName);
        return false;
    }
    // Set the decl's notation name
    decl.setNotationName(bbName.getRawBuffer());
    return true;
}
//
//  This method is called after an attribute decl name or a notation decl has
//  been scanned and then an opening parenthesis was see, indicating the list
//  of values. It scans the enumeration values and creates a single string
//  which has a single space between each value.
//
//  The terminating close paren ends this scan.
//
bool DTDScanner::scanEnumeration( const   DTDAttDef&  attDef
                                    ,       XMLBuffer&  toFill
                                    , const bool        notation)
{
    // Reset the passed buffer
    toFill.reset();
    // Check for PE ref but don't require space
    checkForPERef(false, false, true);
    // If this is a notation, we need an opening paren
    if (notation)
    {
        if (!fReaderMgr->skippedChar(chOpenParen))
            fScanner->emitError(XMLErrs::ExpectedOpenParen);
    }
    // We need a local buffer to use as well
    XMLBufBid bbTmp(fBufMgr);
    while (true)
    {
        // Space is allowed here for either type so check for PE ref
        checkForPERef(false, false, true);
        // And then get either a name or a name token
        bool success;
        if (notation)
            success = fReaderMgr->getName(bbTmp.getBuffer());
        else
            success = fReaderMgr->getNameToken(bbTmp.getBuffer());
        if (!success)
        {
            fScanner->emitError
            (
                XMLErrs::ExpectedEnumValue
                , attDef.getFullName()
            );
            return false;
        }
        // Append this value to the target value
        toFill.append(bbTmp.getRawBuffer(), bbTmp.getLen());
        // Space is allowed here for either type so check for PE ref
        checkForPERef(false, false, true);
        // Check for the terminating paren
        if (fReaderMgr->skippedChar(chCloseParen))
            break;
        // And append a space separator
        toFill.append(chSpace);
        // Check for the pipe character separator
        if (!fReaderMgr->skippedChar(chPipe))
        {
            fScanner->emitError(XMLErrs::ExpectedEnumSepOrParen);
            return false;
        }
    }
    return true;
}
bool DTDScanner::scanEq()
{
    fReaderMgr->skipPastSpaces();
    if (fReaderMgr->skippedChar(chEqual))
    {
        fReaderMgr->skipPastSpaces();
        return true;
    }
    return false;
}
//
//  This method is called when an external entity reference is seen in the
//  DTD or an external DTD subset is encountered, and their contents pushed
//  onto the reader stack. This method will scan that contents.
//
void DTDScanner::scanExtSubsetDecl(const bool inIncludeSect)
{
    bool bAcceptDecl = !inIncludeSect;
    // Get a buffer for whitespace
    XMLBufBid bbSpace(fBufMgr);
    //
    //  If we have a doc type handler and we are not being called recursively
    //  to handle an include section, tell it the ext subset starts
    //
    if (fDocTypeHandler && !inIncludeSect)
        fDocTypeHandler->startExtSubset();
    //
    //  We have to play a trick here if the current entity we are parsing
    //  is a PE. Because the spooling code will put out a whitespace before
    //  and after an expanded PE if its being scanned outside the context of
    //  a literal entity, this will confuse this external subset code.
    //
    //  So, we see if that is what is happening and, if so, eat the single
    //  space, a check for the <?xml string. If we find it, we parse that
    //  markup right now and put the space back.
    //
    if (fReaderMgr->isScanningPERefOutOfLiteral())
    {
        if (fReaderMgr->skippedSpace())
        {
            if (fScanner->checkXMLDecl(true))
            {
                scanTextDecl();
                bAcceptDecl = false;
                // <TBD> Figure out how to do this
                // fReaderMgr->unGet(chSpace);
            }
        }
    }
    // Get the current reader number
    const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
    //
    //  Loop until we hit the end of the external subset entity. Note that
    //  we use a double loop here in order to avoid the overhead of doing
    //  the exception setup/teardown work on every loop.
    //
    bool inMarkup = false;
    bool inCharData = false;
    while (true)
    {
    try
    {
        while (true)
        {
            const XMLCh nextCh = fReaderMgr->peekNextChar();
            if (nextCh == chOpenAngle)
            {
                // Get the reader we started this on
                const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
                //
                //  Now scan the markup. Set the flag so that we will know that
                //  we were in markup if an end of entity exception occurs.
                //
                fReaderMgr->getNextChar();
                inMarkup = true;
                scanMarkupDecl(bAcceptDecl);
                inMarkup = false;
                //
                //  And see if we got back to the same level. If not, then its
                //  a partial markup error.
                //
                if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
                    fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
            }
             else if (XMLReader::isWhitespace(nextCh))
            {
                //
                //  If we have a doc type handler, and advanced callbacks are
                //  enabled, then gather up whitespace and call back. Otherwise
                //  just skip whitespaces.
                //
                if (fDocTypeHandler)
                {
                    inCharData = true;
                    fReaderMgr->getSpaces(bbSpace.getBuffer());
                    inCharData = false;
                    fDocTypeHandler->doctypeWhitespace
                    (
                        bbSpace.getRawBuffer()
                        , bbSpace.getLen()
                    );
                }
                 else
                {
                    //
                    //  If we hit an end of entity in the middle of white
                    //  space, that's fine. We'll just come back in here
                    //  again on the next round and skip some more.
                    //
                    fReaderMgr->skipPastSpaces();
                }
            }
             else if (nextCh == chPercent)
            {
                //
                //  Expand (and scan if external) the reference value. Tell
                //  it to throw an end of entity exception at the end of the
                //  entity.
                //
                fReaderMgr->getNextChar();
                expandPERef(true, false, false, true);
            }
             else if (inIncludeSect && (nextCh == chCloseSquare))
            {
                //
                //  Its the end of a conditional include section. So scan it and
                //  decrement the include depth counter.
                //
                fReaderMgr->getNextChar();
                if (!fReaderMgr->skippedChar(chCloseSquare))
                {
                    fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                    fReaderMgr->skipPastChar(chCloseAngle);
                }
                 else if (!fReaderMgr->skippedChar(chCloseAngle))
                {
                    fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                    fReaderMgr->skipPastChar(chCloseAngle);
                }
                return;
            }
             else
            {
                fReaderMgr->getNextChar();
                if (!XMLReader::isXMLChar(nextCh))
                {
                    XMLCh tmpBuf[9];
                    XMLString::binToText
                    (
                        nextCh
                        , tmpBuf
                        , 8
                        , 16
                    );
                    fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                }
                 else
                {
                    fScanner->emitError(XMLErrs::InvalidDocumentStructure);
                }
                // Try to get realigned
                static const XMLCh toSkip[] =
                {
                    chPercent, chCloseSquare, chOpenAngle, chNull
                };
                fReaderMgr->skipUntilInOrWS(toSkip);
            }
            bAcceptDecl = false;
        }
    }
    catch(const EndOfEntityException& toCatch)
    {
        //
        //  If the external entity ended while we were in markup, then that's
        //  a partial markup error.
        //
        if (inMarkup)
        {
            fScanner->emitError(XMLErrs::PartialMarkupInEntity);
            inMarkup = false;
        }
        // If we were in char data, then send what we got
        if (inCharData)
        {
            // Send what we got, then rethrow
            if (fDocTypeHandler)
            {
                fDocTypeHandler->doctypeWhitespace
                (
                    bbSpace.getRawBuffer()
                    , bbSpace.getLen()
                );
            }
            inCharData = false;
        }
        //
        //  If the entity that just ended was the entity that we started
        //  on, then this is the end of the external subset.
        //
        if (orgReader == toCatch.getReaderNum())
            break;
    }
    }
    // If we have a doc type handler, tell it the ext subset ends
    if (fDocTypeHandler)
        fDocTypeHandler->endExtSubset();
}
//
//  This method will scan for an id, either public or external.
//
//
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral
//                     | 'PUBLIC' S PubidLiteral S SystemLiteral
// [83] PublicID ::= 'PUBLIC' S PubidLiteral
//
bool DTDScanner::scanId(          XMLBuffer&  pubIdToFill
                            ,       XMLBuffer&  sysIdToFill
                            , const IDTypes     whatKind)
{
    // Clean out both return buffers
    pubIdToFill.reset();
    sysIdToFill.reset();
    //
    //  Check first for the system id first. If we find it, and system id
    //  is one of the legal values, then lets try to scan it.
    //
    // 'SYSTEM' S SystemLiteral
    if (fReaderMgr->skippedString(XMLUni::fgSysIDString))
    {
        // If they were looking for a public id, then we failed
        if (whatKind == IDType_Public)
        {
            fScanner->emitError(XMLErrs::ExpectedPublicId);
            return false;
        }
        // We must skip spaces
        if (!fReaderMgr->skipPastSpaces())
        {
            fScanner->emitError(XMLErrs::ExpectedWhitespace);
            return false;
        }
        // Get the system literal value
        return scanSystemLiteral(sysIdToFill);
    }
    // Now scan for public id
    // 'PUBLIC' S PubidLiteral S SystemLiteral
    //  or
    // 'PUBLIC' S PubidLiteral
    // If we don't have any public id string => Error
    if (!fReaderMgr->skippedString(XMLUni::fgPubIDString)) {
        fScanner->emitError(XMLErrs::ExpectedSystemOrPublicId);
        return false;
    }
    //
    //  So following this we must have whitespace, a public literal, whitespace,
    //  and a system literal.
    //
    if (!fReaderMgr->skipPastSpaces())
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        //
        //  Just in case, if they just forgot the whitespace but the next char
        //  is a single or double quote, then keep going.
        //
        const XMLCh chPeek = fReaderMgr->peekNextChar();
        if ((chPeek != chDoubleQuote) && (chPeek != chSingleQuote))
            return false;
    }
    if (!scanPublicLiteral(pubIdToFill))
        return false;
    // If they wanted a public id, then this is all
    if (whatKind == IDType_Public)
        return true;
    // check if there is any space follows
    bool hasSpace = fReaderMgr->skipPastSpaces();
    //
    //  In order to recover best here we need to see if
    //  the next thing is a quote or not
    //
    const XMLCh chPeek = fReaderMgr->peekNextChar();
    const bool bIsQuote =  ((chPeek == chDoubleQuote)
                         || (chPeek == chSingleQuote));
    if (!hasSpace)
    {
        if (whatKind == IDType_External)
        {
            //
            //  If its an external Id, then we need to see the system id.
            //  So, emit the error. But, if the next char is a quote, don't
            //  give up since its probably going to work. The user just
            //  missed the separating space. Otherwise, fail.
            //
            fScanner->emitError(XMLErrs::ExpectedWhitespace);
            if (!bIsQuote)
                return false;
        }
         else
        {
            //
            //  We can legally return here. But, if the next char is a quote,
            //  then that's probably not what was desired, since its probably
            //  just that space was forgotten and there really is a system
            //  id to follow.
            //
            //  So treat it like missing whitespace if so and keep going.
            //  Else, just return success.
            //
            if (bIsQuote)
                fScanner->emitError(XMLErrs::ExpectedWhitespace);
             else
                return true;
        }
    }
    if (bIsQuote) {
        // there is a quote coming, scan the system literal
        if (!scanSystemLiteral(sysIdToFill))
            return false;
    }
    else {
        // no quote, if expecting exteral id, this is an error
        if (whatKind == IDType_External)
            fScanner->emitError(XMLErrs::ExpectedQuotedString);
    }
    return true;
}
//
//  This method will scan the contents of an ignored section. It assumes that
//  we already are in the body, i.e. we've seen <![IGNORE[ at this point. So
//  we have to just scan until we see a matching ]]> closing markup.
//
void DTDScanner::scanIgnoredSection()
{
    //
    //  Depth starts at one because we are already in one section and want
    //  to parse until we hit its end.
    //
    unsigned long depth = 1;
    while (true)
    {
        const XMLCh nextCh = fReaderMgr->getNextChar();
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        if (nextCh == chOpenAngle)
        {
            if (fReaderMgr->skippedChar(chBang)
            &&  fReaderMgr->skippedChar(chOpenSquare))
            {
                depth++;
            }
        }
         else if (nextCh == chCloseSquare)
        {
            if (fReaderMgr->skippedChar(chCloseSquare))
            {
                while (fReaderMgr->skippedChar(chCloseSquare))
                {
                    // Do nothing, just skip them
                }
                if (fReaderMgr->skippedChar(chCloseAngle))
                {
                    depth--;
                    if (!depth)
                        break;
                }
            }
        }
         else if (!XMLReader::isXMLChar(nextCh))
        {
            XMLCh tmpBuf[9];
            XMLString::binToText
            (
                nextCh
                , tmpBuf
                , 8
                , 16
            );
            fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
        }
    }
}
//
//  This method scans the entire internal subset. All we can have here is
//  decl markup, and PE references. The expanded PE references must contain
//  whole markup, so we don't have to worry about their content at this
//  level. We just scan them, expand them, push them, and parse their content
//  right there, via the expandERef() method.
//
bool DTDScanner::scanInternalSubset()
{
    // If we have a doc type handler, tell it the internal subset starts
    if (fDocTypeHandler)
        fDocTypeHandler->startIntSubset();
    // Get a buffer for whitespace
    XMLBufBid bbSpace(fBufMgr);
    bool noErrors = true;
    while (true)
    {
        const XMLCh nextCh = fReaderMgr->peekNextChar();
        //
        //  If we get an end of file marker, just unget it and return a
        //  failure status. The caller will then see the end of file and
        //  faill out correctly.
        //
        if (!nextCh)
            return false;
        // Watch for the end of internal subset marker
        if (nextCh == chCloseSquare)
        {
            fReaderMgr->getNextChar();
            break;
        }
        if (nextCh == chPercent)
        {
            //
            //  Expand (and scan if external) the reference value. Tell
            //  it to set the reader to cause an end of entity exception
            //  when this reader dies, which is what the scanExtSubset
            //  method wants (who is called to scan this.)
            //
            fReaderMgr->getNextChar();
            expandPERef(true, false, false, true);
        }
         else if (nextCh == chOpenAngle)
        {
            // Remember this reader before we start the scan
            const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
            // And scan this markup
            fReaderMgr->getNextChar();
            scanMarkupDecl(false);
            // If we did not get back to entry level, then partial markup
            if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
                fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
        }
         else if (XMLReader::isWhitespace(nextCh))
        {
            //
            //  IF we are doing advanced callbacks and have a doc type
            //  handler, then get the whitespace and call the doc type
            //  handler with it. Otherwise, just skip whitespace.
            //
            if (fDocTypeHandler)
            {
                fReaderMgr->getSpaces(bbSpace.getBuffer());
                fDocTypeHandler->doctypeWhitespace
                (
                    bbSpace.getRawBuffer()
                    , bbSpace.getLen()
                );
            }
             else
            {
                fReaderMgr->skipPastSpaces();
            }
        }
         else
        {
            // Not valid, so emit an error
            XMLCh tmpBuf[9];
            XMLString::binToText
            (
                fReaderMgr->getNextChar()
                , tmpBuf
                , 8
                , 16
            );
            fScanner->emitError
            (
                XMLErrs::InvalidCharacterInIntSubset
                , tmpBuf
            );
            //
            //  If an '>', then probably an abnormally terminated
            //  internal subset so just return.
            //
            if (nextCh == chCloseAngle)
            {
                noErrors = false;
                break;
            }
            //
            //  Otherwise, try to sync back up by scanning forward for
            //  a reasonable start character.
            //
            static const XMLCh toSkip[] =
            {
                chPercent, chCloseSquare, chOpenAngle, chNull
            };
            fReaderMgr->skipUntilInOrWS(toSkip);
        }
    }
    // If we have a doc type handler, tell it the internal subset ends
    if (fDocTypeHandler)
        fDocTypeHandler->endIntSubset();
    return noErrors;
}
//
//  This method is called once we see a < in the input of an int/ext subset,
//  which indicates the start of some sort of markup.
//
void DTDScanner::scanMarkupDecl(const bool parseTextDecl)
{
    //
    //  We only have two valid first characters here. One is a ! which opens
    //  some markup decl. The other is a ?, which could begin either a PI
    //  or a text decl. If parseTextDecl is false, we cannot accept a text
    //  decl.
    //
    const XMLCh nextCh = fReaderMgr->getNextChar();
    if (nextCh == chBang)
    {
        if (fReaderMgr->skippedChar(chDash))
        {
            if (fReaderMgr->skippedChar(chDash))
            {
                scanComment();
            }
             else
            {
                fScanner->emitError(XMLErrs::CommentsMustStartWith);
                fReaderMgr->skipPastChar(chCloseAngle);
            }
        }
         else if (fReaderMgr->skippedChar(chOpenSquare))
        {
            //
            //  Its a conditional section. This is only valid in the external
            //  subset, so issue an error if we aren't there.
            //
            if (fInternalSubset)
            {
                fScanner->emitError(XMLErrs::ConditionalSectInIntSubset);
                fReaderMgr->skipPastChar(chCloseAngle);
                return;
            }
            // A PE ref can happen here, but space is not required
            checkForPERef(false, false, true);
            if (fReaderMgr->skippedString(XMLUni::fgIncludeString))
            {
                checkForPERef(false, false, true);
                // Check for the following open square bracket
                if (!fReaderMgr->skippedChar(chOpenSquare))
                    fScanner->emitError(XMLErrs::ExpectedINCLUDEBracket);
                checkForPERef(false, false, true);
                //
                //  Recurse back to the ext subset call again, telling it its
                //  in an include section.
                //
                scanExtSubsetDecl(true);
            }
             else if (fReaderMgr->skippedString(XMLUni::fgIgnoreString))
            {
                checkForPERef(false, false, true);
                // Check for the following open square bracket
                if (!fReaderMgr->skippedChar(chOpenSquare))
                    fScanner->emitError(XMLErrs::ExpectedINCLUDEBracket);
                // And scan over the ignored part
                scanIgnoredSection();
            }
             else
            {
                fScanner->emitError(XMLErrs::ExpectedIncOrIgn);
                fReaderMgr->skipPastChar(chCloseAngle);
            }
        }
         else if (fReaderMgr->skippedString(XMLUni::fgAttListString))
        {
            scanAttListDecl();
        }
         else if (fReaderMgr->skippedString(XMLUni::fgElemString))
        {
            scanElementDecl();
        }
         else if (fReaderMgr->skippedString(XMLUni::fgEntityString))
        {
            scanEntityDecl();
        }
         else if (fReaderMgr->skippedString(XMLUni::fgNotationString))
        {
            scanNotationDecl();
        }
         else
        {
            fScanner->emitError(XMLErrs::ExpectedMarkupDecl);
            fReaderMgr->skipPastChar(chCloseAngle);
        }
    }
     else if (nextCh == chQuestion)
    {
        // It could be a PI or the XML declaration. Check for Decl
        if (fScanner->checkXMLDecl(false))
        {
            // If we are not accepting text decls, its an error
            if (parseTextDecl)
            {
                scanTextDecl();
            }
             else
            {
                // Emit the error and skip past this markup
                fScanner->emitError(XMLErrs::TextDeclNotLegalHere);
                fReaderMgr->skipPastChar(chCloseAngle);
            }
        }
         else
        {
            // It has to be a PI
            scanPI();
        }
    }
     else
    {
        // Can't be valid so emit error and try to skip past end of this decl
        fScanner->emitError(XMLErrs::ExpectedMarkupDecl);
        fReaderMgr->skipPastChar(chCloseAngle);
    }
}
//
//  This method is called for a mixed model element's content mode. We've
//  already scanned past the '(PCDATA' part by the time we get here. So
//  everything else is element names separated by | characters until we
//  hit the end. The passed element decl's content model is filled in with
//  the information found.
//
bool DTDScanner::scanMixed(DTDElementDecl& toFill)
{
    //
    //  The terminating star is only required if there is something more
    //  than (PCDATA).
    //
    bool starRequired = false;
    // Get a buffer to be used below to get element names
    XMLBufBid bbName(fBufMgr);
    XMLBuffer& nameBuf = bbName.getBuffer();
    //
    //  Create an initial content spec node. Its just a leaf node with a
    //  PCDATA element id. This current node pointer will be pushed down the
    //  tree as we go.
    //
    ContentSpecNode* curNode =
                 new ContentSpecNode(new QName(XMLUni::fgZeroLenString,
                                               XMLUni::fgZeroLenString,
                                               XMLElementDecl::fgPCDataElemId),
                                     false);
    //
    //  Set the initial leaf as the temporary head. If we hit the first choice
    //  node, it will be set up here. When done, this is the node that's set
    //  as the content spec for the element.
    //
    ContentSpecNode* headNode = curNode;
    // Remember the original node so we can sense the first choice node
    ContentSpecNode* orgNode = curNode;
    //
    //  We just loop around, getting the | character at the top and then
    //  looking for the next element name. We keep up with the last node
    //  and add each new one to its right node.
    //
    while (true)
    {
        //
        //  First of all we check for some grunt work details of skipping
        //  whitespace, expand PE refs, and catching invalid reps.
        //
        if (fReaderMgr->lookingAtChar(chPercent))
        {
            // Expand it and continue
            checkForPERef(false, false, true);
        }
         else if (fReaderMgr->skippedChar(chAsterisk))
        {
            //
            //  Tell them they can't have reps in mixed model, but eat
            //  it and keep going if we are allowed to.
            //
            fScanner->emitError(XMLErrs::NoRepInMixed);
        }
         else if (fReaderMgr->skippedSpace())
        {
            // Spaces are ok at this point, just eat them and continue
            fReaderMgr->skipPastSpaces();
        }
         else
        {
            if (!fReaderMgr->skippedChar(chPipe))
            {
                // Has to be the closing paren now.
                if (!fReaderMgr->skippedChar(chCloseParen))
                {
                    fScanner->emitError(XMLErrs::UnterminatedContentModel);
                    delete headNode;
                    return false;
                }
                if (!fReaderMgr->skippedChar(chAsterisk) && starRequired)
                    fScanner->emitError(XMLErrs::ExpectedAsterisk);
                //
                //  Create a zero or more node and make the original head
                //  node its first child.
                //
                headNode = new ContentSpecNode
                (
                    ContentSpecNode::ZeroOrMore
                    , headNode
                    , 0
                );
                // Store the head node as the content spec of the element.
                toFill.setContentSpec(headNode);
                break;
            }
            // Its more than just a PCDATA, so an ending star will be required now
            starRequired = true;
            // Space is legal here so check for a PE ref, but don't require space
            checkForPERef(false, false, true);
            // Get a name token
            if (!fReaderMgr->getName(nameBuf))
            {
                fScanner->emitError(XMLErrs::ExpectedElementName);
                delete headNode;
                return false;
            }
            //
            //  Create a leaf node for it. If we can find the element id for
            //  this element, then use it. Else, we have to fault in an element
            //  decl, marked as created because of being in a content model.
            //
            XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, nameBuf.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
            if (!decl)
            {
                decl = new DTDElementDecl(nameBuf.getRawBuffer(), fEmptyNamespaceId);
                decl->setCreateReason(XMLElementDecl::InContentModel);
                decl->setExternalElemDeclaration(isReadingExternalEntity());
                fDTDGrammar->putElemDecl(decl);
            }
            //
            //  If the current node is the original node, this is the first choice
            //  node, so create an initial choice node with the current node and
            //  the new element id. Store this as the head node.
            //
            //  Otherwise, we have to steal the right node of the previous choice
            //  and weave in another choice node there, which has the old choice
            //  as its left and the new leaf as its right.
            //
            if (curNode == orgNode)
            {
                curNode = new ContentSpecNode
                (
                    ContentSpecNode::Choice
                    , curNode
                    , new ContentSpecNode(decl->getElementName())
                );
                // Remember the top node
                headNode = curNode;
            }
             else
            {
                ContentSpecNode* oldRight = curNode->orphanSecond();
                curNode->setSecond
                (
                    new ContentSpecNode
                    (
                        ContentSpecNode::Choice
                        , oldRight
                        , new ContentSpecNode(decl->getElementName())
                    )
                );
                // Make the new right node the current node
                curNode = curNode->getSecond();
            }
        }
    }
    return true;
}
//
//  This method is called when we see a '<!NOTATION' string while scanning
//  markup decl. It parses out the notation and its id and stores a new
//  notation decl object in the notation decl pool.
//
void DTDScanner::scanNotationDecl()
{
    // Space is required here so check for a PE ref, and require space
    if (!checkForPERef(true, false, true))
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    //
    //  And now we get a name, which is the name of the notation. Get a
    //  buffer for the name.
    //
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedNotationName);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // If namespaces are enabled, then no colons allowed
    if (fScanner->getDoNamespaces())
    {
        if (XMLString::indexOf(bbName.getRawBuffer(), chColon) != -1)
            fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
    }
    // Space is required here so check for a PE ref, and require space
    if (!checkForPERef(true, false, true))
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    //
    //  And scan an external or public id. We need buffers to use for both
    //  of these.
    //
    XMLBufBid bbPubId(fBufMgr);
    XMLBufBid bbSysId(fBufMgr);
    if (!scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), IDType_Either))
    {
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // We can have an optional space or PE ref here
    checkForPERef(false, false, true);
    //
    //  See if it already exists. If so, add it to the notatino decl pool.
    //  Otherwise, if advanced callbacks are on, create a temp one and
    //  call out for that one.
    //
    XMLNotationDecl* decl = fDTDGrammar->getNotationDecl(bbName.getRawBuffer());
    bool isIgnoring = (decl != 0);
    if (isIgnoring)
    {
        fScanner->emitError(XMLErrs::NotationAlreadyExists, bbName.getRawBuffer());
    }
     else
    {
        // Fill in a new notation declaration and add it to the pool
        decl = new XMLNotationDecl
        (
            bbName.getRawBuffer()
            , bbPubId.getRawBuffer()
            , bbSysId.getRawBuffer()
        );
        fDTDGrammar->putNotationDecl(decl);
    }
    //
    //  If we have a document type handler, then tell it about this. If we
    //  are ignoring it, only call out if advanced callbacks are enabled.
    //
    if (fDocTypeHandler)
    {
        fDocTypeHandler->notationDecl
        (
            *decl
            , isIgnoring
        );
    }
    // And one more optional space or PE ref
    checkForPERef(false, false, true);
    // And skip the terminating bracket
    if (!fReaderMgr->skippedChar(chCloseAngle))
        fScanner->emitError(XMLErrs::UnterminatedNotationDecl);
}
//
//  Scans a PI and calls the appropriate callbacks. A PI can happen in either
//  the document or the DTD, so it calls the appropriate handler according
//  to the fInDocument flag.
//
//  At entry we have just scanned the <? part, and need to now start on the
//  PI target name.
//
void DTDScanner::scanPI()
{
    const XMLCh* namePtr = 0;
    const XMLCh* targetPtr = 0;
    //
    //  If there are any spaces here, then warn about it. If we aren't in
    //  'first error' mode, then we'll come back and can easily pick up
    //  again by just skipping them.
    //
    if (fReaderMgr->lookingAtSpace())
    {
        fScanner->emitError(XMLErrs::PINameExpected);
        fReaderMgr->skipPastSpaces();
    }
    // Get a buffer for the PI name and scan it in
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::PINameExpected);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    // Point the name pointer at the raw data
    namePtr = bbName.getRawBuffer();
    // See if it issome form of 'xml' and emit a warning
    if (!XMLString::compareIString(namePtr, XMLUni::fgXMLString))
        fScanner->emitError(XMLErrs::NoPIStartsWithXML);
    // If namespaces are enabled, then no colons allowed
    if (fScanner->getDoNamespaces())
    {
        if (XMLString::indexOf(namePtr, chColon) != -1)
            fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
    }
    //
    //  If we don't hit a space next, then the PI has no target. If we do
    //  then get out the target. Get a buffer for it as well
    //
    XMLBufBid bbTarget(fBufMgr);
    if (fReaderMgr->skippedSpace())
    {
        // Skip any leading spaces
        fReaderMgr->skipPastSpaces();
        // It does have a target, so lets move on to deal with that.
        while (1)
        {
            const XMLCh nextCh = fReaderMgr->getNextChar();
            // Watch for an end of file, which is always bad here
            if (!nextCh)
            {
                fScanner->emitError(XMLErrs::UnterminatedPI);
                ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
            }
            // Watch for potential terminating character
            if (nextCh == chQuestion)
            {
                // It must be followed by '>' to be a termination of the target
                if (fReaderMgr->skippedChar(chCloseAngle))
                    break;
            }
            // Watch for invalid chars but try to keep going
            if (!XMLReader::isXMLChar(nextCh))
            {
                XMLCh tmpBuf[9];
                XMLString::binToText
                (
                    nextCh
                    , tmpBuf
                    , 8
                    , 16
                );
                fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
            }
            bbTarget.append(nextCh);
        }
    }
     else
    {
        // No target, but make sure its terminated ok
        if (!fReaderMgr->skippedChar(chQuestion))
        {
            fScanner->emitError(XMLErrs::UnterminatedPI);
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        if (!fReaderMgr->skippedChar(chCloseAngle))
        {
            fScanner->emitError(XMLErrs::UnterminatedPI);
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
    }
    // Point the target pointer at the raw data
    targetPtr = bbTarget.getRawBuffer();
    //
    //  If we have a handler, then call it.
    //
    if (fDocTypeHandler)
    {
        fDocTypeHandler->doctypePI
        (
            namePtr
            , targetPtr
        );
    }
}
//
//  This method scans a public literal. It must be quoted and all of its
//  characters must be valid public id characters. The quotes are discarded
//  and the results are returned.
//
bool DTDScanner::scanPublicLiteral(XMLBuffer& toFill)
{
    toFill.reset();
    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr->skipIfQuote(quoteCh)) {
        fScanner->emitError(XMLErrs::ExpectedQuotedString);
        return false;
    }
    while (true)
    {
        const XMLCh nextCh = fReaderMgr->getNextChar();
        // Watch for EOF
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        if (nextCh == quoteCh)
            break;
        //
        //  If its not a valid public id char, then report it but keep going
        //  since that's the best recovery scheme.
        //
        if (!XMLReader::isPublicIdChar(nextCh))
        {
            XMLCh tmpBuf[9];
            XMLString::binToText
            (
                nextCh
                , tmpBuf
                , 8
                , 16
            );
            fScanner->emitError(XMLErrs::InvalidPublicIdChar, tmpBuf);
        }
        toFill.append(nextCh);
    }
    return true;
}
//
//  This method handles scanning in a quoted system literal. It expects to
//  start on the open quote and returns after eating the ending quote. There
//  are not really any restrictions on the contents of system literals.
//
bool DTDScanner::scanSystemLiteral(XMLBuffer& toFill)
{
    toFill.reset();
    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr->skipIfQuote(quoteCh)) {
        fScanner->emitError(XMLErrs::ExpectedQuotedString);
        return false;
    }
    while (true)
    {
        const XMLCh nextCh = fReaderMgr->getNextChar();
        // Watch for EOF
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        // Break out on terminating quote
        if (nextCh == quoteCh)
            break;
        toFill.append(nextCh);
    }
    return true;
}
//
//  This method is called to scan a text decl line, which can be the first
//  line in an external entity or external subset.
//
//  On entry the <? has been scanned, and next should be 'xml' followed by
//  some whitespace, version string, etc...
//    [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'
//
void DTDScanner::scanTextDecl()
{
    // Skip any subsequent whitespace before the version string
    fReaderMgr->skipPastSpaces();
    // Next should be the version string
    XMLBufBid bbVersion(fBufMgr);
    if (fReaderMgr->skippedString(XMLUni::fgVersionString))
    {
        if (!scanEq())
        {
            fScanner->emitError(XMLErrs::ExpectedEqSign);
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        //
        //  Followed by a single or double quoted version. Get a buffer for
        //  the string.
        //
        if (!getQuotedString(bbVersion.getBuffer()))
        {
            fScanner->emitError(XMLErrs::BadXMLVersion);
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        // If its not our supported version, issue an error but continue
        if (XMLString::compareString(bbVersion.getRawBuffer(), XMLUni::fgSupportedVersion))
            fScanner->emitError(XMLErrs::UnsupportedXMLVersion, bbVersion.getRawBuffer());
    }
    // Ok, now we must have an encoding string
    XMLBufBid bbEncoding(fBufMgr);
    fReaderMgr->skipPastSpaces();
    bool gotEncoding = false;
    if (fReaderMgr->skippedString(XMLUni::fgEncodingString))
    {
        // There must be a equal sign next
        if (!scanEq())
        {
            fScanner->emitError(XMLErrs::ExpectedEqSign);
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        // Followed by a single or double quoted version string
        getQuotedString(bbEncoding.getBuffer());
        if (bbEncoding.isEmpty())
        {
            fScanner->emitError(XMLErrs::BadXMLEncoding, bbEncoding.getRawBuffer());
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
        // Indicate that we got an encoding
        gotEncoding = true;
    }
    //
    // Encoding declarations are required in the external entity
    // if there is a text declaration present
    //
    if (!gotEncoding)
    {
      fScanner->emitError(XMLErrs::EncodingRequired);
      fReaderMgr->skipPastChar(chCloseAngle);
      return;
    }
    fReaderMgr->skipPastSpaces();
    if (!fReaderMgr->skippedChar(chQuestion))
    {
        fScanner->emitError(XMLErrs::UnterminatedXMLDecl);
        fReaderMgr->skipPastChar(chCloseAngle);
    }
     else if (!fReaderMgr->skippedChar(chCloseAngle))
    {
        fScanner->emitError(XMLErrs::UnterminatedXMLDecl);
        fReaderMgr->skipPastChar(chCloseAngle);
    }
    //
    //  If we have a document type handler and advanced callbacks are on,
    //  then call the TextDecl callback
    //
    if (fDocTypeHandler)
    {
        fDocTypeHandler->TextDecl
        (
            bbVersion.getRawBuffer()
            , bbEncoding.getRawBuffer()
        );
    }
    //
    //  If we got an encoding string, then we have to call back on the reader
    //  to tell it what the encoding is.
    //
    if (!bbEncoding.isEmpty())
    {
        if (!fReaderMgr->getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))
            fScanner->emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());
    }
}