SGXMLScanner.cpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:154k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2002, 2003 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- /*
- * $Id: SGXMLScanner.cpp,v 1.26 2003/05/18 14:02:04 knoaman Exp $
- */
- // ---------------------------------------------------------------------------
- // Includes
- // ---------------------------------------------------------------------------
- #include <xercesc/internal/SGXMLScanner.hpp>
- #include <xercesc/util/RuntimeException.hpp>
- #include <xercesc/util/UnexpectedEOFException.hpp>
- #include <xercesc/framework/LocalFileInputSource.hpp>
- #include <xercesc/framework/URLInputSource.hpp>
- #include <xercesc/framework/XMLDocumentHandler.hpp>
- #include <xercesc/framework/XMLEntityHandler.hpp>
- #include <xercesc/framework/XMLPScanToken.hpp>
- #include <xercesc/framework/MemoryManager.hpp>
- #include <xercesc/internal/EndOfEntityException.hpp>
- #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
- #include <xercesc/validators/schema/SchemaValidator.hpp>
- #include <xercesc/validators/schema/TraverseSchema.hpp>
- #include <xercesc/validators/schema/XSDDOMParser.hpp>
- #include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
- #include <xercesc/validators/schema/identity/FieldActivator.hpp>
- #include <xercesc/validators/schema/identity/XPathMatcherStack.hpp>
- #include <xercesc/validators/schema/identity/ValueStoreCache.hpp>
- #include <xercesc/validators/schema/identity/IC_Selector.hpp>
- #include <xercesc/validators/schema/identity/ValueStore.hpp>
- XERCES_CPP_NAMESPACE_BEGIN
- // ---------------------------------------------------------------------------
- // SGXMLScanner: Constructors and Destructor
- // ---------------------------------------------------------------------------
- SGXMLScanner::SGXMLScanner( XMLValidator* const valToAdopt
- , MemoryManager* const manager) :
- XMLScanner(valToAdopt, manager)
- , fSeeXsi(false)
- , fElemStateSize(16)
- , fElemState(0)
- , fElemStack(manager)
- , fContent(1023, manager)
- , fEntityTable(0)
- , fRawAttrList(0)
- , fSchemaValidator(0)
- , fMatcherStack(0)
- , fValueStoreCache(0)
- , fFieldActivator(0)
- {
- try
- {
- commonInit();
- if (valToAdopt)
- {
- if (!valToAdopt->handlesSchema())
- ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
- }
- else
- {
- fValidator = fSchemaValidator;
- }
- }
- catch(...)
- {
- cleanUp();
- throw;
- }
- }
- SGXMLScanner::SGXMLScanner( XMLDocumentHandler* const docHandler
- , DocTypeHandler* const docTypeHandler
- , XMLEntityHandler* const entityHandler
- , XMLErrorReporter* const errHandler
- , XMLValidator* const valToAdopt
- , MemoryManager* const manager) :
- XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, manager)
- , fSeeXsi(false)
- , fElemStateSize(16)
- , fElemState(0)
- , fElemStack(manager)
- , fContent(1023, manager)
- , fEntityTable(0)
- , fRawAttrList(0)
- , fSchemaValidator(0)
- , fMatcherStack(0)
- , fValueStoreCache(0)
- , fFieldActivator(0)
- {
- try
- {
- commonInit();
- if (valToAdopt)
- {
- if (!valToAdopt->handlesSchema())
- ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
- }
- else
- {
- fValidator = fSchemaValidator;
- }
- }
- catch(...)
- {
- cleanUp();
- throw;
- }
- }
- SGXMLScanner::~SGXMLScanner()
- {
- cleanUp();
- }
// ---------------------------------------------------------------------------
// SGXMLScanner: Getter methods
// ---------------------------------------------------------------------------
- NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
- {
- return 0;
- }
- const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
- {
- return 0;
- }
- // ---------------------------------------------------------------------------
- // SGXMLScanner: Main entry point to scan a document
- // ---------------------------------------------------------------------------
- void SGXMLScanner::scanDocument(const InputSource& src)
- {
- // Bump up the sequence id for this parser instance. This will invalidate
- // any previous progressive scan tokens.
- fSequenceId++;
- try
- {
- // Reset the scanner and its plugged in stuff for a new run. This
- // resets all the data structures, creates the initial reader and
- // pushes it on the stack, and sets up the base document path.
- scanReset(src);
- // If we have a document handler, then call the start document
- if (fDocHandler)
- fDocHandler->startDocument();
- // Scan the prolog part, which is everything before the root element
- // including the DTD subsets.
- scanProlog();
- // If we got to the end of input, then its not a valid XML file.
- // Else, go on to scan the content.
- if (fReaderMgr.atEOF())
- {
- emitError(XMLErrs::EmptyMainEntity);
- }
- else
- {
- // Scan content, and tell it its not an external entity
- if (scanContent(false))
- {
- // Do post-parse validation if required
- if (fValidate)
- {
- // We handle ID reference semantics at this level since
- // its required by XML 1.0.
- checkIDRefs();
- // Then allow the validator to do any extra stuff it wants
- // fValidator->postParseValidation();
- }
- // That went ok, so scan for any miscellaneous stuff
- if (!fReaderMgr.atEOF())
- scanMiscellaneous();
- }
- }
- // If we have a document handler, then call the end document
- if (fDocHandler)
- fDocHandler->endDocument();
- // Reset the reader manager to close all files, sockets, etc...
- fReaderMgr.reset();
- }
- // NOTE:
- //
- // In all of the error processing below, the emitError() call MUST come
- // before the flush of the reader mgr, or it will fail because it tries
- // to find out the position in the XML source of the error.
- catch(const XMLErrs::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and fall through
- fReaderMgr.reset();
- }
- catch(const XMLValid::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and fall through
- fReaderMgr.reset();
- }
- catch(const XMLException& excToCatch)
- {
- // Emit the error and catch any user exception thrown from here. Make
- // sure in all cases we flush the reader manager.
- fInException = true;
- try
- {
- if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
- emitError
- (
- XMLErrs::XMLException_Warning
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
- emitError
- (
- XMLErrs::XMLException_Fatal
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else
- emitError
- (
- XMLErrs::XMLException_Error
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- }
- catch(...)
- {
- // Flush the reader manager and rethrow user's error
- fReaderMgr.reset();
- throw;
- }
- // If it returned, then reset the reader manager and fall through
- fReaderMgr.reset();
- }
- catch(...)
- {
- // Reset and rethrow
- fReaderMgr.reset();
- throw;
- }
- }
- bool SGXMLScanner::scanNext(XMLPScanToken& token)
- {
- // Make sure this token is still legal
- if (!isLegalToken(token))
- ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);
- // Find the next token and remember the reader id
- unsigned int orgReader;
- XMLTokens curToken;
- bool retVal = true;
- try
- {
- while (true)
- {
- // We have to handle any end of entity exceptions that happen here.
- // We could be at the end of X nested entities, each of which will
- // generate an end of entity exception as we try to move forward.
- try
- {
- curToken = senseNextToken(orgReader);
- break;
- }
- catch(const EndOfEntityException& toCatch)
- {
- // Send an end of entity reference event
- if (fDocHandler)
- fDocHandler->endEntityReference(toCatch.getEntity());
- }
- }
- if (curToken == Token_CharData)
- {
- scanCharData(fCDataBuf);
- }
- else if (curToken == Token_EOF)
- {
- if (!fElemStack.isEmpty())
- {
- const ElemStack::StackElem* topElem = fElemStack.popTop();
- emitError
- (
- XMLErrs::EndedWithTagsOnStack
- , topElem->fThisElement->getFullName()
- );
- }
- retVal = false;
- }
- else
- {
- // Its some sort of markup
- bool gotData = true;
- switch(curToken)
- {
- case Token_CData :
- // Make sure we are within content
- if (fElemStack.isEmpty())
- emitError(XMLErrs::CDATAOutsideOfContent);
- scanCDSection();
- break;
- case Token_Comment :
- scanComment();
- break;
- case Token_EndTag :
- scanEndTag(gotData);
- break;
- case Token_PI :
- scanPI();
- break;
- case Token_StartTag :
- scanStartTag(gotData);
- break;
- default :
- fReaderMgr.skipToChar(chOpenAngle);
- break;
- }
- if (orgReader != fReaderMgr.getCurrentReaderNum())
- emitError(XMLErrs::PartialMarkupInEntity);
- // If we hit the end, then do the miscellaneous part
- if (!gotData)
- {
- // Do post-parse validation if required
- if (fValidate)
- {
- // We handle ID reference semantics at this level since
- // its required by XML 1.0.
- checkIDRefs();
- // Then allow the validator to do any extra stuff it wants
- // fValidator->postParseValidation();
- }
- // That went ok, so scan for any miscellaneous stuff
- scanMiscellaneous();
- if (fValidate)
- fValueStoreCache->endDocument();
- if (fDocHandler)
- fDocHandler->endDocument();
- }
- }
- }
- // NOTE:
- //
- // In all of the error processing below, the emitError() call MUST come
- // before the flush of the reader mgr, or it will fail because it tries
- // to find out the position in the XML source of the error.
- catch(const XMLErrs::Codes)
- {
- // This is a 'first failure' exception, so reset and return failure
- fReaderMgr.reset();
- return false;
- }
- catch(const XMLValid::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and reuturn failure
- fReaderMgr.reset();
- return false;
- }
- catch(const XMLException& excToCatch)
- {
- // Emit the error and catch any user exception thrown from here. Make
- // sure in all cases we flush the reader manager.
- fInException = true;
- try
- {
- if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
- emitError
- (
- XMLErrs::XMLException_Warning
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
- emitError
- (
- XMLErrs::XMLException_Fatal
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else
- emitError
- (
- XMLErrs::XMLException_Error
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- }
- catch(...)
- {
- // Reset and rethrow user error
- fReaderMgr.reset();
- throw;
- }
- // Reset and return failure
- fReaderMgr.reset();
- return false;
- }
- catch(...)
- {
- // Reset and rethrow original error
- fReaderMgr.reset();
- throw;
- }
- // If we hit the end, then flush the reader manager
- if (!retVal)
- fReaderMgr.reset();
- return retVal;
- }
- // ---------------------------------------------------------------------------
- // SGXMLScanner: Private scanning methods
- // ---------------------------------------------------------------------------
- // This method is called from scanStartTag() to handle the very raw initial
- // scan of the attributes. It just fills in the passed collection with
- // key/value pairs for each attribute. No processing is done on them at all.
- unsigned int
- SGXMLScanner::rawAttrScan(const XMLCh* const elemName
- , RefVectorOf<KVStringPair>& toFill
- , bool& isEmpty)
- {
- // Keep up with how many attributes we've seen so far, and how many
- // elements are available in the vector. This way we can reuse old
- // elements until we run out and then expand it.
- unsigned int attCount = 0;
- unsigned int curVecSize = toFill.size();
- // Assume it is not empty
- isEmpty = false;
- // We loop until we either see a /> or >, handling key/value pairs util
- // we get there. We place them in the passed vector, which we will expand
- // as required to hold them.
- while (true)
- {
- // Get the next character, which should be non-space
- XMLCh nextCh = fReaderMgr.peekNextChar();
- // If the next character is not a slash or closed angle bracket,
- // then it must be whitespace, since whitespace is required
- // between the end of the last attribute and the name of the next
- // one.
- //
- if (attCount)
- {
- if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
- {
- if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
- {
- // Ok, skip by them and get another char
- fReaderMgr.getNextChar();
- fReaderMgr.skipPastSpaces();
- nextCh = fReaderMgr.peekNextChar();
- }
- else
- {
- // Emit the error but keep on going
- emitError(XMLErrs::ExpectedWhitespace);
- }
- }
- }
- // Ok, here we first check for any of the special case characters.
- // If its not one, then we do the normal case processing, which
- // assumes that we've hit an attribute value, Otherwise, we do all
- // the special case checks.
- if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
- {
- // Assume its going to be an attribute, so get a name from
- // the input.
- if (!fReaderMgr.getName(fAttNameBuf))
- {
- emitError(XMLErrs::ExpectedAttrName);
- fReaderMgr.skipPastChar(chCloseAngle);
- return attCount;
- }
- // And next must be an equal sign
- if (!scanEq())
- {
- static const XMLCh tmpList[] =
- {
- chSingleQuote, chDoubleQuote, chCloseAngle
- , chOpenAngle, chForwardSlash, chNull
- };
- emitError(XMLErrs::ExpectedEqSign);
- // Try to sync back up by skipping forward until we either
- // hit something meaningful.
- const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
- if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
- {
- // Jump back to top for normal processing of these
- continue;
- }
- else if ((chFound == chSingleQuote)
- || (chFound == chDoubleQuote)
- || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
- {
- // Just fall through assuming that the value is to follow
- }
- else if (chFound == chOpenAngle)
- {
- // Assume a malformed tag and that new one is starting
- emitError(XMLErrs::UnterminatedStartTag, elemName);
- return attCount;
- }
- else
- {
- // Something went really wrong
- return attCount;
- }
- }
- // Next should be the quoted attribute value. We just do a simple
- // and stupid scan of this value. The only thing we do here
- // is to expand entity references.
- if (!basicAttrValueScan(fAttNameBuf.getRawBuffer(), fAttValueBuf))
- {
- static const XMLCh tmpList[] =
- {
- chCloseAngle, chOpenAngle, chForwardSlash, chNull
- };
- emitError(XMLErrs::ExpectedAttrValue);
- // It failed, so lets try to get synced back up. We skip
- // forward until we find some whitespace or one of the
- // chars in our list.
- const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);
- if ((chFound == chCloseAngle)
- || (chFound == chForwardSlash)
- || fReaderMgr.getCurrentReader()->isWhitespace(chFound))
- {
- // Just fall through and process this attribute, though
- // the value will be "".
- }
- else if (chFound == chOpenAngle)
- {
- // Assume a malformed tag and that new one is starting
- emitError(XMLErrs::UnterminatedStartTag, elemName);
- return attCount;
- }
- else
- {
- // Something went really wrong
- return attCount;
- }
- }
- // Make sure that the name is basically well formed for namespace
- // enabled rules. It either has no colons, or it has one which
- // is neither the first or last char.
- const int colonFirst = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
- if (colonFirst != -1)
- {
- const int colonLast = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);
- if (colonFirst != colonLast)
- {
- emitError(XMLErrs::TooManyColonsInName);
- continue;
- }
- else if ((colonFirst == 0)
- || (colonLast == (int)fAttNameBuf.getLen() - 1))
- {
- emitError(XMLErrs::InvalidColonPos);
- continue;
- }
- }
- // And now lets add it to the passed collection. If we have not
- // filled it up yet, then we use the next element. Else we add
- // a new one.
- KVStringPair* curPair = 0;
- if (attCount >= curVecSize)
- {
- curPair = new (fMemoryManager) KVStringPair
- (
- fAttNameBuf.getRawBuffer()
- , fAttValueBuf.getRawBuffer()
- , fMemoryManager
- );
- toFill.addElement(curPair);
- }
- else
- {
- curPair = toFill.elementAt(attCount);
- curPair->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer());
- }
- // And bump the count of attributes we've gotten
- attCount++;
- // And go to the top again for another attribute
- continue;
- }
- // It was some special case character so do all of the checks and
- // deal with it.
- if (!nextCh)
- ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
- if (nextCh == chForwardSlash)
- {
- fReaderMgr.getNextChar();
- isEmpty = true;
- if (!fReaderMgr.skippedChar(chCloseAngle))
- emitError(XMLErrs::UnterminatedStartTag, elemName);
- break;
- }
- else if (nextCh == chCloseAngle)
- {
- fReaderMgr.getNextChar();
- break;
- }
- else if (nextCh == chOpenAngle)
- {
- // Check for this one specially, since its going to be common
- // and it is kind of auto-recovering since we've already hit the
- // next open bracket, which is what we would have seeked to (and
- // skipped this whole tag.)
- emitError(XMLErrs::UnterminatedStartTag, elemName);
- break;
- }
- else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
- {
- // Check for this one specially, which is probably a missing
- // attribute name, e.g. ="value". Just issue expected name
- // error and eat the quoted string, then jump back to the
- // top again.
- emitError(XMLErrs::ExpectedAttrName);
- fReaderMgr.getNextChar();
- fReaderMgr.skipQuotedString(nextCh);
- fReaderMgr.skipPastSpaces();
- continue;
- }
- }
- return attCount;
- }
- // This method will kick off the scanning of the primary content of the
- // document, i.e. the elements.
- bool SGXMLScanner::scanContent(const bool extEntity)
- {
- // Go into a loop until we hit the end of the root element, or we fall
- // out because there is no root element.
- //
- // We have to do kind of a deeply nested double loop here in order to
- // avoid doing the setup/teardown of the exception handler on each
- // round. Doing it this way we only do it when an exception actually
- // occurs.
- bool gotData = true;
- bool inMarkup = false;
- while (gotData)
- {
- try
- {
- while (gotData)
- {
- // Sense what the next top level token is. According to what
- // this tells us, we will call something to handle that kind
- // of thing.
- unsigned int orgReader;
- const XMLTokens curToken = senseNextToken(orgReader);
- // Handle character data and end of file specially. Char data
- // is not markup so we don't want to handle it in the loop
- // below.
- if (curToken == Token_CharData)
- {
- // Scan the character data and call appropriate events. Let
- // him use our local character data buffer for efficiency.
- scanCharData(fCDataBuf);
- continue;
- }
- else if (curToken == Token_EOF)
- {
- // The element stack better be empty at this point or we
- // ended prematurely before all elements were closed.
- if (!fElemStack.isEmpty())
- {
- const ElemStack::StackElem* topElem = fElemStack.popTop();
- emitError
- (
- XMLErrs::EndedWithTagsOnStack
- , topElem->fThisElement->getFullName()
- );
- }
- // Its the end of file, so clear the got data flag
- gotData = false;
- continue;
- }
- // We are in some sort of markup now
- inMarkup = true;
- // According to the token we got, call the appropriate
- // scanning method.
- switch(curToken)
- {
- case Token_CData :
- // Make sure we are within content
- if (fElemStack.isEmpty())
- emitError(XMLErrs::CDATAOutsideOfContent);
- scanCDSection();
- break;
- case Token_Comment :
- scanComment();
- break;
- case Token_EndTag :
- scanEndTag(gotData);
- break;
- case Token_PI :
- scanPI();
- break;
- case Token_StartTag :
- scanStartTag(gotData);
- break;
- default :
- fReaderMgr.skipToChar(chOpenAngle);
- break;
- }
- if (orgReader != fReaderMgr.getCurrentReaderNum())
- emitError(XMLErrs::PartialMarkupInEntity);
- // And we are back out of markup again
- inMarkup = false;
- }
- }
- catch(const EndOfEntityException& toCatch)
- {
- // If we were in some markup when this happened, then its a
- // partial markup error.
- if (inMarkup)
- emitError(XMLErrs::PartialMarkupInEntity);
- // Send an end of entity reference event
- if (fDocHandler)
- fDocHandler->endEntityReference(toCatch.getEntity());
- inMarkup = false;
- }
- }
- // It went ok, so return success
- return true;
- }
- void SGXMLScanner::scanEndTag(bool& gotData)
- {
- // Assume we will still have data until proven otherwise. It will only
- // ever be false if this is the end of the root element.
- gotData = true;
- // Check if the element stack is empty. If so, then this is an unbalanced
- // element (i.e. more ends than starts, perhaps because of bad text
- // causing one to be skipped.)
- if (fElemStack.isEmpty())
- {
- emitError(XMLErrs::MoreEndThanStartTags);
- fReaderMgr.skipPastChar(chCloseAngle);
- ThrowXML(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd);
- }
- // After the </ is the element QName, so get a name from the input
- if (!fReaderMgr.getName(fQNameBuf))
- {
- // It failed so we can't really do anything with it
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipPastChar(chCloseAngle);
- return;
- }
- int prefixColonPos = -1;
- unsigned int uriId = resolveQName
- (
- fQNameBuf.getRawBuffer()
- , fPrefixBuf
- , ElemStack::Mode_Element
- , prefixColonPos
- );
- // Pop the stack of the element we are supposed to be ending. Remember
- // that we don't own this. The stack just keeps them and reuses them.
- //
- // NOTE: We CANNOT do this until we've resolved the element name because
- // the element stack top contains the prefix to URI mappings for this
- // element.
- unsigned int topUri = fElemStack.getCurrentURI();
- const ElemStack::StackElem* topElem = fElemStack.popTop();
- // See if it was the root element, to avoid multiple calls below
- const bool isRoot = fElemStack.isEmpty();
- // Make sure that its the end of the element that we expect
- XMLElementDecl* tempElement = topElem->fThisElement;
- const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
- if ((topUri != uriId) ||
- (!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
- {
- emitError
- (
- XMLErrs::ExpectedEndOfTagX
- , topElem->fThisElement->getFullName()
- );
- }
- // Make sure we are back on the same reader as where we started
- if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
- emitError(XMLErrs::PartialTagMarkupError);
- // Skip optional whitespace
- fReaderMgr.skipPastSpaces();
- // Make sure we find the closing bracket
- if (!fReaderMgr.skippedChar(chCloseAngle))
- {
- emitError
- (
- XMLErrs::UnterminatedEndTag
- , topElem->fThisElement->getFullName()
- );
- }
- // If validation is enabled, then lets pass him the list of children and
- // this element and let him validate it.
- if (fValidate)
- {
- int res = fValidator->checkContent
- (
- topElem->fThisElement
- , topElem->fChildren
- , topElem->fChildCount
- );
- if (res >= 0)
- {
- // One of the elements is not valid for the content. NOTE that
- // if no children were provided but the content model requires
- // them, it comes back with a zero value. But we cannot use that
- // to index the child array in this case, and have to put out a
- // special message.
- if (!topElem->fChildCount)
- {
- fValidator->emitError
- (
- XMLValid::EmptyNotValidForContent
- , topElem->fThisElement->getFormattedContentModel()
- );
- }
- else if ((unsigned int)res >= topElem->fChildCount)
- {
- fValidator->emitError
- (
- XMLValid::NotEnoughElemsForCM
- , topElem->fThisElement->getFormattedContentModel()
- );
- }
- else
- {
- fValidator->emitError
- (
- XMLValid::ElementNotValidForContent
- , topElem->fChildren[res]->getRawName()
- , topElem->fThisElement->getFormattedContentModel()
- );
- }
-
- }
- // call matchers and de-activate context
- int oldCount = fMatcherStack->getMatcherCount();
- if (oldCount ||
- ((SchemaElementDecl*)topElem->fThisElement)->getIdentityConstraintCount()) {
- for (int i = oldCount - 1; i >= 0; i--) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(i);
- matcher->endElement(*(topElem->fThisElement), fContent.getRawBuffer());
- }
- if (fMatcherStack->size() > 0) {
- fMatcherStack->popContext();
- }
- // handle everything *but* keyref's.
- int newCount = fMatcherStack->getMatcherCount();
- for (int j = oldCount - 1; j >= newCount; j--) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
- IdentityConstraint* ic = matcher->getIdentityConstraint();
- if (ic && (ic->getType() != IdentityConstraint::KEYREF))
- fValueStoreCache->transplant(ic, matcher->getInitialDepth());
- }
- // now handle keyref's...
- for (int k = oldCount - 1; k >= newCount; k--) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(k);
- IdentityConstraint* ic = matcher->getIdentityConstraint();
- if (ic && (ic->getType() == IdentityConstraint::KEYREF)) {
- ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth());
- if (values) { // nothing to do if nothing matched!
- values->endDcocumentFragment(fValueStoreCache);
- }
- }
- }
- fValueStoreCache->endElement();
- }
- }
- if(!isRoot)
- ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromElement(topElem->fThisElement, fGrammarType);
- // If we have a doc handler, tell it about the end tag
- if (fDocHandler)
- {
- fDocHandler->endElement
- (
- *topElem->fThisElement
- , uriId
- , isRoot
- , fPrefixBuf.getRawBuffer()
- );
- }
- // reset xsi:type ComplexTypeInfo
- ((SchemaElementDecl*)topElem->fThisElement)->reset();
- if (!isRoot)
- ((SchemaElementDecl*)(fElemStack.topElement()->fThisElement))->
- setXsiComplexTypeInfo(((SchemaValidator*)fValidator)->getCurrentTypeInfo());
- // If this was the root, then done with content
- gotData = !isRoot;
- if (gotData) {
- // Restore the grammar
- fGrammar = fElemStack.getCurrentGrammar();
- fGrammarType = fGrammar->getGrammarType();
- fValidator->setGrammar(fGrammar);
- // Restore the validation flag
- fValidate = fElemStack.getValidationFlag();
- }
- }
- // This method handles the high level logic of scanning the DOCType
- // declaration. This calls the DTDScanner and kicks off both the scanning of
- // the internal subset and the scanning of the external subset, if any.
- //
- // When we get here the '<!DOCTYPE' part has already been scanned, which is
- // what told us that we had a doc type decl to parse.
- void SGXMLScanner::scanDocTypeDecl()
- {
- // Just skips over it
- // REVISIT: Should we issue a warning
- static const XMLCh doctypeIE[] =
- {
- chOpenSquare, chCloseAngle, chNull
- };
- XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);
- if (nextCh == chOpenSquare)
- fReaderMgr.skipPastChar(chCloseSquare);
- fReaderMgr.skipPastChar(chCloseAngle);
- }
- // This method is called to scan a start tag when we are processing
- // namespaces. This method is called after we've scanned the < of a
- // start tag. So we have to get the element name, then scan the attributes,
- // after which we are either going to see >, />, or attributes followed
- // by one of those sequences.
- bool SGXMLScanner::scanStartTag(bool& gotData)
- {
- // Assume we will still have data until proven otherwise. It will only
- // ever be false if this is the root and its empty.
- gotData = true;
- // Reset element content
- fContent.reset();
- // The current position is after the open bracket, so we need to read in
- // in the element name.
- if (!fReaderMgr.getName(fQNameBuf))
- {
- emitError(XMLErrs::ExpectedElementName);
- fReaderMgr.skipToChar(chOpenAngle);
- return false;
- }
- // See if its the root element
- const bool isRoot = fElemStack.isEmpty();
- // Skip any whitespace after the name
- fReaderMgr.skipPastSpaces();
- // First we have to do the rawest attribute scan. We don't do any
- // normalization of them at all, since we don't know yet what type they
- // might be (since we need the element decl in order to do that.)
- bool isEmpty;
- unsigned int attCount = rawAttrScan
- (
- fQNameBuf.getRawBuffer()
- , *fRawAttrList
- , isEmpty
- );
- const bool gotAttrs = (attCount != 0);
- // save the contentleafname and currentscope before addlevel, for later use
- ContentLeafNameTypeVector* cv = 0;
- XMLContentModel* cm = 0;
- int currentScope = Grammar::TOP_LEVEL_SCOPE;
- bool laxThisOne = false;
- if (!isRoot) {
- SchemaElementDecl* tempElement = (SchemaElementDecl*) fElemStack.topElement()->fThisElement;
- SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
- if ((modelType == SchemaElementDecl::Mixed_Simple)
- || (modelType == SchemaElementDecl::Mixed_Complex)
- || (modelType == SchemaElementDecl::Children))
- {
- cm = tempElement->getContentModel();
- cv = cm->getContentLeafNameTypeVector();
- currentScope = fElemStack.getCurrentScope();
- }
- else if (modelType == SchemaElementDecl::Any) {
- laxThisOne = true;
- }
- }
- // Now, since we might have to update the namespace map for this element,
- // but we don't have the element decl yet, we just tell the element stack
- // to expand up to get ready.
- unsigned int elemDepth = fElemStack.addLevel();
- fElemStack.setValidationFlag(fValidate);
- // Check if there is any external schema location specified, and if we are at root,
- // go through them first before scanning those specified in the instance document
- if (isRoot
- && (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
- if (fExternalSchemaLocation)
- parseSchemaLocation(fExternalSchemaLocation);
- if (fExternalNoNamespaceSchemaLocation)
- resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString);
- }
- // Make an initial pass through the list and find any xmlns attributes or
- // schema attributes.
- if (attCount)
- scanRawAttrListforNameSpaces(fRawAttrList, attCount);
- // Resolve the qualified name to a URI and name so that we can look up
- // the element decl for this element. We have now update the prefix to
- // namespace map so we should get the correct element now.
- int prefixColonPos = -1;
- const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
- unsigned int uriId = resolveQName
- (
- qnameRawBuf
- , fPrefixBuf
- , ElemStack::Mode_Element
- , prefixColonPos
- );
- //if schema, check if we should lax or skip the validation of this element
- bool parentValidation = fValidate;
- if (cv) {
- QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
- // elementDepth will be > 0, as cv is only constructed if element is not
- // root.
- laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
- }
- // Look up the element now in the grammar. This will get us back a
- // generic element decl object. We tell him to fault one in if he does
- // not find it.
- XMLElementDecl* elemDecl = 0;
- bool wasAdded = false;
- bool errorBeforeElementFound = false;
- bool laxBeforeElementFound = false;
- const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
- const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
- unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
- if (uriId != fEmptyNamespaceId) {
- // Check in current grammar before switching if necessary
- elemDecl = fGrammar->getElemDecl
- (
- uriId
- , nameRawBuf
- , qnameRawBuf
- , currentScope
- );
- if (!elemDecl && (orgGrammarUri != uriId)) {
- // not found, switch to the specified grammar
- const XMLCh* uriStr = getURIText(uriId);
- bool errorCondition = !switchGrammar(uriStr) && fValidate;
- if (errorCondition && !laxThisOne)
- {
- fValidator->emitError
- (
- XMLValid::GrammarNotFound
- ,uriStr
- );
- errorBeforeElementFound = true;
- }
- else if(errorCondition)
- laxBeforeElementFound = true;
- elemDecl = fGrammar->getElemDecl
- (
- uriId
- , nameRawBuf
- , qnameRawBuf
- , currentScope
- );
- }
- if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
- // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
- elemDecl = fGrammar->getElemDecl
- (
- uriId
- , nameRawBuf
- , qnameRawBuf
- , Grammar::TOP_LEVEL_SCOPE
- );
- if(!elemDecl) {
- // still not found in specified uri
- // try emptyNamesapce see if element should be un-qualified.
- elemDecl = fGrammar->getElemDecl
- (
- fEmptyNamespaceId
- , nameRawBuf
- , qnameRawBuf
- , currentScope
- );
- bool errorCondition = elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn;
- if (errorCondition && fValidate) {
- fValidator->emitError
- (
- XMLValid::ElementNotUnQualified
- , elemDecl->getFullName()
- );
- errorBeforeElementFound = true;
- }
- else if(errorCondition)
- laxBeforeElementFound = true;
- }
- }
- if (!elemDecl) {
- // still not found, fault this in and issue error later
- // switch back to original grammar first
- switchGrammar(original_uriStr);
- elemDecl = fGrammar->putElemDecl(uriId
- , nameRawBuf
- , fPrefixBuf.getRawBuffer()
- , qnameRawBuf
- , currentScope
- , true);
- wasAdded = true;
- }
- }
- else if (!elemDecl)
- {
- //the element has no prefix,
- //thus it is either a non-qualified element defined in current targetNS
- //or an element that is defined in the globalNS
- //try unqualifed first
- elemDecl = fGrammar->getElemDecl
- (
- uriId
- , nameRawBuf
- , qnameRawBuf
- , currentScope
- );
- if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
- //not found, switch grammar and try globalNS
- bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
- if (errorCondition && !laxThisOne)
- {
- fValidator->emitError
- (
- XMLValid::GrammarNotFound
- , XMLUni::fgZeroLenString
- );
- errorBeforeElementFound = true;
-
- }
- else if(errorCondition)
- laxBeforeElementFound = true;
- elemDecl = fGrammar->getElemDecl
- (
- uriId
- , nameRawBuf
- , qnameRawBuf
- , currentScope
- );
- }
- if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
- // if not found, then it may be a reference, try TOP_LEVEL_SCOPE
- elemDecl = fGrammar->getElemDecl
- (
- uriId
- , nameRawBuf
- , qnameRawBuf
- , Grammar::TOP_LEVEL_SCOPE
- );
- if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
- // still Not found in specified uri
- // go to original Grammar again to see if element needs to be fully qualified.
- const XMLCh* uriStr = getURIText(orgGrammarUri);
- bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
- if (errorCondition && !laxThisOne)
- {
- fValidator->emitError
- (
- XMLValid::GrammarNotFound
- ,original_uriStr
- );
- errorBeforeElementFound = true;
- }
- else if(errorCondition)
- laxBeforeElementFound = true;
- elemDecl = fGrammar->getElemDecl
- (
- orgGrammarUri
- , nameRawBuf
- , qnameRawBuf
- , currentScope
- );
- if (elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
- fValidator->emitError
- (
- XMLValid::ElementNotQualified
- , elemDecl->getFullName()
- );
- errorBeforeElementFound = true;
- }
- }
- }
- if (!elemDecl) {
- // still not found, fault this in and issue error later
- // switch back to original grammar first
- switchGrammar(original_uriStr);
- elemDecl = fGrammar->putElemDecl(uriId
- , nameRawBuf
- , fPrefixBuf.getRawBuffer()
- , qnameRawBuf
- , currentScope
- , true);
- wasAdded = true;
- }
- }
- // We do something different here according to whether we found the
- // element or not.
- if (wasAdded)
- {
- if (laxThisOne) {
- fValidate = false;
- fElemStack.setValidationFlag(fValidate);
- }
- else if(fValidate) {
- ((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::FULL);
- }
- // If validating then emit an error
- if (fValidate)
- {
- // This is to tell the reuse Validator that this element was
- // faulted-in, was not an element in the grammar pool originally
- elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
- fValidator->emitError
- (
- XMLValid::ElementNotDefined
- , elemDecl->getFullName()
- );
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
- }
- }
- else
- {
- if(!laxBeforeElementFound) {
- if (fValidate) {
- ((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::FULL);
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::VALID);
- }
- }
- // If its not marked declared and validating, then emit an error
- if (!elemDecl->isDeclared()) {
- if (laxThisOne) {
- fValidate = false;
- fElemStack.setValidationFlag(fValidate);
- }
-
- if (fValidate)
- {
- fValidator->emitError
- (
- XMLValid::ElementNotDefined
- , elemDecl->getFullName()
- );
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
- ((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::FULL);
- }
- }
- ((SchemaElementDecl*)elemDecl)->setXsiComplexTypeInfo(0);
- ((SchemaElementDecl*)elemDecl)->setXsiSimpleTypeInfo(0);
- }
- if(errorBeforeElementFound) {
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
- }
- // Now we can update the element stack to set the current element
- // decl. We expanded the stack above, but couldn't store the element
- // decl because we didn't know it yet.
- fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
- fElemStack.setCurrentURI(uriId);
- if (isRoot)
- fRootGrammar = fGrammar;
- // Validate the element
- if (fValidate)
- fValidator->validateElement(elemDecl);
- ComplexTypeInfo* typeinfo = ((SchemaElementDecl*)elemDecl)->getComplexTypeInfo();
- if (typeinfo) {
- currentScope = typeinfo->getScopeDefined();
- // switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
- XMLCh* typeName = typeinfo->getTypeName();
- const XMLCh poundStr[] = {chPound, chNull};
- if (!XMLString::startsWith(typeName, poundStr)) {
- const int comma = XMLString::indexOf(typeName, chComma);
- if (comma > 0) {
- XMLBuffer prefixBuf(comma+1, fMemoryManager);
- prefixBuf.append(typeName, comma);
- const XMLCh* uriStr = prefixBuf.getRawBuffer();
- bool errorCondition = !switchGrammar(uriStr) && fValidate;
- if (errorCondition && !laxThisOne)
- {
- fValidator->emitError
- (
- XMLValid::GrammarNotFound
- , prefixBuf.getRawBuffer()
- );
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
- }
- else if(errorCondition) {
- ((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::NONE);
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::UNKNOWN);
- }
- }
- }
- }
- fElemStack.setCurrentScope(currentScope);
- // Set element next state
- if (elemDepth >= fElemStateSize) {
- resizeElemState();
- }
- fElemState[elemDepth] = 0;
- fElemStack.setCurrentGrammar(fGrammar);
- // If this is the first element and we are validating, check the root
- // element.
- if (isRoot)
- {
- if (fValidate)
- {
- // Some validators may also want to check the root, call the
- // XMLValidator::checkRootElement
- if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId())) {
- fValidator->emitError(XMLValid::RootElemNotLikeDocType);
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
- }
- }
- }
- else if (parentValidation)
- {
- // If the element stack is not empty, then add this element as a
- // child of the previous top element. If its empty, this is the root
- // elem and is not the child of anything.
- fElemStack.addChild(elemDecl->getElementName(), true);
- }
- // Now lets get the fAttrList filled in. This involves faulting in any
- // defaulted and fixed attributes and normalizing the values of any that
- // we got explicitly.
- //
- // We update the attCount value with the total number of attributes, but
- // it goes in with the number of values we got during the raw scan of
- // explictly provided attrs above.
- attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
- // activate identity constraints
- if (fValidate) {
- unsigned int count = ((SchemaElementDecl*) elemDecl)->getIdentityConstraintCount();
- if (count || fMatcherStack->getMatcherCount()) {
- fValueStoreCache->startElement();
- fMatcherStack->pushContext();
- fValueStoreCache->initValueStoresFor((SchemaElementDecl*) elemDecl, (int) elemDepth);
- for (unsigned int i = 0; i < count; i++) {
- activateSelectorFor(((SchemaElementDecl*) elemDecl)->getIdentityConstraintAt(i), (int) elemDepth);
- }
- // call all active identity constraints
- count = fMatcherStack->getMatcherCount();
- for (unsigned int j = 0; j < count; j++) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
- matcher->startElement(*elemDecl, uriId, fPrefixBuf.getRawBuffer(), *fAttrList, attCount);
- }
- }
- }
- // Since the element may have default values, call start tag now regardless if it is empty or not
- // If we have a document handler, then tell it about this start tag
- if (fDocHandler)
- {
- fDocHandler->startElement
- (
- *elemDecl
- , uriId
- , fPrefixBuf.getRawBuffer()
- , *fAttrList
- , attCount
- , false
- , isRoot
- );
- }
- // If empty, validate content right now if we are validating and then
- // pop the element stack top. Else, we have to update the current stack
- // top's namespace mapping elements.
- if (isEmpty)
- {
- // Pop the element stack back off since it'll never be used now
- fElemStack.popTop();
- // If validating, then insure that its legal to have no content
- if (fValidate)
- {
- const int res = fValidator->checkContent(elemDecl, 0, 0);
- if (res >= 0)
- {
- fValidator->emitError
- (
- XMLValid::ElementNotValidForContent
- , elemDecl->getFullName()
- , elemDecl->getFormattedContentModel()
- );
- ((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
- }
- // call matchers and de-activate context
- int oldCount = fMatcherStack->getMatcherCount();
- if (oldCount || ((SchemaElementDecl*) elemDecl)->getIdentityConstraintCount()) {
- for (int i = oldCount - 1; i >= 0; i--) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(i);
- matcher->endElement(*elemDecl, fContent.getRawBuffer());
- }
- if (fMatcherStack->size() > 0) {
- fMatcherStack->popContext();
- }
- // handle everything *but* keyref's.
- int newCount = fMatcherStack->getMatcherCount();
- for (int j = oldCount - 1; j >= newCount; j--) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
- IdentityConstraint* ic = matcher->getIdentityConstraint();
- if (ic && (ic->getType() != IdentityConstraint::KEYREF))
- fValueStoreCache->transplant(ic, matcher->getInitialDepth());
- }
- // now handle keyref's...
- for (int k = oldCount - 1; k >= newCount; k--) {
- XPathMatcher* matcher = fMatcherStack->getMatcherAt(k);
- IdentityConstraint* ic = matcher->getIdentityConstraint();
- if (ic && (ic->getType() == IdentityConstraint::KEYREF)) {
- ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth());
- if (values) { // nothing to do if nothing matched!
- values->endDcocumentFragment(fValueStoreCache);
- }
- }
- }
- fValueStoreCache->endElement();
- }
- }
- if(!isRoot)
- ((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromElement(elemDecl, fGrammarType);
- // If we have a doc handler, tell it about the end tag
- if (fDocHandler)
- {
- fDocHandler->endElement
- (
- *elemDecl
- , uriId
- , isRoot
- , fPrefixBuf.getRawBuffer()
- );
- }
- // reset xsi:type ComplexTypeInfo
- ((SchemaElementDecl*)elemDecl)->reset();
- if (!isRoot)
- ((SchemaElementDecl*)(fElemStack.topElement()->fThisElement))->
- setXsiComplexTypeInfo(((SchemaValidator*)fValidator)->getCurrentTypeInfo());
- // If the elem stack is empty, then it was an empty root
- if (isRoot)
- gotData = false;
- else
- {
- // Restore the grammar
- fGrammar = fElemStack.getCurrentGrammar();
- fGrammarType = fGrammar->getGrammarType();
- fValidator->setGrammar(fGrammar);
- // Restore the validation flag
- fValidate = fElemStack.getValidationFlag();
- }
- }
- return true;
- }
- unsigned int
- SGXMLScanner::resolveQName(const XMLCh* const qName
- , XMLBuffer& prefixBuf
- , const short mode
- , int& prefixColonPos)
- {
- // Lets split out the qName into a URI and name buffer first. The URI
- // can be empty.
- prefixColonPos = XMLString::indexOf(qName, chColon);
- if (prefixColonPos == -1)
- {
- // Its all name with no prefix, so put the whole thing into the name
- // buffer. Then map the empty string to a URI, since the empty string
- // represents the default namespace. This will either return some
- // explicit URI which the default namespace is mapped to, or the
- // the default global namespace.
- bool unknown = false;
- prefixBuf.reset();
- return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
- }
- else
- {
- // Copy the chars up to but not including the colon into the prefix
- // buffer.
- prefixBuf.set(qName, prefixColonPos);
- // Watch for the special namespace prefixes. We always map these to
- // special URIs. 'xml' gets mapped to the official URI that its defined
- // to map to by the NS spec. xmlns gets mapped to a special place holder
- // URI that we define (so that it maps to something checkable.)
- const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
- if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {
- // if this is an element, it is an error to have xmlns as prefix
- if (mode == ElemStack::Mode_Element)
- emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);
- return fXMLNSNamespaceId;
- }
- else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
- return fXMLNamespaceId;
- }
- else
- {
- bool unknown = false;
- unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);
- if (unknown)
- emitError(XMLErrs::UnknownPrefix, prefixRawBuf);
- return uriId;
- }
- }
- }
- // ---------------------------------------------------------------------------
- // SGXMLScanner: IC activation methods
- // ---------------------------------------------------------------------------
- void SGXMLScanner::activateSelectorFor(IdentityConstraint* const ic, const int initialDepth) {
- IC_Selector* selector = ic->getSelector();
- if (!selector)
- return;
- XPathMatcher* matcher = selector->createMatcher
- (
- fFieldActivator
- , initialDepth
- , fMemoryManager
- );
- fMatcherStack->addMatcher(matcher);
- matcher->startDocumentFragment();
- }
- // ---------------------------------------------------------------------------
- // SGXMLScanner: Grammar preparsing
- // ---------------------------------------------------------------------------
- Grammar* SGXMLScanner::loadGrammar(const InputSource& src
- , const short grammarType
- , const bool toCache)
- {
- Grammar* loadedGrammar = 0;
- try
- {
- fGrammarResolver->cacheGrammarFromParse(false);
- fGrammarResolver->useCachedGrammarInParse(false);
- fRootGrammar = 0;
- if (fValScheme == Val_Auto) {
- fValidate = true;
- }
- // Reset some status flags
- fInException = false;
- fStandalone = false;
- fErrorCount = 0;
- fHasNoDTD = true;
- fSeeXsi = false;
- if (grammarType == Grammar::SchemaGrammarType) {
- loadedGrammar = loadXMLSchemaGrammar(src, toCache);
- }
- // Reset the reader manager to close all files, sockets, etc...
- fReaderMgr.reset();
- }
- // NOTE:
- //
- // In all of the error processing below, the emitError() call MUST come
- // before the flush of the reader mgr, or it will fail because it tries
- // to find out the position in the XML source of the error.
- catch(const XMLErrs::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and fall through
- fReaderMgr.reset();
- }
- catch(const XMLValid::Codes)
- {
- // This is a 'first fatal error' type exit, so reset and fall through
- fReaderMgr.reset();
- }
- catch(const XMLException& excToCatch)
- {
- // Emit the error and catch any user exception thrown from here. Make
- // sure in all cases we flush the reader manager.
- fInException = true;
- try
- {
- if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
- emitError
- (
- XMLErrs::DisplayErrorMessage
- , excToCatch.getMessage()
- );
- else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
- emitError
- (
- XMLErrs::XMLException_Fatal
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- else
- emitError
- (
- XMLErrs::XMLException_Error
- , excToCatch.getType()
- , excToCatch.getMessage()
- );
- }
- catch(...)
- {
- // Flush the reader manager and rethrow user's error
- fReaderMgr.reset();
- throw;
- }
- // If it returned, then reset the reader manager and fall through
- fReaderMgr.reset();
- }
- catch(...)
- {
- // Reset and rethrow
- fReaderMgr.reset();
- throw;
- }
- return loadedGrammar;
- }
- // ---------------------------------------------------------------------------
- // SGXMLScanner: Private helper methods
- // ---------------------------------------------------------------------------
- // This method handles the common initialization, to avoid having to do
- // it redundantly in multiple constructors.
- void SGXMLScanner::commonInit()
- {
- // Create the element state array
- fElemState = (unsigned int*) fMemoryManager->allocate
- (
- fElemStateSize * sizeof(unsigned int)
- ); //new unsigned int[fElemStateSize];
- // And we need one for the raw attribute scan. This just stores key/
- // value string pairs (prior to any processing.)
- fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);
- // Create dummy schema grammar
- fSchemaGrammar = new (fMemoryManager) SchemaGrammar(fMemoryManager);
- // Create the Validator and init them
- fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
- initValidator(fSchemaValidator);
- // Create IdentityConstraint info
- fMatcherStack = new (fMemoryManager) XPathMatcherStack(fMemoryManager);
- fValueStoreCache = new (fMemoryManager) ValueStoreCache(fMemoryManager);
- fFieldActivator = new (fMemoryManager) FieldActivator(fValueStoreCache, fMatcherStack, fMemoryManager);
- fValueStoreCache->setScanner(this);
- // Add the default entity entries for the character refs that must always
- // be present.
- fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
- fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
- fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
- fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
- fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
- fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
- }
- void SGXMLScanner::cleanUp()
- {
- fMemoryManager->deallocate(fElemState); //delete [] fElemState;
- delete fSchemaGrammar;
- delete fEntityTable;
- delete fRawAttrList;
- delete fSchemaValidator;
- delete fFieldActivator;
- delete fMatcherStack;
- delete fValueStoreCache;
- }
- void SGXMLScanner::resizeElemState() {
- unsigned int newSize = fElemStateSize * 2;
- unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
- (
- newSize * sizeof(unsigned int)
- ); //new unsigned int[newSize];
- // Copy the existing values
- unsigned int index = 0;
- for (; index < fElemStateSize; index++)
- newElemState[index] = fElemState[index];
- for (; index < newSize; index++)
- newElemState[index] = 0;
- // Delete the old array and udpate our members
- fMemoryManager->deallocate(fElemState); //delete [] fElemState;
- fElemState = newElemState;
- fElemStateSize = newSize;
- }
- // This method is called from scanStartTag() to build up the list of
- // XMLAttr objects that will be passed out in the start tag callout. We
- // get the key/value pairs from the raw scan of explicitly provided attrs,
- // which have not been normalized. And we get the element declaration from
- // which we will get any defaulted or fixed attribute defs and add those
- // in as well.
- unsigned int
- SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs
- , const unsigned int attCount
- , XMLElementDecl* elemDecl
- , RefVectorOf<XMLAttr>& toFill)
- {
- // Ask the element to clear the 'provided' flag on all of the att defs
- // that it owns, and to return us a boolean indicating whether it has
- // any defs.
- const bool hasDefs = elemDecl->resetDefs();
- // If there are no expliclitily provided attributes and there are no
- // defined attributes for the element, the we don't have anything to do.
- // So just return zero in this case.
- if (!hasDefs && !attCount)
- return 0;
- // Keep up with how many attrs we end up with total
- unsigned int retCount = 0;
- // And get the current size of the output vector. This lets us use
- // existing elements until we fill it, then start adding new ones.
- const unsigned int curAttListSize = toFill.size();
- // We need a buffer into which raw scanned attribute values will be
- // normalized.
- XMLBufBid bbNormal(&fBufMgr);
- XMLBuffer& normBuf = bbNormal.getBuffer();
- // Loop through our explicitly provided attributes, which are in the raw
- // scanned form, and build up XMLAttr objects.
- unsigned int index;
- for (index = 0; index < attCount; index++)
- {
- const KVStringPair* curPair = providedAttrs.elementAt(index);
- // We have to split the name into its prefix and name parts. Then
- // we map the prefix to its URI.
- const XMLCh* const namePtr = curPair->getKey();
- ArrayJanitor<XMLCh> janName(0);
- // use a stack-based buffer when possible.
- XMLCh tempBuffer[100];
- const int colonInd = XMLString::indexOf(namePtr, chColon);
- const XMLCh* prefPtr = XMLUni::fgZeroLenString;
- const XMLCh* suffPtr = XMLUni::fgZeroLenString;
- if (colonInd != -1)
- {
- // We have to split the string, so make a copy.
- if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
- {
- XMLString::copyString(tempBuffer, namePtr);
- tempBuffer[colonInd] = chNull;
- prefPtr = tempBuffer;
- }
- else
- {
- janName.reset(XMLString::replicate(namePtr, fMemoryManager), fMemoryManager);
- janName[colonInd] = chNull;
- prefPtr = janName.get();
- }
- suffPtr = prefPtr + colonInd + 1;
- }
- else
- {
- // No colon, so we just have a name with no prefix
- suffPtr = namePtr;
- }
- // Map the prefix to a URI id. We tell him that we are mapping an
- // attr prefix, so any xmlns attrs at this level will not affect it.
- const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
- // If the uri comes back as the xmlns or xml URI or its just a name
- // and that name is 'xmlns', then we handle it specially. So set a
- // boolean flag that lets us quickly below know which we are dealing
- // with.
- const bool isNSAttr = (uriId == fXMLNSNamespaceId)
- || (uriId == fXMLNamespaceId)
- || XMLString::equals(suffPtr, XMLUni::fgXMLNSString)
- || XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI);
- // If its not a special case namespace attr of some sort, then we
- // do normal checking and processing.
- XMLAttDef::AttTypes attType;
- if (!isNSAttr)
- {
- // Some checking for attribute wild card first (for schema)
- bool laxThisOne = false;
- bool skipThisOne = false;
- XMLAttDef* attDefForWildCard = 0;
- XMLAttDef* attDef = 0;
- if (fGrammarType == Grammar::SchemaGrammarType) {
- //retrieve the att def
- attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId);
- // if not found or faulted in - check for a matching wildcard attribute
- // if no matching wildcard attribute, check (un)qualifed cases and flag
- // appropriate errors
- if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
- SchemaAttDef* attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
- if (attWildCard) {
- //if schema, see if we should lax or skip the validation of this attribute
- if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
- SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
- if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
- RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
- if (attRegistry) {
- attDefForWildCard = attRegistry->get(suffPtr);
- }
- }
- }
- }
- else {
- // not found, see if the attDef should be qualified or not
- if (uriId == fEmptyNamespaceId) {
- attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
- if (fValidate
- && attDef
- && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
- // the attribute should be qualified
- fValidator->emitError
- (
- XMLValid::AttributeNotQualified
- , attDef->getFullName()
- );
- ((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
- }
- }
- else {
- attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, fEmptyNamespaceId);
- if (fValidate
- && attDef
- && attDef->getCreateReason() != XMLAttDef::JustFaultIn) {