词法分析

开发平台：

Visual C++

SGXMLScanner.cpp：源码内容

/*
* The Apache Software License, Version 1.1
*
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Xerces" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation, and was
* Business Machines, Inc., http://www.ibm.com . For more information
* on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
/*
* $Id: SGXMLScanner.cpp,v 1.26 2003/05/18 14:02:04 knoaman Exp $
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/internal/SGXMLScanner.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/UnexpectedEOFException.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/framework/XMLDocumentHandler.hpp>
#include <xercesc/framework/XMLEntityHandler.hpp>
#include <xercesc/framework/XMLPScanToken.hpp>
#include <xercesc/framework/MemoryManager.hpp>
#include <xercesc/internal/EndOfEntityException.hpp>
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
#include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/schema/TraverseSchema.hpp>
#include <xercesc/validators/schema/XSDDOMParser.hpp>
#include <xercesc/validators/schema/SubstitutionGroupComparator.hpp>
#include <xercesc/validators/schema/identity/FieldActivator.hpp>
#include <xercesc/validators/schema/identity/XPathMatcherStack.hpp>
#include <xercesc/validators/schema/identity/ValueStoreCache.hpp>
#include <xercesc/validators/schema/identity/IC_Selector.hpp>
#include <xercesc/validators/schema/identity/ValueStore.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// SGXMLScanner: Constructors and Destructor
// ---------------------------------------------------------------------------
// Constructs a scanner with an optional validator and no handlers.
//
//  valToAdopt  Validator to use, or null. A non-null validator that does not
//              report handlesSchema() is rejected with a RuntimeException
//              (Gen_NoSchemaValidator).
//  manager     Memory manager forwarded to the base class and to the
//              element stack and content buffer members.
//
// If anything thrown during initialization escapes, cleanUp() is run before
// the exception is propagated to the caller.
SGXMLScanner::SGXMLScanner(XMLValidator* const   valToAdopt,
                           MemoryManager* const  manager)
    : XMLScanner(valToAdopt, manager)
    , fSeeXsi(false)
    , fElemStateSize(16)
    , fElemState(0)
    , fElemStack(manager)
    , fContent(1023, manager)
    , fEntityTable(0)
    , fRawAttrList(0)
    , fSchemaValidator(0)
    , fMatcherStack(0)
    , fValueStoreCache(0)
    , fFieldActivator(0)
{
    try
    {
        commonInit();

        if (!valToAdopt)
        {
            // No validator supplied: fall back on our own schema validator
            // (presumably created by commonInit() -- not visible here).
            fValidator = fSchemaValidator;
        }
        else if (!valToAdopt->handlesSchema())
        {
            ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
        }
    }
    catch (...)
    {
        // Release whatever was set up so far, then let the caller see the error
        cleanUp();
        throw;
    }
}
// Constructs a scanner with a full set of handlers and an optional validator.
//
//  docHandler, docTypeHandler, entityHandler, errHandler
//              Passed straight through to the XMLScanner base class.
//  valToAdopt  Validator to use, or null. A non-null validator that does not
//              report handlesSchema() is rejected with a RuntimeException
//              (Gen_NoSchemaValidator).
//  manager     Memory manager forwarded to the base class and to the
//              element stack and content buffer members.
//
// If anything thrown during initialization escapes, cleanUp() is run before
// the exception is propagated to the caller.
SGXMLScanner::SGXMLScanner(XMLDocumentHandler* const  docHandler,
                           DocTypeHandler* const      docTypeHandler,
                           XMLEntityHandler* const    entityHandler,
                           XMLErrorReporter* const    errHandler,
                           XMLValidator* const        valToAdopt,
                           MemoryManager* const       manager)
    : XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, manager)
    , fSeeXsi(false)
    , fElemStateSize(16)
    , fElemState(0)
    , fElemStack(manager)
    , fContent(1023, manager)
    , fEntityTable(0)
    , fRawAttrList(0)
    , fSchemaValidator(0)
    , fMatcherStack(0)
    , fValueStoreCache(0)
    , fFieldActivator(0)
{
    try
    {
        commonInit();

        if (!valToAdopt)
        {
            // No validator supplied: fall back on our own schema validator
            // (presumably created by commonInit() -- not visible here).
            fValidator = fSchemaValidator;
        }
        else if (!valToAdopt->handlesSchema())
        {
            ThrowXML(RuntimeException, XMLExcepts::Gen_NoSchemaValidator);
        }
    }
    catch (...)
    {
        // Release whatever was set up so far, then let the caller see the error
        cleanUp();
        throw;
    }
}
// Destructor: all teardown is delegated to cleanUp(), the same routine the
// constructors invoke when initialization fails.
SGXMLScanner::~SGXMLScanner()
{
    cleanUp();
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Getter methods
// ---------------------------------------------------------------------------
// Always returns a null pointer: this scanner exposes no DTD entity
// declaration pool to its callers.
NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool()
{
    NameIdPool<DTDEntityDecl>* const noPool = 0;
    return noPool;
}
// Const overload; likewise always returns a null pointer because this
// scanner exposes no DTD entity declaration pool.
const NameIdPool<DTDEntityDecl>* SGXMLScanner::getEntityDeclPool() const
{
    const NameIdPool<DTDEntityDecl>* const noPool = 0;
    return noPool;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Main entry point to scan a document
// ---------------------------------------------------------------------------
void SGXMLScanner::scanDocument(const InputSource& src)
{
// Bump up the sequence id for this parser instance. This will invalidate
// any previous progressive scan tokens.
fSequenceId++;
try
{
// Reset the scanner and its plugged in stuff for a new run. This
// resets all the data structures, creates the initial reader and
// pushes it on the stack, and sets up the base document path.
scanReset(src);
// If we have a document handler, then call the start document
if (fDocHandler)
fDocHandler->startDocument();
// Scan the prolog part, which is everything before the root element
// including the DTD subsets.
scanProlog();
// If we got to the end of input, then its not a valid XML file.
// Else, go on to scan the content.
if (fReaderMgr.atEOF())
{
emitError(XMLErrs::EmptyMainEntity);
}
else
{
// Scan content, and tell it its not an external entity
if (scanContent(false))
{
// Do post-parse validation if required
if (fValidate)
{
// We handle ID reference semantics at this level since
// its required by XML 1.0.
checkIDRefs();
// Then allow the validator to do any extra stuff it wants
// fValidator->postParseValidation();
}
// That went ok, so scan for any miscellaneous stuff
if (!fReaderMgr.atEOF())
scanMiscellaneous();
}
}
// If we have a document handler, then call the end document
if (fDocHandler)
fDocHandler->endDocument();
// Reset the reader manager to close all files, sockets, etc...
fReaderMgr.reset();
}
// NOTE:
//
// In all of the error processing below, the emitError() call MUST come
// before the flush of the reader mgr, or it will fail because it tries
// to find out the position in the XML source of the error.
catch(const XMLErrs::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLValid::Codes)
{
// This is a 'first fatal error' type exit, so reset and fall through
fReaderMgr.reset();
}
catch(const XMLException& excToCatch)
{
// Emit the error and catch any user exception thrown from here. Make
// sure in all cases we flush the reader manager.
fInException = true;
try
{
if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
emitError
(
XMLErrs::XMLException_Warning
, excToCatch.getType()
, excToCatch.getMessage()
);
else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
emitError
(
XMLErrs::XMLException_Fatal
, excToCatch.getType()
, excToCatch.getMessage()
);
else
emitError
(
XMLErrs::XMLException_Error
, excToCatch.getType()
, excToCatch.getMessage()
);
}
catch(...)
{
// Flush the reader manager and rethrow user's error
fReaderMgr.reset();
throw;
}
// If it returned, then reset the reader manager and fall through
fReaderMgr.reset();
}
catch(...)
{
// Reset and rethrow
fReaderMgr.reset();
throw;
}
}
// Performs one step of a progressive scan: senses the next token and
// dispatches to the matching scan routine.
//
//  token   Progressive scan token; must still be legal for this scanner or a
//          RuntimeException (Scan_BadPScanToken) is thrown.
//
// Returns true while more input may follow. Returns false at end of input,
// and also after an XMLErrs::Codes / XMLValid::Codes throw or a reported
// XMLException. On every false return the reader manager has been reset.
// Exceptions thrown by the error handler, and any other exception type, are
// rethrown after the reader manager is reset.
bool SGXMLScanner::scanNext(XMLPScanToken& token)
{
    // A stale token cannot be continued
    if (!isLegalToken(token))
        ThrowXML(RuntimeException, XMLExcepts::Scan_BadPScanToken);

    unsigned int    orgReader;
    XMLTokens       curToken;
    bool            moreToScan = true;

    try
    {
        // Sense the next token. We may be sitting at the end of several
        // nested entities, each of which raises an end of entity exception
        // as we try to move forward, so retry until a token comes back.
        for (;;)
        {
            try
            {
                curToken = senseNextToken(orgReader);
                break;
            }
            catch (const EndOfEntityException& toCatch)
            {
                // Tell the handler the entity reference has ended
                if (fDocHandler)
                    fDocHandler->endEntityReference(toCatch.getEntity());
            }
        }

        if (curToken == Token_CharData)
        {
            scanCharData(fCDataBuf);
        }
        else if (curToken == Token_EOF)
        {
            // Input ended; any element still on the stack was never closed
            if (!fElemStack.isEmpty())
            {
                const ElemStack::StackElem* const unclosed = fElemStack.popTop();
                emitError
                (
                    XMLErrs::EndedWithTagsOnStack
                    , unclosed->fThisElement->getFullName()
                );
            }
            moreToScan = false;
        }
        else
        {
            // Some kind of markup
            bool gotData = true;
            switch (curToken)
            {
                case Token_CData :
                    // CDATA sections are only legal inside content
                    if (fElemStack.isEmpty())
                        emitError(XMLErrs::CDATAOutsideOfContent);
                    scanCDSection();
                    break;

                case Token_Comment :
                    scanComment();
                    break;

                case Token_EndTag :
                    scanEndTag(gotData);
                    break;

                case Token_PI :
                    scanPI();
                    break;

                case Token_StartTag :
                    scanStartTag(gotData);
                    break;

                default :
                    fReaderMgr.skipToChar(chOpenAngle);
                    break;
            }

            // Markup must end in the reader it started in
            if (orgReader != fReaderMgr.getCurrentReaderNum())
                emitError(XMLErrs::PartialMarkupInEntity);

            // The root element has ended: finish off the document
            if (!gotData)
            {
                if (fValidate)
                {
                    // ID reference semantics are handled at this level
                    // since they are required by XML 1.0.
                    checkIDRefs();

                    // Then allow the validator to do any extra stuff it wants
                    // fValidator->postParseValidation();
                }

                // Trailing comments, PIs and whitespace
                scanMiscellaneous();

                if (fValidate)
                    fValueStoreCache->endDocument();

                if (fDocHandler)
                    fDocHandler->endDocument();
            }
        }
    }
    //  NOTE:
    //
    //  In all of the error processing below, the emitError() call MUST come
    //  before the flush of the reader mgr, or it will fail because it tries
    //  to find out the position in the XML source of the error.
    catch (const XMLErrs::Codes)
    {
        // 'First failure' exception: reset and report failure
        fReaderMgr.reset();
        return false;
    }
    catch (const XMLValid::Codes)
    {
        // 'First fatal error' type exit: reset and report failure
        fReaderMgr.reset();
        return false;
    }
    catch (const XMLException& excToCatch)
    {
        // Report the exception as an error of matching severity. The error
        // handler may throw a user exception; the reader manager must be
        // flushed in every case.
        fInException = true;
        try
        {
            XMLErrs::Codes toEmit = XMLErrs::XMLException_Error;
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                toEmit = XMLErrs::XMLException_Warning;
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                toEmit = XMLErrs::XMLException_Fatal;

            emitError(toEmit, excToCatch.getType(), excToCatch.getMessage());
        }
        catch (...)
        {
            // Reset and rethrow the user's error
            fReaderMgr.reset();
            throw;
        }

        // Reset and report failure
        fReaderMgr.reset();
        return false;
    }
    catch (...)
    {
        // Reset and rethrow the original error
        fReaderMgr.reset();
        throw;
    }

    // End of input reached normally: flush the reader manager
    if (!moreToScan)
        fReaderMgr.reset();

    return moreToScan;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Private scanning methods
// ---------------------------------------------------------------------------
// Raw first-pass scan of a start tag's attributes, called from
// scanStartTag(). Each attribute is stored as an unprocessed name/value pair.
//
//  elemName    Name of the element being scanned; used only in error
//              messages.
//  toFill      Receives the pairs. Existing entries are overwritten in place
//              before any new entry is appended.
//  isEmpty     Set to true when the tag ends with "/>", false otherwise.
//
// Returns the number of attributes stored. Throws UnexpectedEOFException if
// the input ends inside the tag.
unsigned int
SGXMLScanner::rawAttrScan(const   XMLCh* const                elemName
                          ,       RefVectorOf<KVStringPair>&  toFill
                          ,       bool&                       isEmpty)
{
    // How many attributes have been stored, and how many entries the vector
    // already had on entry (those are recycled before we grow it).
    unsigned int        seenAttrs = 0;
    const unsigned int  reusable  = toFill.size();

    // Until we actually see "/>"
    isEmpty = false;

    // One pass per attribute (or per recovery step) until '>' or '/>'
    for (;;)
    {
        XMLCh nextCh = fReaderMgr.peekNextChar();

        // After the first attribute, anything other than '/' or '>' has to
        // be preceded by whitespace.
        if (seenAttrs && (nextCh != chForwardSlash) && (nextCh != chCloseAngle))
        {
            if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
            {
                // Consume the whitespace and look at what follows it
                fReaderMgr.getNextChar();
                fReaderMgr.skipPastSpaces();
                nextCh = fReaderMgr.peekNextChar();
            }
            else
            {
                // Report it, but carry on scanning
                emitError(XMLErrs::ExpectedWhitespace);
            }
        }

        // Normal case: not one of the special start tag characters, so
        // treat what follows as an attribute.
        if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
        {
            // Attribute name
            if (!fReaderMgr.getName(fAttNameBuf))
            {
                emitError(XMLErrs::ExpectedAttrName);
                fReaderMgr.skipPastChar(chCloseAngle);
                return seenAttrs;
            }

            // Equal sign
            if (!scanEq())
            {
                static const XMLCh eqResyncChars[] =
                {
                    chSingleQuote, chDoubleQuote, chCloseAngle
                    , chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedEqSign);

                // Resynchronize on the next meaningful character
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(eqResyncChars);

                // End of tag: let the top of the loop deal with it
                if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                    continue;

                // A quote or whitespace means a value probably follows, so
                // keep going. Anything else abandons the tag.
                const bool valueMayFollow =
                    (chFound == chSingleQuote)
                    || (chFound == chDoubleQuote)
                    || fReaderMgr.getCurrentReader()->isWhitespace(chFound);

                if (!valueMayFollow)
                {
                    // '<' suggests a malformed tag with a new one starting
                    if (chFound == chOpenAngle)
                        emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return seenAttrs;
                }
            }

            // Quoted attribute value. This is a simple scan whose only
            // processing is the expansion of entity references.
            if (!basicAttrValueScan(fAttNameBuf.getRawBuffer(), fAttValueBuf))
            {
                static const XMLCh valueResyncChars[] =
                {
                    chCloseAngle, chOpenAngle, chForwardSlash, chNull
                };

                emitError(XMLErrs::ExpectedAttrValue);

                // Resynchronize on whitespace or one of the listed chars
                const XMLCh chFound = fReaderMgr.skipUntilInOrWS(valueResyncChars);

                // '>', '/' or whitespace: store the attribute anyway (its
                // value will be ""). Anything else abandons the tag.
                const bool keepAttr =
                    (chFound == chCloseAngle)
                    || (chFound == chForwardSlash)
                    || fReaderMgr.getCurrentReader()->isWhitespace(chFound);

                if (!keepAttr)
                {
                    // '<' suggests a malformed tag with a new one starting
                    if (chFound == chOpenAngle)
                        emitError(XMLErrs::UnterminatedStartTag, elemName);
                    return seenAttrs;
                }
            }

            // Namespace well-formedness of the name: at most one colon, and
            // not as the first or last character. Offenders are reported
            // and not stored.
            const int firstColon = XMLString::indexOf(fAttNameBuf.getRawBuffer(), chColon);
            if (firstColon != -1)
            {
                const int lastColon = XMLString::lastIndexOf(fAttNameBuf.getRawBuffer(), chColon);

                if (firstColon != lastColon)
                {
                    emitError(XMLErrs::TooManyColonsInName);
                    continue;
                }

                if ((firstColon == 0)
                ||  (lastColon == static_cast<int>(fAttNameBuf.getLen()) - 1))
                {
                    emitError(XMLErrs::InvalidColonPos);
                    continue;
                }
            }

            // Store the pair: recycle an existing entry while there is one,
            // otherwise append a new one.
            if (seenAttrs < reusable)
            {
                KVStringPair* const recycled = toFill.elementAt(seenAttrs);
                recycled->set(fAttNameBuf.getRawBuffer(), fAttValueBuf.getRawBuffer());
            }
            else
            {
                KVStringPair* const fresh = new (fMemoryManager) KVStringPair
                (
                    fAttNameBuf.getRawBuffer()
                    , fAttValueBuf.getRawBuffer()
                    , fMemoryManager
                );
                toFill.addElement(fresh);
            }

            seenAttrs++;

            // On to the next attribute
            continue;
        }

        // Special character handling from here on
        if (!nextCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

        if (nextCh == chForwardSlash)
        {
            // "/>" -- an empty element
            fReaderMgr.getNextChar();
            isEmpty = true;
            if (!fReaderMgr.skippedChar(chCloseAngle))
                emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }

        if (nextCh == chCloseAngle)
        {
            fReaderMgr.getNextChar();
            break;
        }

        if (nextCh == chOpenAngle)
        {
            // Common enough to check for specially. It recovers by itself:
            // we are already at the next open bracket, which is where we
            // would have skipped to anyway.
            emitError(XMLErrs::UnterminatedStartTag, elemName);
            break;
        }

        if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
        {
            // Probably a missing attribute name, e.g. ="value". Report it,
            // swallow the quoted string and try again from the top.
            emitError(XMLErrs::ExpectedAttrName);
            fReaderMgr.getNextChar();
            fReaderMgr.skipQuotedString(nextCh);
            fReaderMgr.skipPastSpaces();
            continue;
        }
    }

    return seenAttrs;
}
// This method will kick off the scanning of the primary content of the
// document, i.e. the elements.
bool SGXMLScanner::scanContent(const bool extEntity)
{
// Go into a loop until we hit the end of the root element, or we fall
// out because there is no root element.
//
// We have to do kind of a deeply nested double loop here in order to
// avoid doing the setup/teardown of the exception handler on each
// round. Doing it this way we only do it when an exception actually
// occurs.
bool gotData = true;
bool inMarkup = false;
while (gotData)
{
try
{
while (gotData)
{
// Sense what the next top level token is. According to what
// this tells us, we will call something to handle that kind
// of thing.
unsigned int orgReader;
const XMLTokens curToken = senseNextToken(orgReader);
// Handle character data and end of file specially. Char data
// is not markup so we don't want to handle it in the loop
// below.
if (curToken == Token_CharData)
{
// Scan the character data and call appropriate events. Let
// him use our local character data buffer for efficiency.
scanCharData(fCDataBuf);
continue;
}
else if (curToken == Token_EOF)
{
// The element stack better be empty at this point or we
// ended prematurely before all elements were closed.
if (!fElemStack.isEmpty())
{
const ElemStack::StackElem* topElem = fElemStack.popTop();
emitError
(
XMLErrs::EndedWithTagsOnStack
, topElem->fThisElement->getFullName()
);
}
// Its the end of file, so clear the got data flag
gotData = false;
continue;
}
// We are in some sort of markup now
inMarkup = true;
// According to the token we got, call the appropriate
// scanning method.
switch(curToken)
{
case Token_CData :
// Make sure we are within content
if (fElemStack.isEmpty())
emitError(XMLErrs::CDATAOutsideOfContent);
scanCDSection();
break;
case Token_Comment :
scanComment();
break;
case Token_EndTag :
scanEndTag(gotData);
break;
case Token_PI :
scanPI();
break;
case Token_StartTag :
scanStartTag(gotData);
break;
default :
fReaderMgr.skipToChar(chOpenAngle);
break;
}
if (orgReader != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialMarkupInEntity);
// And we are back out of markup again
inMarkup = false;
}
}
catch(const EndOfEntityException& toCatch)
{
// If we were in some markup when this happened, then its a
// partial markup error.
if (inMarkup)
emitError(XMLErrs::PartialMarkupInEntity);
// Send an end of entity reference event
if (fDocHandler)
fDocHandler->endEntityReference(toCatch.getEntity());
inMarkup = false;
}
}
// It went ok, so return success
return true;
}
void SGXMLScanner::scanEndTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the end of the root element.
gotData = true;
// Check if the element stack is empty. If so, then this is an unbalanced
// element (i.e. more ends than starts, perhaps because of bad text
// causing one to be skipped.)
if (fElemStack.isEmpty())
{
emitError(XMLErrs::MoreEndThanStartTags);
fReaderMgr.skipPastChar(chCloseAngle);
ThrowXML(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd);
}
// After the </ is the element QName, so get a name from the input
if (!fReaderMgr.getName(fQNameBuf))
{
// It failed so we can't really do anything with it
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipPastChar(chCloseAngle);
return;
}
int prefixColonPos = -1;
unsigned int uriId = resolveQName
(
fQNameBuf.getRawBuffer()
, fPrefixBuf
, ElemStack::Mode_Element
, prefixColonPos
);
// Pop the stack of the element we are supposed to be ending. Remember
// that we don't own this. The stack just keeps them and reuses them.
//
// NOTE: We CANNOT do this until we've resolved the element name because
// the element stack top contains the prefix to URI mappings for this
// element.
unsigned int topUri = fElemStack.getCurrentURI();
const ElemStack::StackElem* topElem = fElemStack.popTop();
// See if it was the root element, to avoid multiple calls below
const bool isRoot = fElemStack.isEmpty();
// Make sure that its the end of the element that we expect
XMLElementDecl* tempElement = topElem->fThisElement;
const XMLCh* rawNameBuf = fQNameBuf.getRawBuffer();
if ((topUri != uriId) ||
(!XMLString::equals(tempElement->getBaseName(), &rawNameBuf[prefixColonPos + 1])))
{
emitError
(
XMLErrs::ExpectedEndOfTagX
, topElem->fThisElement->getFullName()
);
}
// Make sure we are back on the same reader as where we started
if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
emitError(XMLErrs::PartialTagMarkupError);
// Skip optional whitespace
fReaderMgr.skipPastSpaces();
// Make sure we find the closing bracket
if (!fReaderMgr.skippedChar(chCloseAngle))
{
emitError
(
XMLErrs::UnterminatedEndTag
, topElem->fThisElement->getFullName()
);
}
// If validation is enabled, then lets pass him the list of children and
// this element and let him validate it.
if (fValidate)
{
int res = fValidator->checkContent
(
topElem->fThisElement
, topElem->fChildren
, topElem->fChildCount
);
if (res >= 0)
{
// One of the elements is not valid for the content. NOTE that
// if no children were provided but the content model requires
// them, it comes back with a zero value. But we cannot use that
// to index the child array in this case, and have to put out a
// special message.
if (!topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::EmptyNotValidForContent
, topElem->fThisElement->getFormattedContentModel()
);
}
else if ((unsigned int)res >= topElem->fChildCount)
{
fValidator->emitError
(
XMLValid::NotEnoughElemsForCM
, topElem->fThisElement->getFormattedContentModel()
);
}
else
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, topElem->fChildren[res]->getRawName()
, topElem->fThisElement->getFormattedContentModel()
);
}
}
// call matchers and de-activate context
int oldCount = fMatcherStack->getMatcherCount();
if (oldCount ||
((SchemaElementDecl*)topElem->fThisElement)->getIdentityConstraintCount()) {
for (int i = oldCount - 1; i >= 0; i--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(i);
matcher->endElement(*(topElem->fThisElement), fContent.getRawBuffer());
}
if (fMatcherStack->size() > 0) {
fMatcherStack->popContext();
}
// handle everything *but* keyref's.
int newCount = fMatcherStack->getMatcherCount();
for (int j = oldCount - 1; j >= newCount; j--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
IdentityConstraint* ic = matcher->getIdentityConstraint();
if (ic && (ic->getType() != IdentityConstraint::KEYREF))
fValueStoreCache->transplant(ic, matcher->getInitialDepth());
}
// now handle keyref's...
for (int k = oldCount - 1; k >= newCount; k--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(k);
IdentityConstraint* ic = matcher->getIdentityConstraint();
if (ic && (ic->getType() == IdentityConstraint::KEYREF)) {
ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth());
if (values) { // nothing to do if nothing matched!
values->endDcocumentFragment(fValueStoreCache);
}
}
}
fValueStoreCache->endElement();
}
}
if(!isRoot)
((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromElement(topElem->fThisElement, fGrammarType);
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*topElem->fThisElement
, uriId
, isRoot
, fPrefixBuf.getRawBuffer()
);
}
// reset xsi:type ComplexTypeInfo
((SchemaElementDecl*)topElem->fThisElement)->reset();
if (!isRoot)
((SchemaElementDecl*)(fElemStack.topElement()->fThisElement))->
setXsiComplexTypeInfo(((SchemaValidator*)fValidator)->getCurrentTypeInfo());
// If this was the root, then done with content
gotData = !isRoot;
if (gotData) {
// Restore the grammar
fGrammar = fElemStack.getCurrentGrammar();
fGrammarType = fGrammar->getGrammarType();
fValidator->setGrammar(fGrammar);
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
// This method handles the high level logic of scanning the DOCType
// declaration. This calls the DTDScanner and kicks off both the scanning of
// the internal subset and the scanning of the external subset, if any.
//
// When we get here the '<!DOCTYPE' part has already been scanned, which is
// what told us that we had a doc type decl to parse.
void SGXMLScanner::scanDocTypeDecl()
{
// Just skips over it
// REVISIT: Should we issue a warning
static const XMLCh doctypeIE[] =
{
chOpenSquare, chCloseAngle, chNull
};
XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);
if (nextCh == chOpenSquare)
fReaderMgr.skipPastChar(chCloseSquare);
fReaderMgr.skipPastChar(chCloseAngle);
}
// This method is called to scan a start tag when we are processing
// namespaces. This method is called after we've scanned the < of a
// start tag. So we have to get the element name, then scan the attributes,
// after which we are either going to see >, />, or attributes followed
// by one of those sequences.
bool SGXMLScanner::scanStartTag(bool& gotData)
{
// Assume we will still have data until proven otherwise. It will only
// ever be false if this is the root and its empty.
gotData = true;
// Reset element content
fContent.reset();
// The current position is after the open bracket, so we need to read in
// in the element name.
if (!fReaderMgr.getName(fQNameBuf))
{
emitError(XMLErrs::ExpectedElementName);
fReaderMgr.skipToChar(chOpenAngle);
return false;
}
// See if its the root element
const bool isRoot = fElemStack.isEmpty();
// Skip any whitespace after the name
fReaderMgr.skipPastSpaces();
// First we have to do the rawest attribute scan. We don't do any
// normalization of them at all, since we don't know yet what type they
// might be (since we need the element decl in order to do that.)
bool isEmpty;
unsigned int attCount = rawAttrScan
(
fQNameBuf.getRawBuffer()
, *fRawAttrList
, isEmpty
);
const bool gotAttrs = (attCount != 0);
// save the contentleafname and currentscope before addlevel, for later use
ContentLeafNameTypeVector* cv = 0;
XMLContentModel* cm = 0;
int currentScope = Grammar::TOP_LEVEL_SCOPE;
bool laxThisOne = false;
if (!isRoot) {
SchemaElementDecl* tempElement = (SchemaElementDecl*) fElemStack.topElement()->fThisElement;
SchemaElementDecl::ModelTypes modelType = tempElement->getModelType();
if ((modelType == SchemaElementDecl::Mixed_Simple)
|| (modelType == SchemaElementDecl::Mixed_Complex)
|| (modelType == SchemaElementDecl::Children))
{
cm = tempElement->getContentModel();
cv = cm->getContentLeafNameTypeVector();
currentScope = fElemStack.getCurrentScope();
}
else if (modelType == SchemaElementDecl::Any) {
laxThisOne = true;
}
}
// Now, since we might have to update the namespace map for this element,
// but we don't have the element decl yet, we just tell the element stack
// to expand up to get ready.
unsigned int elemDepth = fElemStack.addLevel();
fElemStack.setValidationFlag(fValidate);
// Check if there is any external schema location specified, and if we are at root,
// go through them first before scanning those specified in the instance document
if (isRoot
&& (fExternalSchemaLocation || fExternalNoNamespaceSchemaLocation)) {
if (fExternalSchemaLocation)
parseSchemaLocation(fExternalSchemaLocation);
if (fExternalNoNamespaceSchemaLocation)
resolveSchemaGrammar(fExternalNoNamespaceSchemaLocation, XMLUni::fgZeroLenString);
}
// Make an initial pass through the list and find any xmlns attributes or
// schema attributes.
if (attCount)
scanRawAttrListforNameSpaces(fRawAttrList, attCount);
// Resolve the qualified name to a URI and name so that we can look up
// the element decl for this element. We have now update the prefix to
// namespace map so we should get the correct element now.
int prefixColonPos = -1;
const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
unsigned int uriId = resolveQName
(
qnameRawBuf
, fPrefixBuf
, ElemStack::Mode_Element
, prefixColonPos
);
//if schema, check if we should lax or skip the validation of this element
bool parentValidation = fValidate;
if (cv) {
QName element(fPrefixBuf.getRawBuffer(), &qnameRawBuf[prefixColonPos + 1], uriId, fMemoryManager);
// elementDepth will be > 0, as cv is only constructed if element is not
// root.
laxThisOne = laxElementValidation(&element, cv, cm, elemDepth - 1);
}
// Look up the element now in the grammar. This will get us back a
// generic element decl object. We tell him to fault one in if he does
// not find it.
XMLElementDecl* elemDecl = 0;
bool wasAdded = false;
bool errorBeforeElementFound = false;
bool laxBeforeElementFound = false;
const XMLCh* nameRawBuf = &qnameRawBuf[prefixColonPos + 1];
const XMLCh* original_uriStr = fGrammar->getTargetNamespace();
unsigned orgGrammarUri = fURIStringPool->getId(original_uriStr);
if (uriId != fEmptyNamespaceId) {
// Check in current grammar before switching if necessary
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, currentScope
);
if (!elemDecl && (orgGrammarUri != uriId)) {
// not found, switch to the specified grammar
const XMLCh* uriStr = getURIText(uriId);
bool errorCondition = !switchGrammar(uriStr) && fValidate;
if (errorCondition && !laxThisOne)
{
fValidator->emitError
(
XMLValid::GrammarNotFound
,uriStr
);
errorBeforeElementFound = true;
}
else if(errorCondition)
laxBeforeElementFound = true;
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, currentScope
);
}
if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
// if not found, then it may be a reference, try TOP_LEVEL_SCOPE
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, Grammar::TOP_LEVEL_SCOPE
);
if(!elemDecl) {
// still not found in specified uri
// try the empty namespace to see if the element should be unqualified.
elemDecl = fGrammar->getElemDecl
(
fEmptyNamespaceId
, nameRawBuf
, qnameRawBuf
, currentScope
);
bool errorCondition = elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn;
if (errorCondition && fValidate) {
fValidator->emitError
(
XMLValid::ElementNotUnQualified
, elemDecl->getFullName()
);
errorBeforeElementFound = true;
}
else if(errorCondition)
laxBeforeElementFound = true;
}
}
if (!elemDecl) {
// still not found, fault this in and issue error later
// switch back to original grammar first
switchGrammar(original_uriStr);
elemDecl = fGrammar->putElemDecl(uriId
, nameRawBuf
, fPrefixBuf.getRawBuffer()
, qnameRawBuf
, currentScope
, true);
wasAdded = true;
}
}
else if (!elemDecl)
{
//the element has no prefix,
//thus it is either a non-qualified element defined in current targetNS
//or an element that is defined in the globalNS
//try unqualifed first
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, currentScope
);
if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
//not found, switch grammar and try globalNS
bool errorCondition = !switchGrammar(XMLUni::fgZeroLenString) && fValidate;
if (errorCondition && !laxThisOne)
{
fValidator->emitError
(
XMLValid::GrammarNotFound
, XMLUni::fgZeroLenString
);
errorBeforeElementFound = true;
}
else if(errorCondition)
laxBeforeElementFound = true;
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, currentScope
);
}
if (!elemDecl && currentScope != Grammar::TOP_LEVEL_SCOPE) {
// if not found, then it may be a reference, try TOP_LEVEL_SCOPE
elemDecl = fGrammar->getElemDecl
(
uriId
, nameRawBuf
, qnameRawBuf
, Grammar::TOP_LEVEL_SCOPE
);
if (!elemDecl && orgGrammarUri != fEmptyNamespaceId) {
// still Not found in specified uri
// go to original Grammar again to see if element needs to be fully qualified.
const XMLCh* uriStr = getURIText(orgGrammarUri);
bool errorCondition = !switchGrammar(original_uriStr) && fValidate;
if (errorCondition && !laxThisOne)
{
fValidator->emitError
(
XMLValid::GrammarNotFound
,original_uriStr
);
errorBeforeElementFound = true;
}
else if(errorCondition)
laxBeforeElementFound = true;
elemDecl = fGrammar->getElemDecl
(
orgGrammarUri
, nameRawBuf
, qnameRawBuf
, currentScope
);
if (elemDecl && elemDecl->getCreateReason() != XMLElementDecl::JustFaultIn && fValidate) {
fValidator->emitError
(
XMLValid::ElementNotQualified
, elemDecl->getFullName()
);
errorBeforeElementFound = true;
}
}
}
if (!elemDecl) {
// still not found, fault this in and issue error later
// switch back to original grammar first
switchGrammar(original_uriStr);
elemDecl = fGrammar->putElemDecl(uriId
, nameRawBuf
, fPrefixBuf.getRawBuffer()
, qnameRawBuf
, currentScope
, true);
wasAdded = true;
}
}
// We do something different here according to whether we found the
// element or not.
if (wasAdded)
{
if (laxThisOne) {
fValidate = false;
fElemStack.setValidationFlag(fValidate);
}
else if(fValidate) {
((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::FULL);
}
// If validating then emit an error
if (fValidate)
{
// This is to tell the reuse Validator that this element was
// faulted-in, was not an element in the grammar pool originally
elemDecl->setCreateReason(XMLElementDecl::JustFaultIn);
fValidator->emitError
(
XMLValid::ElementNotDefined
, elemDecl->getFullName()
);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
}
}
else
{
if(!laxBeforeElementFound) {
if (fValidate) {
((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::FULL);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::VALID);
}
}
// If its not marked declared and validating, then emit an error
if (!elemDecl->isDeclared()) {
if (laxThisOne) {
fValidate = false;
fElemStack.setValidationFlag(fValidate);
}
if (fValidate)
{
fValidator->emitError
(
XMLValid::ElementNotDefined
, elemDecl->getFullName()
);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::FULL);
}
}
((SchemaElementDecl*)elemDecl)->setXsiComplexTypeInfo(0);
((SchemaElementDecl*)elemDecl)->setXsiSimpleTypeInfo(0);
}
if(errorBeforeElementFound) {
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
}
// Now we can update the element stack to set the current element
// decl. We expanded the stack above, but couldn't store the element
// decl because we didn't know it yet.
fElemStack.setElement(elemDecl, fReaderMgr.getCurrentReaderNum());
fElemStack.setCurrentURI(uriId);
if (isRoot)
fRootGrammar = fGrammar;
// Validate the element
if (fValidate)
fValidator->validateElement(elemDecl);
ComplexTypeInfo* typeinfo = ((SchemaElementDecl*)elemDecl)->getComplexTypeInfo();
if (typeinfo) {
currentScope = typeinfo->getScopeDefined();
// switch grammar if the typeinfo has a different grammar (happens when there is xsi:type)
XMLCh* typeName = typeinfo->getTypeName();
const XMLCh poundStr[] = {chPound, chNull};
if (!XMLString::startsWith(typeName, poundStr)) {
const int comma = XMLString::indexOf(typeName, chComma);
if (comma > 0) {
XMLBuffer prefixBuf(comma+1, fMemoryManager);
prefixBuf.append(typeName, comma);
const XMLCh* uriStr = prefixBuf.getRawBuffer();
bool errorCondition = !switchGrammar(uriStr) && fValidate;
if (errorCondition && !laxThisOne)
{
fValidator->emitError
(
XMLValid::GrammarNotFound
, prefixBuf.getRawBuffer()
);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
}
else if(errorCondition) {
((SchemaElementDecl *)(elemDecl))->setValidationAttempted(PSVIDefs::NONE);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::UNKNOWN);
}
}
}
}
fElemStack.setCurrentScope(currentScope);
// Set element next state
if (elemDepth >= fElemStateSize) {
resizeElemState();
}
fElemState[elemDepth] = 0;
fElemStack.setCurrentGrammar(fGrammar);
// If this is the first element and we are validating, check the root
// element.
if (isRoot)
{
if (fValidate)
{
// Some validators may also want to check the root, call the
// XMLValidator::checkRootElement
if (fValidatorFromUser && !fValidator->checkRootElement(elemDecl->getId())) {
fValidator->emitError(XMLValid::RootElemNotLikeDocType);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
}
}
}
else if (parentValidation)
{
// If the element stack is not empty, then add this element as a
// child of the previous top element. If its empty, this is the root
// elem and is not the child of anything.
fElemStack.addChild(elemDecl->getElementName(), true);
}
// Now lets get the fAttrList filled in. This involves faulting in any
// defaulted and fixed attributes and normalizing the values of any that
// we got explicitly.
//
// We update the attCount value with the total number of attributes, but
// it goes in with the number of values we got during the raw scan of
// explictly provided attrs above.
attCount = buildAttList(*fRawAttrList, attCount, elemDecl, *fAttrList);
// activate identity constraints
if (fValidate) {
unsigned int count = ((SchemaElementDecl*) elemDecl)->getIdentityConstraintCount();
if (count || fMatcherStack->getMatcherCount()) {
fValueStoreCache->startElement();
fMatcherStack->pushContext();
fValueStoreCache->initValueStoresFor((SchemaElementDecl*) elemDecl, (int) elemDepth);
for (unsigned int i = 0; i < count; i++) {
activateSelectorFor(((SchemaElementDecl*) elemDecl)->getIdentityConstraintAt(i), (int) elemDepth);
}
// call all active identity constraints
count = fMatcherStack->getMatcherCount();
for (unsigned int j = 0; j < count; j++) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
matcher->startElement(*elemDecl, uriId, fPrefixBuf.getRawBuffer(), *fAttrList, attCount);
}
}
}
// Since the element may have default values, call start tag now regardless if it is empty or not
// If we have a document handler, then tell it about this start tag
if (fDocHandler)
{
fDocHandler->startElement
(
*elemDecl
, uriId
, fPrefixBuf.getRawBuffer()
, *fAttrList
, attCount
, false
, isRoot
);
}
// If empty, validate content right now if we are validating and then
// pop the element stack top. Else, we have to update the current stack
// top's namespace mapping elements.
if (isEmpty)
{
// Pop the element stack back off since it'll never be used now
fElemStack.popTop();
// If validating, then insure that its legal to have no content
if (fValidate)
{
const int res = fValidator->checkContent(elemDecl, 0, 0);
if (res >= 0)
{
fValidator->emitError
(
XMLValid::ElementNotValidForContent
, elemDecl->getFullName()
, elemDecl->getFormattedContentModel()
);
((SchemaElementDecl *)(elemDecl))->setValidity(PSVIDefs::INVALID);
}
// call matchers and de-activate context
int oldCount = fMatcherStack->getMatcherCount();
if (oldCount || ((SchemaElementDecl*) elemDecl)->getIdentityConstraintCount()) {
for (int i = oldCount - 1; i >= 0; i--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(i);
matcher->endElement(*elemDecl, fContent.getRawBuffer());
}
if (fMatcherStack->size() > 0) {
fMatcherStack->popContext();
}
// handle everything *but* keyref's.
int newCount = fMatcherStack->getMatcherCount();
for (int j = oldCount - 1; j >= newCount; j--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(j);
IdentityConstraint* ic = matcher->getIdentityConstraint();
if (ic && (ic->getType() != IdentityConstraint::KEYREF))
fValueStoreCache->transplant(ic, matcher->getInitialDepth());
}
// now handle keyref's...
for (int k = oldCount - 1; k >= newCount; k--) {
XPathMatcher* matcher = fMatcherStack->getMatcherAt(k);
IdentityConstraint* ic = matcher->getIdentityConstraint();
if (ic && (ic->getType() == IdentityConstraint::KEYREF)) {
ValueStore* values = fValueStoreCache->getValueStoreFor(ic, matcher->getInitialDepth());
if (values) { // nothing to do if nothing matched!
values->endDcocumentFragment(fValueStoreCache);
}
}
}
fValueStoreCache->endElement();
}
}
if(!isRoot)
((SchemaElementDecl *)fElemStack.topElement()->fThisElement)->updateValidityFromElement(elemDecl, fGrammarType);
// If we have a doc handler, tell it about the end tag
if (fDocHandler)
{
fDocHandler->endElement
(
*elemDecl
, uriId
, isRoot
, fPrefixBuf.getRawBuffer()
);
}
// reset xsi:type ComplexTypeInfo
((SchemaElementDecl*)elemDecl)->reset();
if (!isRoot)
((SchemaElementDecl*)(fElemStack.topElement()->fThisElement))->
setXsiComplexTypeInfo(((SchemaValidator*)fValidator)->getCurrentTypeInfo());
// If the elem stack is empty, then it was an empty root
if (isRoot)
gotData = false;
else
{
// Restore the grammar
fGrammar = fElemStack.getCurrentGrammar();
fGrammarType = fGrammar->getGrammarType();
fValidator->setGrammar(fGrammar);
// Restore the validation flag
fValidate = fElemStack.getValidationFlag();
}
}
return true;
}
unsigned int
SGXMLScanner::resolveQName(const XMLCh* const qName
, XMLBuffer& prefixBuf
, const short mode
, int& prefixColonPos)
{
// Lets split out the qName into a URI and name buffer first. The URI
// can be empty.
prefixColonPos = XMLString::indexOf(qName, chColon);
if (prefixColonPos == -1)
{
// Its all name with no prefix, so put the whole thing into the name
// buffer. Then map the empty string to a URI, since the empty string
// represents the default namespace. This will either return some
// explicit URI which the default namespace is mapped to, or the
// the default global namespace.
bool unknown = false;
prefixBuf.reset();
return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
}
else
{
// Copy the chars up to but not including the colon into the prefix
// buffer.
prefixBuf.set(qName, prefixColonPos);
// Watch for the special namespace prefixes. We always map these to
// special URIs. 'xml' gets mapped to the official URI that its defined
// to map to by the NS spec. xmlns gets mapped to a special place holder
// URI that we define (so that it maps to something checkable.)
const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {
// if this is an element, it is an error to have xmlns as prefix
if (mode == ElemStack::Mode_Element)
emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);
return fXMLNSNamespaceId;
}
else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
return fXMLNamespaceId;
}
else
{
bool unknown = false;
unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);
if (unknown)
emitError(XMLErrs::UnknownPrefix, prefixRawBuf);
return uriId;
}
}
}
// ---------------------------------------------------------------------------
// SGXMLScanner: IC activation methods
// ---------------------------------------------------------------------------
// Creates a matcher for the selector of the given identity constraint, adds
// it to the matcher stack and starts its document fragment. Nothing happens
// when the constraint has no selector.
//
//  ic            The identity constraint whose selector is activated.
//  initialDepth  Depth value handed to the newly created matcher.
void SGXMLScanner::activateSelectorFor(IdentityConstraint* const ic, const int initialDepth) {
    IC_Selector* icSelector = ic->getSelector();

    if (icSelector) {
        XPathMatcher* selMatcher =
            icSelector->createMatcher(fFieldActivator, initialDepth, fMemoryManager);

        // Register the matcher first, then start it.
        fMatcherStack->addMatcher(selMatcher);
        selMatcher->startDocumentFragment();
    }
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Grammar preparsing
// ---------------------------------------------------------------------------
// Pre-parses a grammar from the given input source.
//
//  src          The input source to load the grammar from.
//  grammarType  Only Grammar::SchemaGrammarType is acted upon here; for any
//               other value nothing is loaded.
//  toCache      Forwarded unchanged to loadXMLSchemaGrammar().
//
// Returns the grammar produced by loadXMLSchemaGrammar(), or 0 when nothing
// was loaded (including when one of the handled exceptions interrupted the
// load). Exceptions other than XMLErrs::Codes, XMLValid::Codes and
// XMLException are rethrown after the reader manager has been reset.
Grammar* SGXMLScanner::loadGrammar(const InputSource& src
                                   , const short grammarType
                                   , const bool toCache)
{
    Grammar* result = 0;

    try
    {
        fGrammarResolver->cacheGrammarFromParse(false);
        fGrammarResolver->useCachedGrammarInParse(false);
        fRootGrammar = 0;

        if (fValScheme == Val_Auto)
        {
            fValidate = true;
        }

        // Bring the status flags back to their start-of-parse values.
        fInException = false;
        fStandalone = false;
        fErrorCount = 0;
        fHasNoDTD = true;
        fSeeXsi = false;

        if (grammarType == Grammar::SchemaGrammarType)
        {
            result = loadXMLSchemaGrammar(src, toCache);
        }

        // Reset the reader manager to close all files, sockets, etc...
        fReaderMgr.reset();
    }
    // NOTE:
    //
    // In every handler below, any emitError() call MUST happen before the
    // reader manager is flushed; otherwise it fails because it tries to
    // look up the position of the error in the XML source.
    catch(const XMLErrs::Codes)
    {
        // 'First fatal error' style exit: reset and fall through.
        fReaderMgr.reset();
    }
    catch(const XMLValid::Codes)
    {
        // 'First fatal error' style exit: reset and fall through.
        fReaderMgr.reset();
    }
    catch(const XMLException& excToCatch)
    {
        // Report the exception as an error. Reporting may itself throw a
        // user exception; the reader manager is flushed in either case.
        fInException = true;
        try
        {
            if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
            {
                emitError(XMLErrs::DisplayErrorMessage, excToCatch.getMessage());
            }
            else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
            {
                emitError(XMLErrs::XMLException_Fatal, excToCatch.getType(), excToCatch.getMessage());
            }
            else
            {
                emitError(XMLErrs::XMLException_Error, excToCatch.getType(), excToCatch.getMessage());
            }
        }
        catch(...)
        {
            // Flush the reader manager and pass the user's error along.
            fReaderMgr.reset();
            throw;
        }

        // Reporting returned normally: reset and fall through.
        fReaderMgr.reset();
    }
    catch(...)
    {
        // Anything else: reset and rethrow.
        fReaderMgr.reset();
        throw;
    }

    return result;
}
// ---------------------------------------------------------------------------
// SGXMLScanner: Private helper methods
// ---------------------------------------------------------------------------
// Shared initialization used by the constructors so that the same setup
// does not have to be repeated in each of them. Everything created here is
// released again by cleanUp().
void SGXMLScanner::commonInit()
{
    // Element state array, obtained from the memory manager and sized by
    // fElemStateSize.
    fElemState = (unsigned int*) fMemoryManager->allocate(fElemStateSize * sizeof(unsigned int));

    // Storage for the raw attribute scan: plain key/value string pairs,
    // prior to any processing.
    fRawAttrList = new (fMemoryManager) RefVectorOf<KVStringPair>(32, true, fMemoryManager);

    // Dummy schema grammar.
    fSchemaGrammar = new (fMemoryManager) SchemaGrammar(fMemoryManager);

    // Schema validator, created and then initialized.
    fSchemaValidator = new (fMemoryManager) SchemaValidator(0, fMemoryManager);
    initValidator(fSchemaValidator);

    // Identity constraint machinery. The field activator is handed both the
    // value store cache and the matcher stack, so it is created last.
    fMatcherStack = new (fMemoryManager) XPathMatcherStack(fMemoryManager);
    fValueStoreCache = new (fMemoryManager) ValueStoreCache(fMemoryManager);
    fFieldActivator = new (fMemoryManager) FieldActivator(fValueStoreCache, fMatcherStack, fMemoryManager);
    fValueStoreCache->setScanner(this);

    // Default entity entries for the character references that must always
    // be present.
    fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
    fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
    fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
    fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
    fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
    fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
}
// Releases the members that commonInit() creates.
void SGXMLScanner::cleanUp()
{
    // The element state array came from the memory manager, so it goes
    // back the same way rather than through delete [].
    fMemoryManager->deallocate(fElemState);

    // Grammar, entity table and raw attribute storage.
    delete fSchemaGrammar;
    delete fEntityTable;
    delete fRawAttrList;

    // Validator.
    delete fSchemaValidator;

    // Identity constraint helpers.
    delete fFieldActivator;
    delete fMatcherStack;
    delete fValueStoreCache;
}
void SGXMLScanner::resizeElemState() {
unsigned int newSize = fElemStateSize * 2;
unsigned int* newElemState = (unsigned int*) fMemoryManager->allocate
(
newSize * sizeof(unsigned int)
); //new unsigned int[newSize];
// Copy the existing values
unsigned int index = 0;
for (; index < fElemStateSize; index++)
newElemState[index] = fElemState[index];
for (; index < newSize; index++)
newElemState[index] = 0;
// Delete the old array and udpate our members
fMemoryManager->deallocate(fElemState); //delete [] fElemState;
fElemState = newElemState;
fElemStateSize = newSize;
}
// This method is called from scanStartTag() to build up the list of
// XMLAttr objects that will be passed out in the start tag callout. We
// get the key/value pairs from the raw scan of explicitly provided attrs,
// which have not been normalized. And we get the element declaration from
// which we will get any defaulted or fixed attribute defs and add those
// in as well.
unsigned int
SGXMLScanner::buildAttList(const RefVectorOf<KVStringPair>& providedAttrs
, const unsigned int attCount
, XMLElementDecl* elemDecl
, RefVectorOf<XMLAttr>& toFill)
{
// Ask the element to clear the 'provided' flag on all of the att defs
// that it owns, and to return us a boolean indicating whether it has
// any defs.
const bool hasDefs = elemDecl->resetDefs();
// If there are no explicitly provided attributes and there are no
// defined attributes for the element, then we don't have anything to do.
// So just return zero in this case.
if (!hasDefs && !attCount)
return 0;
// Keep up with how many attrs we end up with total
unsigned int retCount = 0;
// And get the current size of the output vector. This lets us use
// existing elements until we fill it, then start adding new ones.
const unsigned int curAttListSize = toFill.size();
// We need a buffer into which raw scanned attribute values will be
// normalized.
XMLBufBid bbNormal(&fBufMgr);
XMLBuffer& normBuf = bbNormal.getBuffer();
// Loop through our explicitly provided attributes, which are in the raw
// scanned form, and build up XMLAttr objects.
unsigned int index;
for (index = 0; index < attCount; index++)
{
const KVStringPair* curPair = providedAttrs.elementAt(index);
// We have to split the name into its prefix and name parts. Then
// we map the prefix to its URI.
const XMLCh* const namePtr = curPair->getKey();
ArrayJanitor<XMLCh> janName(0);
// use a stack-based buffer when possible.
XMLCh tempBuffer[100];
const int colonInd = XMLString::indexOf(namePtr, chColon);
const XMLCh* prefPtr = XMLUni::fgZeroLenString;
const XMLCh* suffPtr = XMLUni::fgZeroLenString;
if (colonInd != -1)
{
// We have to split the string, so make a copy.
if (XMLString::stringLen(namePtr) < sizeof(tempBuffer) / sizeof(tempBuffer[0]))
{
XMLString::copyString(tempBuffer, namePtr);
tempBuffer[colonInd] = chNull;
prefPtr = tempBuffer;
}
else
{
janName.reset(XMLString::replicate(namePtr, fMemoryManager), fMemoryManager);
janName[colonInd] = chNull;
prefPtr = janName.get();
}
suffPtr = prefPtr + colonInd + 1;
}
else
{
// No colon, so we just have a name with no prefix
suffPtr = namePtr;
}
// Map the prefix to a URI id. We tell him that we are mapping an
// attr prefix, so any xmlns attrs at this level will not affect it.
const unsigned int uriId = resolvePrefix(prefPtr, ElemStack::Mode_Attribute);
// If the uri comes back as the xmlns or xml URI or its just a name
// and that name is 'xmlns', then we handle it specially. So set a
// boolean flag that lets us quickly below know which we are dealing
// with.
const bool isNSAttr = (uriId == fXMLNSNamespaceId)
|| (uriId == fXMLNamespaceId)
|| XMLString::equals(suffPtr, XMLUni::fgXMLNSString)
|| XMLString::equals(getURIText(uriId), SchemaSymbols::fgURI_XSI);
// If its not a special case namespace attr of some sort, then we
// do normal checking and processing.
XMLAttDef::AttTypes attType;
if (!isNSAttr)
{
// Some checking for attribute wild card first (for schema)
bool laxThisOne = false;
bool skipThisOne = false;
XMLAttDef* attDefForWildCard = 0;
XMLAttDef* attDef = 0;
if (fGrammarType == Grammar::SchemaGrammarType) {
//retrieve the att def
attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, uriId);
// if not found or faulted in - check for a matching wildcard attribute
// if no matching wildcard attribute, check (un)qualifed cases and flag
// appropriate errors
if (!attDef || (attDef->getCreateReason() == XMLAttDef::JustFaultIn)) {
SchemaAttDef* attWildCard = ((SchemaElementDecl*)elemDecl)->getAttWildCard();
if (attWildCard) {
//if schema, see if we should lax or skip the validation of this attribute
if (anyAttributeValidation(attWildCard, uriId, skipThisOne, laxThisOne)) {
SchemaGrammar* sGrammar = (SchemaGrammar*) fGrammarResolver->getGrammar(getURIText(uriId));
if (sGrammar && sGrammar->getGrammarType() == Grammar::SchemaGrammarType) {
RefHashTableOf<XMLAttDef>* attRegistry = sGrammar->getAttributeDeclRegistry();
if (attRegistry) {
attDefForWildCard = attRegistry->get(suffPtr);
}
}
}
}
else {
// not found, see if the attDef should be qualified or not
if (uriId == fEmptyNamespaceId) {
attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, fURIStringPool->getId(fGrammar->getTargetNamespace()));
if (fValidate
&& attDef
&& attDef->getCreateReason() != XMLAttDef::JustFaultIn) {
// the attribute should be qualified
fValidator->emitError
(
XMLValid::AttributeNotQualified
, attDef->getFullName()
);
((SchemaAttDef *)(attDef))->setValidity(PSVIDefs::INVALID);
}
}
else {
attDef = ((SchemaElementDecl*)elemDecl)->getAttDef(suffPtr, fEmptyNamespaceId);
if (fValidate
&& attDef
&& attDef->getCreateReason() != XMLAttDef::JustFaultIn) {