词法分析

开发平台：

Visual C++

DTDScanner.cpp：源码内容

entityDecl->setIsParameter(isPEDecl);
//
// Space is legal (required actually) here so check for a PE ref. If
// we don't get our whitespace, then issue an error, but try to keep
// going.
//
if (!checkForPERef(true, false, true))
fScanner->emitError(XMLErrs::ExpectedWhitespace);
// save the hasNoDTD status for Entity Constraint Checking
bool hasNoDTD = fScanner->getHasNoDTD();
if (hasNoDTD && isPEDecl)
fScanner->setHasNoDTD(false);
// According to the type call the value scanning method
if (!scanEntityDef(*entityDecl, isPEDecl))
{
fReaderMgr->skipPastChar(chCloseAngle);
fScanner->setHasNoDTD(true);
fScanner->emitError(XMLErrs::ExpectedEntityValue);
return;
}
if (hasNoDTD)
fScanner->setHasNoDTD(true);
// Space is legal (but not required) here so check for a PE ref
checkForPERef(false, false, true);
// And then we have to have the closing angle bracket
if (!fReaderMgr->skippedChar(chCloseAngle))
{
fScanner->emitError(XMLErrs::UnterminatedEntityDecl, entityDecl->getName());
fReaderMgr->skipPastChar(chCloseAngle);
}
//
// If we have a doc type handler, then call it. But only call it for
// ignored elements if advanced callbacks are enabled.
//
if (fDocTypeHandler)
fDocTypeHandler->entityDecl(*entityDecl, isPEDecl, isIgnored);
}
//
//  Scans a general entity reference or a character reference. The code
//  tests for '#' first and otherwise reads a name, so the input is expected
//  to be positioned just past the introducing '&'.
//
//  A character reference is expanded in place: its value comes back in
//  firstCh (and secondCh, when scanCharRef() fills it in) and escaped is
//  set. An entity flagged as a special char is handled the same way, with
//  the first character of its value returned. Any other general entity is
//  wrapped in a reader that is pushed onto the reader stack.
//
//  Returns:
//      EntityExp_Returned  the value was handed back via firstCh/secondCh
//      EntityExp_Pushed    a reader was pushed (also returned for an
//                          internal entity whose push was rejected as
//                          recursive, after the error has been emitted)
//      EntityExp_Failed    the reference could not be processed
//
//  The escaped flag only matters when EntityExp_Returned comes back.
//
//  NOTE: This is only called when scanning attribute values, so general
//  entities are always expanded.
//
DTDScanner::EntityExpRes
DTDScanner::scanEntityRef(XMLCh& firstCh, XMLCh& secondCh, bool& escaped)
{
    // Defaults: not escaped, no second character
    escaped = false;
    secondCh = 0;

    // The complete reference has to come out of a single entity reader
    const unsigned int startReader = fReaderMgr->getCurrentReaderNum();

    // A pound sign introduces a character reference, which is always expanded
    if (fReaderMgr->skippedChar(chPound))
    {
        // Bail out immediately if the numeric value cannot be scanned
        if (!scanCharRef(firstCh, secondCh))
            return EntityExp_Failed;

        if (fReaderMgr->getCurrentReaderNum() != startReader)
            fScanner->emitError(XMLErrs::PartialMarkupInEntity);

        // A char ref counts as escaped
        escaped = true;
        return EntityExp_Returned;
    }

    // Otherwise it has to be the name of a general entity
    XMLBufBid nameBid(fBufMgr);
    if (!fReaderMgr->getName(nameBid.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedEntityRefName);
        return EntityExp_Failed;
    }
    const XMLCh* const refName = nameBid.getRawBuffer();

    // A missing semi-colon is reported, but we carry on regardless
    if (!fReaderMgr->skippedChar(chSemiColon))
        fScanner->emitError(XMLErrs::UnterminatedEntityRef, refName);

    // Check again that we never left the starting reader
    if (fReaderMgr->getCurrentReaderNum() != startReader)
        fScanner->emitError(XMLErrs::PartialMarkupInEntity);

    // Find the declaration in the general entity pool
    XMLEntityDecl* entity = fDTDGrammar->getEntityDecl(refName);
    if (!entity)
    {
        //
        //  XML 1.0 Section 4.1: an undeclared entity is a well-formedness
        //  error for standalone/no-DTD documents, otherwise it is only a
        //  validity error (reported when validating).
        //
        if (fScanner->getStandalone() || fScanner->getHasNoDTD())
        {
            fScanner->emitError(XMLErrs::EntityNotFound, refName);
        }
        else if (fScanner->getDoValidation())
        {
            fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, refName);
        }
        return EntityExp_Failed;
    }

    //
    //  XML 1.0 Section 4.1: a standalone document may only reference
    //  entities declared in the internal subset.
    //
    if (fScanner->getStandalone() && !entity->getDeclaredInIntSubset())
        fScanner->emitError(XMLErrs::IllegalRefInStandalone, refName);

    // Special char entities are escaped and returned directly
    if (entity->getIsSpecialChar())
    {
        firstCh = entity->getValue()[0];
        escaped = true;
        return EntityExp_Returned;
    }

    if (!entity->isExternal())
    {
        //
        //  Internal entity: build a reader over the in-memory value. The
        //  original notes say an encoding is forced (UTF-16) so the reader
        //  will not try to decode a first line looking for an XML/TextDecl.
        //
        XMLReader* memReader = fReaderMgr->createIntEntReader
        (
            entity->getName()
            , XMLReader::RefFrom_NonLiteral
            , XMLReader::Type_General
            , entity->getValue()
            , entity->getValueLen()
            , false
        );

        //
        //  A rejected push means the entity is recursive. Report it, but
        //  keep going; the result is still reported as pushed.
        //
        if (!fReaderMgr->pushReader(memReader, entity))
            fScanner->emitError(XMLErrs::RecursiveEntity, entity->getName());

        return EntityExp_Pushed;
    }

    // XML 1.0 Section 4.4.4: a reference to an unparsed entity is forbidden
    if (entity->isUnparsed())
    {
        fScanner->emitError(XMLErrs::NoUnparsedEntityRefs, refName);
        return EntityExp_Failed;
    }

    //
    //  XML 1.0 Section 4.4.4: an external entity may not be referenced from
    //  an attribute value. Report it, then expand the entity anyway.
    //
    fScanner->emitError(XMLErrs::NoExtRefsInAttValue);

    InputSource* extSource;
    XMLReader* extReader = fReaderMgr->createReader
    (
        entity->getBaseURI()
        , entity->getSystemId()
        , entity->getPublicId()
        , false
        , XMLReader::RefFrom_NonLiteral
        , XMLReader::Type_General
        , XMLReader::Source_External
        , extSource
    );

    // The janitor releases the input source when we leave this function
    Janitor<InputSource> srcJanitor(extSource);

    // No reader means the external entity could not be opened
    if (!extReader)
    {
        ThrowXML1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, extSource->getSystemId());
    }

    // A rejected push indicates a recursive expansion
    if (!fReaderMgr->pushReader(extReader, entity))
    {
        fScanner->emitError(XMLErrs::RecursiveEntity, entity->getName());
        return EntityExp_Failed;
    }

    // Consume a leading text decl if the entity starts with the XML string
    if (fScanner->checkXMLDecl(true))
        scanTextDecl();

    return EntityExp_Pushed;
}
//
//  Scans the quoted literal of an entity value into toFill. PE references
//  inside the literal are expanded; numeric character references are
//  expanded in place; general entity references are NOT expanded but are
//  copied through as data ('&', name, ';'), since in a DTD every such
//  literal belongs to an entity declaration.
//
//  toFill  Reset on entry, then receives the literal's content.
//  isPE    Not consulted by this implementation.
//
//  Returns false if no opening quote is found or an invalid character is
//  hit (in which case input is skipped past the closing quote); otherwise
//  true. Throws UnexpectedEOFException if the input ends inside the
//  literal.
//
bool DTDScanner::scanEntityLiteral(XMLBuffer& toFill, const bool isPE)
{
    toFill.reset();

    // Get the next char which must be a single or double quote
    XMLCh quoteCh;
    if (!fReaderMgr->skipIfQuote(quoteCh))
        return false;

    // Get a buffer for pulling in entity names when we see GE refs
    XMLBufBid bbName(fBufMgr);
    XMLBuffer& nameBuf = bbName.getBuffer();

    // Remember the current reader
    const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();

    //
    //  Loop until we see the ending quote character, handling any references
    //  in the process.
    //
    XMLCh nextCh;
    XMLCh secondCh = 0;
    bool gotLeadingSurrogate = false;
    while (true)
    {
        nextCh = fReaderMgr->getNextChar();

        //
        //  The second character only ever comes from a char ref scanned in
        //  this iteration. Clear it up front so that a value left over from
        //  an earlier char ref is never appended after unrelated characters.
        //
        secondCh = 0;

        //
        //  Watch specifically for EOF and issue a more meaningful error
        //  if that occurs (since an unterminated quoted char can cause
        //  this easily.)
        //
        if (!nextCh)
        {
            fScanner->emitError(XMLErrs::UnterminatedEntityLiteral);
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
        }

        //
        //  Break out on our terminating quote char when we are back in the
        //  same reader. Otherwise, we might trigger on a nested quote char
        //  in an expanded entity.
        //
        if ((nextCh == quoteCh)
        &&  (fReaderMgr->getCurrentReaderNum() == orgReader))
        {
            break;
        }

        if (nextCh == chPercent)
        {
            //
            //  Put the PE's value on the reader stack and then jump back
            //  to the top to start processing it. The parameter indicates
            //  that it should not scan the reference's content as an external
            //  subset.
            //
            expandPERef(false, true, true);
            continue;
        }

        //
        //  Ok, now that all the other special stuff is checked, we can
        //  look for a general entity. In here, we cannot have a naked &
        //  and will only expand numerical char refs or the intrinsic char
        //  refs. Others will be left alone.
        //
        if (nextCh == chAmpersand)
        {
            //
            //  Only numeric char refs are expanded here, not general
            //  entities. The XML spec still requires that anything that is
            //  not a char ref be a well-formed general entity reference
            //  (i.e. no naked '&' chars.)
            //
            if (fReaderMgr->skippedChar(chPound))
            {
                // If it failed, then just jump back to the top and try to pick up
                if (!scanCharRef(nextCh, secondCh))
                {
                    gotLeadingSurrogate = false;
                    continue;
                }
            }
            else
            {
                if (!fReaderMgr->getName(nameBuf))
                {
                    fScanner->emitError(XMLErrs::ExpectedEntityRefName);
                }
                else
                {
                    //
                    //  Since we are not expanding any of this, we have to
                    //  put the amp and name into the target buffer as data.
                    //
                    toFill.append(chAmpersand);
                    toFill.append(nameBuf.getRawBuffer());

                    // Make sure we skipped a trailing semicolon
                    if (!fReaderMgr->skippedChar(chSemiColon))
                    {
                        fScanner->emitError
                        (
                            XMLErrs::UnterminatedEntityRef
                            , nameBuf.getRawBuffer()
                        );
                    }

                    // And make the new character the semicolon
                    nextCh = chSemiColon;
                }

                // Either way here we reset the surrogate flag
                gotLeadingSurrogate = false;
            }
        }
        else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
        {
            // Leading surrogate: two in a row is an error
            if (gotLeadingSurrogate)
                fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
            else
                gotLeadingSurrogate = true;
        }
        else
        {
            if (gotLeadingSurrogate)
            {
                // A leading surrogate must be followed by a trailing one
                if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                    fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
            }
            else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
            {
                // Report the offending character in hex and give up
                XMLCh tmpBuf[9];
                XMLString::binToText
                (
                    nextCh
                    , tmpBuf
                    , 8
                    , 16
                );
                fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                fReaderMgr->skipPastChar(quoteCh);
                return false;
            }
            gotLeadingSurrogate = false;
        }

        // Looks ok, so add it to the literal
        toFill.append(nextCh);
        if (secondCh)
            toFill.append(secondCh);
    }

    //
    //  If we got here and did not get back to the original reader level,
    //  then we propagated some entity out of the literal, so issue an
    //  error, but don't fail.
    //
    if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
        fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);

    return true;
}
//
//  Scans the definition part of an entity declaration, i.e. what follows
//  the entity name (and any PE reference after it), and stores what it
//  finds in decl.
//
//  The definition is either a quoted entity literal, or an external id
//  optionally followed by an NDATA notation name. NDATA on a parameter
//  entity is reported as an error.
//
//  Returns false if the literal, the external id or the notation name
//  could not be scanned; true otherwise.
//
bool DTDScanner::scanEntityDef(DTDEntityDecl& decl, const bool isPEDecl)
{
    // A leading quote means the value is an entity literal
    const bool startsWithQuote = fReaderMgr->lookingAtChar(chSingleQuote)
                              || fReaderMgr->lookingAtChar(chDoubleQuote);
    if (startsWithQuote)
    {
        XMLBufBid literalBid(fBufMgr);
        if (!scanEntityLiteral(literalBid.getBuffer(), isPEDecl))
            return false;

        // Store the literal as the entity's value
        decl.setValue(literalBid.getRawBuffer());
        return true;
    }

    //
    //  Not a literal, so it has to be an external entity and an external
    //  id must follow. Scan it into a pair of buffers.
    //
    XMLBufBid pubIdBid(fBufMgr);
    XMLBufBid sysIdBid(fBufMgr);
    if (!scanId(pubIdBid.getBuffer(), sysIdBid.getBuffer(), IDType_External))
        return false;

    ReaderMgr::LastExtEntityInfo extInfo;
    fReaderMgr->getLastExtEntityInfo(extInfo);

    // Copy the ids over to the decl; empty strings are stored as null
    const XMLCh* pubIdText = pubIdBid.getRawBuffer();
    const XMLCh* sysIdText = sysIdBid.getRawBuffer();
    decl.setPublicId((pubIdText && *pubIdText) ? pubIdText : 0);
    decl.setSystemId((sysIdText && *sysIdText) ? sysIdText : 0);
    decl.setBaseURI((extInfo.systemId && *extInfo.systemId) ? extInfo.systemId : 0);

    const bool sawSpace = checkForPERef(false, false, true);
    if (isPEDecl)
    {
        // A PE decl with nothing after the external id is complete
        if (!sawSpace)
            return true;

        //
        //  Common mistake: NDATA is not allowed on PEs. If it is there,
        //  give a meaningful error and carry on so the NDATA text gets
        //  eaten.
        //
        if (fReaderMgr->skippedString(XMLUni::fgNDATAString))
            fScanner->emitError(XMLErrs::NDATANotValidForPE);
    }

    // Sitting on the closing angle bracket means there is nothing more
    if (fReaderMgr->lookingAtChar(chCloseAngle))
        return true;

    // Anything else must have been separated by whitespace
    if (!sawSpace)
        fScanner->emitError(XMLErrs::ExpectedWhitespace);

    // What follows has to be the NDATA keyword
    if (!fReaderMgr->skippedString(XMLUni::fgNDATAString))
        fScanner->emitError(XMLErrs::ExpectedNDATA);

    // Whitespace is required next, but press on if it is missing
    if (!checkForPERef(false, false, true))
        fScanner->emitError(XMLErrs::ExpectedWhitespace);

    // And finally the notation name itself
    XMLBufBid notationBid(fBufMgr);
    if (!fReaderMgr->getName(notationBid.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedNotationName);
        return false;
    }

    decl.setNotationName(notationBid.getRawBuffer());
    return true;
}
//
// This method is called after an attribute decl name or a notation decl has
// been scanned and then an opening parenthesis was see, indicating the list
// of values. It scans the enumeration values and creates a single string
// which has a single space between each value.
//
// The terminating close paren ends this scan.
//
bool DTDScanner::scanEnumeration( const DTDAttDef& attDef
, XMLBuffer& toFill
, const bool notation)
{
// Reset the passed buffer
toFill.reset();
// Check for PE ref but don't require space
checkForPERef(false, false, true);
// If this is a notation, we need an opening paren
if (notation)
{
if (!fReaderMgr->skippedChar(chOpenParen))
fScanner->emitError(XMLErrs::ExpectedOpenParen);
}
// We need a local buffer to use as well
XMLBufBid bbTmp(fBufMgr);
while (true)
{
// Space is allowed here for either type so check for PE ref
checkForPERef(false, false, true);
// And then get either a name or a name token
bool success;
if (notation)
success = fReaderMgr->getName(bbTmp.getBuffer());
else
success = fReaderMgr->getNameToken(bbTmp.getBuffer());
if (!success)
{
fScanner->emitError
(
XMLErrs::ExpectedEnumValue
, attDef.getFullName()
);
return false;
}
// Append this value to the target value
toFill.append(bbTmp.getRawBuffer(), bbTmp.getLen());
// Space is allowed here for either type so check for PE ref
checkForPERef(false, false, true);
// Check for the terminating paren
if (fReaderMgr->skippedChar(chCloseParen))
break;
// And append a space separator
toFill.append(chSpace);
// Check for the pipe character separator
if (!fReaderMgr->skippedChar(chPipe))
{
fScanner->emitError(XMLErrs::ExpectedEnumSepOrParen);
return false;
}
}
return true;
}
//
//  Skips optional whitespace, an equal sign, and optional whitespace again.
//  Returns true if the equal sign was there; if not, only the leading
//  whitespace has been consumed and false is returned.
//
bool DTDScanner::scanEq()
{
    fReaderMgr->skipPastSpaces();

    if (!fReaderMgr->skippedChar(chEqual))
        return false;

    fReaderMgr->skipPastSpaces();
    return true;
}
//
//  This method is called when an external entity reference is seen in the
//  DTD or an external DTD subset is encountered, and their contents pushed
//  onto the reader stack. This method will scan that contents. It is also
//  re-entered from scanMarkupDecl() to scan the body of an INCLUDE section.
//
//  inIncludeSect  True when scanning the body of a conditional INCLUDE
//                 section; the scan then ends at the closing "]]>" and a
//                 leading text decl is not accepted.
//  isDTD          True when the content is the external DTD subset itself.
//                 The doc type handler's startExtSubset()/endExtSubset()
//                 callbacks are only issued in that case (and never for an
//                 include section), so that they always come in pairs.
//
void DTDScanner::scanExtSubsetDecl(const bool inIncludeSect, const bool isDTD)
{
    // Indicate we are in the external subset now
    FlagJanitor<bool> janContentFlag(&fInternalSubset, false);

    // A text decl is only acceptable as the very first thing we see
    bool bAcceptDecl = !inIncludeSect;

    // Get a buffer for whitespace
    XMLBufBid bbSpace(fBufMgr);

    //
    //  Start and end of the external subset are reported under the same
    //  condition. Previously the start was reported for any non-include
    //  call while the end was only reported for the DTD itself, which left
    //  an unmatched start when isDTD was false.
    //
    const bool reportSubset = fDocTypeHandler && isDTD && !inIncludeSect;
    if (reportSubset)
        fDocTypeHandler->startExtSubset();

    //
    //  We have to play a trick here if the current entity we are parsing
    //  is a PE. Because the spooling code will put out a whitespace before
    //  and after an expanded PE if its being scanned outside the context of
    //  a literal entity, this will confuse this external subset code.
    //
    //  So, we see if that is what is happening and, if so, eat the single
    //  space, and check for the <?xml string. If we find it, we parse that
    //  markup right now.
    //
    if (fReaderMgr->isScanningPERefOutOfLiteral())
    {
        if (fReaderMgr->skippedSpace())
        {
            if (fScanner->checkXMLDecl(true))
            {
                scanTextDecl();
                bAcceptDecl = false;

                // <TBD> Figure out how to do this
                // fReaderMgr->unGet(chSpace);
            }
        }
    }

    // Get the current reader number
    const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();

    //
    //  Loop until we hit the end of the external subset entity. Note that
    //  we use a double loop here in order to avoid the overhead of doing
    //  the exception setup/teardown work on every loop.
    //
    bool inMarkup = false;
    bool inCharData = false;
    while (true)
    {
        try
        {
            while (true)
            {
                const XMLCh nextCh = fReaderMgr->peekNextChar();

                if (nextCh == chOpenAngle)
                {
                    //
                    //  Get the reader this declaration starts on. Kept
                    //  under its own name so it does not hide orgReader.
                    //  XML 1.0 P28a Well-formedness constraint: PE Between Declarations
                    //
                    const unsigned int declReader = fReaderMgr->getCurrentReaderNum();
                    bool wasInPE = (fReaderMgr->getCurrentReader()->getType() == XMLReader::Type_PE);

                    //
                    //  Now scan the markup. Set the flag so that we will know that
                    //  we were in markup if an end of entity exception occurs.
                    //
                    fReaderMgr->getNextChar();
                    inMarkup = true;
                    scanMarkupDecl(bAcceptDecl);
                    inMarkup = false;

                    //
                    //  And see if we got back to the same level. If not, then its
                    //  a partial markup error.
                    //
                    if (fReaderMgr->getCurrentReaderNum() != declReader)
                    {
                        if (wasInPE)
                            fScanner->emitError(XMLErrs::PEBetweenDecl);
                        else if (fScanner->getDoValidation())
                            fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
                    }
                }
                else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
                {
                    //
                    //  If we have a doc type handler, then gather up the
                    //  whitespace and call back. Otherwise just skip
                    //  whitespaces.
                    //
                    if (fDocTypeHandler)
                    {
                        inCharData = true;
                        fReaderMgr->getSpaces(bbSpace.getBuffer());
                        inCharData = false;

                        fDocTypeHandler->doctypeWhitespace
                        (
                            bbSpace.getRawBuffer()
                            , bbSpace.getLen()
                        );
                    }
                    else
                    {
                        //
                        //  If we hit an end of entity in the middle of white
                        //  space, that's fine. We'll just come back in here
                        //  again on the next round and skip some more.
                        //
                        fReaderMgr->skipPastSpaces();
                    }
                }
                else if (nextCh == chPercent)
                {
                    //
                    //  Expand (and scan if external) the reference value. Tell
                    //  it to throw an end of entity exception at the end of the
                    //  entity.
                    //
                    fReaderMgr->getNextChar();
                    expandPERef(true, false, false, true);
                }
                else if (inIncludeSect && (nextCh == chCloseSquare))
                {
                    //
                    //  Its the end of a conditional include section, which
                    //  must be closed by "]]>". Scan it and return to the
                    //  caller that started the section.
                    //
                    fReaderMgr->getNextChar();
                    if (!fReaderMgr->skippedChar(chCloseSquare))
                    {
                        fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                        fReaderMgr->skipPastChar(chCloseAngle);
                    }
                    else if (!fReaderMgr->skippedChar(chCloseAngle))
                    {
                        fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                        fReaderMgr->skipPastChar(chCloseAngle);
                    }
                    return;
                }
                else if (!nextCh)
                {
                    return; // nothing left
                }
                else
                {
                    // Anything else is not legal between declarations
                    fReaderMgr->getNextChar();
                    if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
                    {
                        XMLCh tmpBuf[9];
                        XMLString::binToText
                        (
                            nextCh
                            , tmpBuf
                            , 8
                            , 16
                        );
                        fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                    }
                    else
                    {
                        fScanner->emitError(XMLErrs::InvalidDocumentStructure);
                    }

                    // Try to get realigned
                    static const XMLCh toSkip[] =
                    {
                        chPercent, chCloseSquare, chOpenAngle, chNull
                    };
                    fReaderMgr->skipUntilInOrWS(toSkip);
                }

                // Once anything has been processed a text decl is too late
                bAcceptDecl = false;
            }
        }
        catch(const EndOfEntityException& toCatch)
        {
            //
            //  If the external entity ended while we were in markup, then that's
            //  a partial markup error.
            //
            if (inMarkup)
            {
                fScanner->emitError(XMLErrs::PartialMarkupInEntity);
                inMarkup = false;
            }

            // If we were in char data, then send what we got
            if (inCharData)
            {
                if (fDocTypeHandler)
                {
                    fDocTypeHandler->doctypeWhitespace
                    (
                        bbSpace.getRawBuffer()
                        , bbSpace.getLen()
                    );
                }
                inCharData = false;
            }

            //
            //  If the entity that just ended was the entity that we started
            //  on, then this is the end of the external subset.
            //
            if (orgReader == toCatch.getReaderNum())
                break;
        }
    }

    // Tell the doc type handler the ext subset ends, if the start was reported
    if (reportSubset)
        fDocTypeHandler->endExtSubset();
}
//
//  Scans an id, either public or external, into the two passed buffers.
//
//  [75] ExternalID ::= 'SYSTEM' S SystemLiteral
//                      | 'PUBLIC' S PubidLiteral S SystemLiteral
//  [83] PublicID   ::= 'PUBLIC' S PubidLiteral
//
//  pubIdToFill  Reset on entry; receives the public literal, if any.
//  sysIdToFill  Reset on entry; receives the system literal, if any.
//  whatKind     IDType_Public accepts only the PublicID form and
//               IDType_External requires a system literal. For any other
//               value the system literal after a public literal is
//               optional.
//
//  Returns false when the id cannot be scanned. Some errors (typically a
//  forgotten whitespace in front of a quoted literal) are reported but
//  recovered from, so true may be returned after an error was emitted.
//
bool DTDScanner::scanId(          XMLBuffer&  pubIdToFill
                        ,         XMLBuffer&  sysIdToFill
                        , const   IDTypes     whatKind)
{
    // Start out with both results empty
    pubIdToFill.reset();
    sysIdToFill.reset();

    // 'SYSTEM' S SystemLiteral
    if (fReaderMgr->skippedString(XMLUni::fgSysIDString))
    {
        // A system id is no good if only a public id was asked for
        if (whatKind == IDType_Public)
        {
            fScanner->emitError(XMLErrs::ExpectedPublicId);
            return false;
        }

        // Whitespace is mandatory after the keyword
        if (!fReaderMgr->skipPastSpaces())
        {
            fScanner->emitError(XMLErrs::ExpectedWhitespace);
            return false;
        }

        return scanSystemLiteral(sysIdToFill);
    }

    //
    //  Not SYSTEM, so the only remaining options are
    //      'PUBLIC' S PubidLiteral S SystemLiteral
    //      'PUBLIC' S PubidLiteral
    //
    if (!fReaderMgr->skippedString(XMLUni::fgPubIDString))
    {
        fScanner->emitError(XMLErrs::ExpectedSystemOrPublicId);
        return false;
    }

    if (!fReaderMgr->skipPastSpaces())
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);

        //
        //  The whitespace is missing. If a quote follows directly, assume
        //  it was merely forgotten and keep going; otherwise give up.
        //
        const XMLCh afterKeyword = fReaderMgr->peekNextChar();
        const bool quoteFollows = (afterKeyword == chDoubleQuote)
                               || (afterKeyword == chSingleQuote);
        if (!quoteFollows)
            return false;
    }

    if (!scanPublicLiteral(pubIdToFill))
        return false;

    // For a pure public id there is nothing more to scan
    if (whatKind == IDType_Public)
        return true;

    // Note whether the public literal is followed by whitespace
    const bool spaceAfterPubId = fReaderMgr->skipPastSpaces();

    // For best recovery we need to know whether a quoted literal comes next
    const XMLCh afterPubId = fReaderMgr->peekNextChar();
    const bool sysIdFollows = (afterPubId == chDoubleQuote)
                           || (afterPubId == chSingleQuote);

    if (!spaceAfterPubId)
    {
        //
        //  No whitespace and no quote is a legal end of the id unless an
        //  external id was required.
        //
        if (!sysIdFollows && (whatKind != IDType_External))
            return true;

        //
        //  Otherwise whitespace is missing: either in front of a system
        //  literal that is there (report it and keep going, it will
        //  probably work), or in front of a required one that is not
        //  there at all (report it and fail).
        //
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        if (!sysIdFollows)
            return false;
    }

    // A quote is coming up, so scan the system literal
    if (sysIdFollows)
        return scanSystemLiteral(sysIdToFill);

    // No quote: that is an error if an external id was expected
    if (whatKind == IDType_External)
        fScanner->emitError(XMLErrs::ExpectedQuotedString);

    return true;
}
//
// This method will scan the contents of an ignored section. It assumes that
// we already are in the body, i.e. we've seen <![IGNORE[ at this point. So
// we have to just scan until we see a matching ]]> closing markup.
//
void DTDScanner::scanIgnoredSection()
{
//
// Depth starts at one because we are already in one section and want
// to parse until we hit its end.
//
unsigned long depth = 1;
bool gotLeadingSurrogate = false;
while (true)
{
const XMLCh nextCh = fReaderMgr->getNextChar();
if (!nextCh)
ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
if (nextCh == chOpenAngle)
{
if (fReaderMgr->skippedChar(chBang)
&& fReaderMgr->skippedChar(chOpenSquare))
{
depth++;
}
}
else if (nextCh == chCloseSquare)
{
if (fReaderMgr->skippedChar(chCloseSquare))
{
while (fReaderMgr->skippedChar(chCloseSquare))
{
// Do nothing, just skip them
}
if (fReaderMgr->skippedChar(chCloseAngle))
{
depth--;
if (!depth)
break;
}
}
}
// Deal with surrogate pairs
else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
{
// Its a leading surrogate. If we already got one, then
// issue an error, else set leading flag to make sure that
// we look for a trailing next time.
if (gotLeadingSurrogate)
fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
else
gotLeadingSurrogate = true;
}
else
{
// If its a trailing surrogate, make sure that we are
// prepared for that. Else, its just a regular char so make
// sure that we were not expected a trailing surrogate.
if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
{
// Its trailing, so make sure we were expecting it
if (!gotLeadingSurrogate)
fScanner->emitError(XMLErrs::Unexpected2ndSurrogateChar);
}
else
{
// Its just a char, so make sure we were not expecting a
// trailing surrogate.
if (gotLeadingSurrogate)
fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
// Its got to at least be a valid XML character
else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
{
XMLCh tmpBuf[9];
XMLString::binToText
(
nextCh
, tmpBuf
, 8
, 16
);
fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
}
}
gotLeadingSurrogate = false;
}
}
}
//
// This method scans the entire internal subset. All we can have here is
// decl markup, and PE references. The expanded PE references must contain
// whole markup, so we don't have to worry about their content at this
// level. We just scan them, expand them, push them, and parse their content
// right there, via the expandERef() method.
//
bool DTDScanner::scanInternalSubset()
{
// Indicate we are in the internal subset now
FlagJanitor<bool> janContentFlag(&fInternalSubset, true);
// If we have a doc type handler, tell it the internal subset starts
if (fDocTypeHandler)
fDocTypeHandler->startIntSubset();
// Get a buffer for whitespace
XMLBufBid bbSpace(fBufMgr);
bool noErrors = true;
while (true)
{
const XMLCh nextCh = fReaderMgr->peekNextChar();
//
// If we get an end of file marker, just unget it and return a
// failure status. The caller will then see the end of file and
// faill out correctly.
//
if (!nextCh)
return false;
// Watch for the end of internal subset marker
if (nextCh == chCloseSquare)
{
fReaderMgr->getNextChar();
break;
}
if (nextCh == chPercent)
{
//
// Expand (and scan if external) the reference value. Tell
// it to set the reader to cause an end of entity exception
// when this reader dies, which is what the scanExtSubset
// method wants (who is called to scan this.)
//
fReaderMgr->getNextChar();
expandPERef(true, false, false, true);
}
else if (nextCh == chOpenAngle)
{
// Remember this reader before we start the scan, for checking
// XML 1.0 P28a Well-formedness constraint: PE Between Declarations
const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
bool wasInPE = (fReaderMgr->getCurrentReader()->getType() == XMLReader::Type_PE);
// And scan this markup
fReaderMgr->getNextChar();
scanMarkupDecl(false);
// If we did not get back to entry level, then partial markup
if (fReaderMgr->getCurrentReaderNum() != orgReader) {
if (wasInPE)
fScanner->emitError(XMLErrs::PEBetweenDecl);
else if (fScanner->getDoValidation())
fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
}
}
else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
{
//
// IF we are doing advanced callbacks and have a doc type
// handler, then get the whitespace and call the doc type
// handler with it. Otherwise, just skip whitespace.
//
if (fDocTypeHandler)
{
fReaderMgr->getSpaces(bbSpace.getBuffer());
fDocTypeHandler->doctypeWhitespace
(
bbSpace.getRawBuffer()
, bbSpace.getLen()
);
}
else
{
fReaderMgr->skipPastSpaces();
}
}
else
{
// Not valid, so emit an error
XMLCh tmpBuf[9];
XMLString::binToText
(
fReaderMgr->getNextChar()
, tmpBuf
, 8
, 16
);
fScanner->emitError
(
XMLErrs::InvalidCharacterInIntSubset
, tmpBuf
);
//
// If an '>', then probably an abnormally terminated
// internal subset so just return.
//
if (nextCh == chCloseAngle)
{
noErrors = false;
break;
}
//
// Otherwise, try to sync back up by scanning forward for
// a reasonable start character.
//
static const XMLCh toSkip[] =
{
chPercent, chCloseSquare, chOpenAngle, chNull
};
fReaderMgr->skipUntilInOrWS(toSkip);
}
}
// If we have a doc type handler, tell it the internal subset ends
if (fDocTypeHandler)
fDocTypeHandler->endIntSubset();
return noErrors;
}
//
// This method is called once we see a < in the input of an int/ext subset,
// which indicates the start of some sort of markup.
//
void DTDScanner::scanMarkupDecl(const bool parseTextDecl)
{
//
// We only have two valid first characters here. One is a ! which opens
// some markup decl. The other is a ?, which could begin either a PI
// or a text decl. If parseTextDecl is false, we cannot accept a text
// decl.
//
const XMLCh nextCh = fReaderMgr->getNextChar();
if (nextCh == chBang)
{
if (fReaderMgr->skippedChar(chDash))
{
if (fReaderMgr->skippedChar(chDash))
{
scanComment();
}
else
{
fScanner->emitError(XMLErrs::CommentsMustStartWith);
fReaderMgr->skipPastChar(chCloseAngle);
}
}
else if (fReaderMgr->skippedChar(chOpenSquare))
{
//
// Its a conditional section. This is only valid in the external
// subset, so issue an error if we aren't there.
//
if (fInternalSubset)
{
fScanner->emitError(XMLErrs::ConditionalSectInIntSubset);
fReaderMgr->skipPastChar(chCloseAngle);
return;
}
// A PE ref can happen here, but space is not required
checkForPERef(false, false, true);
if (fReaderMgr->skippedString(XMLUni::fgIncludeString))
{
checkForPERef(false, false, true);
// Check for the following open square bracket
if (!fReaderMgr->skippedChar(chOpenSquare))
fScanner->emitError(XMLErrs::ExpectedINCLUDEBracket);
// Get the reader we started this on
const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
checkForPERef(false, false, true);
//
// Recurse back to the ext subset call again, telling it its
// in an include section.
//
scanExtSubsetDecl(true, false);
//
// And see if we got back to the same level. If not, then its
// a partial markup error.
//
if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
}
else if (fReaderMgr->skippedString(XMLUni::fgIgnoreString))
{
checkForPERef(false, false, true);
// Check for the following open square bracket
if (!fReaderMgr->skippedChar(chOpenSquare))
fScanner->emitError(XMLErrs::ExpectedINCLUDEBracket);
// Get the reader we started this on
const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
// And scan over the ignored part
scanIgnoredSection();
//
// And see if we got back to the same level. If not, then its
// a partial markup error.
//
if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
}
else
{
fScanner->emitError(XMLErrs::ExpectedIncOrIgn);
fReaderMgr->skipPastChar(chCloseAngle);
}
}
else if (fReaderMgr->skippedString(XMLUni::fgAttListString))
{
scanAttListDecl();
}
else if (fReaderMgr->skippedString(XMLUni::fgElemString))
{
scanElementDecl();
}
else if (fReaderMgr->skippedString(XMLUni::fgEntityString))
{
scanEntityDecl();
}
else if (fReaderMgr->skippedString(XMLUni::fgNotationString))
{
scanNotationDecl();
}
else
{
fScanner->emitError(XMLErrs::ExpectedMarkupDecl);
fReaderMgr->skipPastChar(chCloseAngle);
}
}
else if (nextCh == chQuestion)
{
// It could be a PI or the XML declaration. Check for Decl
if (fScanner->checkXMLDecl(false))
{
// If we are not accepting text decls, its an error
if (parseTextDecl)
{
scanTextDecl();
}
else
{
// Emit the error and skip past this markup
fScanner->emitError(XMLErrs::TextDeclNotLegalHere);
fReaderMgr->skipPastChar(chCloseAngle);
}
}
else
{
// It has to be a PI
scanPI();
}
}
else
{
// Can't be valid so emit error and try to skip past end of this decl
fScanner->emitError(XMLErrs::ExpectedMarkupDecl);
fReaderMgr->skipPastChar(chCloseAngle);
}
}
//
//  Scans the remainder of a mixed content model. By the time this is called
//  the opening '(PCDATA' portion has already been consumed, so what is left
//  is a sequence of '|' separated element names, the closing paren and an
//  optional (or, once a name has been seen, required) trailing '*'.
//
//  On success the resulting content spec tree is stored into toFill and true
//  is returned. If the model is unterminated or an element name is missing,
//  the partially built tree is deleted, an error is emitted and false is
//  returned.
//
bool DTDScanner::scanMixed(DTDElementDecl& toFill)
{
    // Becomes true as soon as anything beyond a bare (PCDATA) is seen
    bool needStar = false;

    // Scratch buffer that receives each element name
    XMLBufBid bbName(fBufMgr);
    XMLBuffer& nameBuf = bbName.getBuffer();

    //
    //  The tree starts out as a single leaf carrying the PCDATA element id.
    //  It doubles as the marker that lets us recognise the first choice.
    //
    ContentSpecNode* const pcdataLeaf = new (fMemoryManager) ContentSpecNode
    (
        new (fMemoryManager) QName
        (
            XMLUni::fgZeroLenString
            , XMLUni::fgZeroLenString
            , XMLElementDecl::fgPCDataElemId
            , fMemoryManager
        )
        , false
        , fMemoryManager
    );

    // rootNode is what finally becomes the content spec; tailNode is the
    // node whose right side receives the next choice.
    ContentSpecNode* rootNode = pcdataLeaf;
    ContentSpecNode* tailNode = pcdataLeaf;

    for (;;)
    {
        // Housekeeping first: PE refs, illegal repetition marks, whitespace
        if (fReaderMgr->lookingAtChar(chPercent))
        {
            checkForPERef(false, false, true);
            continue;
        }

        if (fReaderMgr->skippedChar(chAsterisk))
        {
            // Repetition is not legal inside a mixed model; report and go on
            fScanner->emitError(XMLErrs::NoRepInMixed);
            continue;
        }

        if (fReaderMgr->skippedSpace())
        {
            fReaderMgr->skipPastSpaces();
            continue;
        }

        if (!fReaderMgr->skippedChar(chPipe))
        {
            // No separator, so only the closing paren is acceptable here
            if (!fReaderMgr->skippedChar(chCloseParen))
            {
                delete rootNode;
                fScanner->emitError(XMLErrs::UnterminatedContentModel);
                return false;
            }

            const bool sawStar = fReaderMgr->skippedChar(chAsterisk);
            if (!sawStar && needStar)
                fScanner->emitError(XMLErrs::ExpectedAsterisk);

            // Wrap the tree in a zero-or-more node when a star was present
            // or should have been.
            if (needStar || sawStar)
            {
                rootNode = new (fMemoryManager) ContentSpecNode
                (
                    ContentSpecNode::ZeroOrMore
                    , rootNode
                    , 0
                    , true
                    , true
                    , fMemoryManager
                );
            }

            toFill.setContentSpec(rootNode);
            return true;
        }

        // A '|' was consumed, so the model is more than plain PCDATA
        needStar = true;

        // Optional whitespace / PE ref before the element name
        checkForPERef(false, false, true);

        if (!fReaderMgr->getName(nameBuf))
        {
            delete rootNode;
            fScanner->emitError(XMLErrs::ExpectedElementName);
            return false;
        }

        //
        //  Look the element up in the grammar. When it is not there yet, a
        //  declaration is faulted in and flagged as having been created
        //  because it appeared in a content model.
        //
        XMLElementDecl* decl = fDTDGrammar->getElemDecl
        (
            fEmptyNamespaceId
            , 0
            , nameBuf.getRawBuffer()
            , Grammar::TOP_LEVEL_SCOPE
        );
        if (!decl)
        {
            decl = new (fMemoryManager) DTDElementDecl
            (
                nameBuf.getRawBuffer()
                , fEmptyNamespaceId
                , DTDElementDecl::Any
                , fMemoryManager
            );
            decl->setCreateReason(XMLElementDecl::InContentModel);
            decl->setExternalElemDeclaration(isReadingExternalEntity());
            fDTDGrammar->putElemDecl(decl);
        }

        if (tailNode == pcdataLeaf)
        {
            // First name: build the initial choice (PCDATA | name) and make
            // it the root of the tree.
            tailNode = new (fMemoryManager) ContentSpecNode
            (
                ContentSpecNode::Choice
                , tailNode
                , new (fMemoryManager) ContentSpecNode
                (
                    decl->getElementName()
                    , fMemoryManager
                )
                , true
                , true
                , fMemoryManager
            );
            rootNode = tailNode;
        }
        else
        {
            // Later names: take the current right child, pair it with the
            // new leaf under a fresh choice node, and hang that choice back
            // on the right side.
            ContentSpecNode* detachedRight = tailNode->orphanSecond();
            tailNode->setSecond
            (
                new (fMemoryManager) ContentSpecNode
                (
                    ContentSpecNode::Choice
                    , detachedRight
                    , new (fMemoryManager) ContentSpecNode
                    (
                        decl->getElementName()
                        , fMemoryManager
                    )
                    , true
                    , true
                    , fMemoryManager
                )
            );

            // Descend so the next name is attached below the new choice
            tailNode = tailNode->getSecond();
        }
    }
}
//
//  Called once '<!NOTATION' has been seen while scanning markup decls. It
//  reads the notation name and its public and/or system id. A notation that
//  is not yet known to the grammar is created and stored there; a duplicate
//  is reported as an error. If a doc type handler is installed it is told
//  about the notation either way, together with a flag saying whether the
//  declaration was a duplicate.
//
void DTDScanner::scanNotationDecl()
{
    // Whitespace (possibly coming from a PE ref) is mandatory here
    if (!checkForPERef(true, false, true))
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }

    // The notation name comes next
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::ExpectedNotationName);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }

    // With namespaces enabled the name must not contain a colon
    if (fScanner->getDoNamespaces()
    &&  (XMLString::indexOf(bbName.getRawBuffer(), chColon) != -1))
    {
        fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
    }

    // Mandatory whitespace again, between the name and the id
    if (!checkForPERef(true, false, true))
    {
        fScanner->emitError(XMLErrs::ExpectedWhitespace);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }

    // Scan the id into a public and a system buffer
    XMLBufBid bbPubId(fBufMgr);
    XMLBufBid bbSysId(fBufMgr);
    if (!scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), IDType_Either))
    {
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }

    // Optional whitespace or PE ref
    checkForPERef(false, false, true);

    //
    //  Ask the grammar whether this notation is already declared. A known
    //  name is an error and the existing declaration is left alone; an
    //  unknown name gets a new declaration which is put into the grammar.
    //
    XMLNotationDecl* decl = fDTDGrammar->getNotationDecl(bbName.getRawBuffer());
    const bool isDuplicate = (decl != 0);
    if (!isDuplicate)
    {
        const XMLCh* publicId = bbPubId.getRawBuffer();
        const XMLCh* systemId = bbSysId.getRawBuffer();

        ReaderMgr::LastExtEntityInfo lastInfo;
        fReaderMgr->getLastExtEntityInfo(lastInfo);

        // Empty strings are handed over as null pointers
        const XMLCh* const pubArg = (publicId && *publicId) ? publicId : 0;
        const XMLCh* const sysArg = (systemId && *systemId) ? systemId : 0;
        const XMLCh* const extArg =
            (lastInfo.systemId && *lastInfo.systemId) ? lastInfo.systemId : 0;

        decl = new (fMemoryManager) XMLNotationDecl
        (
            bbName.getRawBuffer()
            , pubArg
            , sysArg
            , extArg
            , fMemoryManager
        );
        fDTDGrammar->putNotationDecl(decl);
    }
    else
    {
        fScanner->emitError(XMLErrs::NotationAlreadyExists, bbName.getRawBuffer());
    }

    // Report to the doc type handler, flagging duplicates as ignored
    if (fDocTypeHandler)
        fDocTypeHandler->notationDecl(*decl, isDuplicate);

    // One more optional whitespace or PE ref, then the closing bracket
    checkForPERef(false, false, true);
    if (!fReaderMgr->skippedChar(chCloseAngle))
        fScanner->emitError(XMLErrs::UnterminatedNotationDecl);
}
//
//  Scans a processing instruction and, if a doc type handler is installed,
//  reports it through doctypePI().
//
//  On entry the '<?' has just been scanned, so the PI name is expected next.
//  The name may be followed by whitespace and then the PI data, which runs
//  up to the terminating '?>'. Hitting end of input inside the data emits
//  UnterminatedPI and throws UnexpectedEOFException.
//
void DTDScanner::scanPI()
{
    // Whitespace before the name is an error; report it and skip over it so
    // that scanning can pick up at the name.
    if (fReaderMgr->lookingAtSpace())
    {
        fScanner->emitError(XMLErrs::PINameExpected);
        fReaderMgr->skipPastSpaces();
    }

    // Read the PI name
    XMLBufBid bbName(fBufMgr);
    if (!fReaderMgr->getName(bbName.getBuffer()))
    {
        fScanner->emitError(XMLErrs::PINameExpected);
        fReaderMgr->skipPastChar(chCloseAngle);
        return;
    }
    const XMLCh* const namePtr = bbName.getRawBuffer();

    // Any case variation of 'xml' is not allowed as a PI name
    if (XMLString::compareIString(namePtr, XMLUni::fgXMLString) == 0)
        fScanner->emitError(XMLErrs::NoPIStartsWithXML);

    // With namespaces enabled the name must not contain a colon
    if (fScanner->getDoNamespaces() && (XMLString::indexOf(namePtr, chColon) != -1))
        fScanner->emitError(XMLErrs::ColonNotLegalWithNS);

    // Buffer for the PI data; stays empty when the PI has none
    XMLBufBid bbTarget(fBufMgr);
    if (!fReaderMgr->skippedSpace())
    {
        // No data at all, so the very next thing has to be '?>'
        if (!fReaderMgr->skippedChar(chQuestion)
        ||  !fReaderMgr->skippedChar(chCloseAngle))
        {
            fScanner->emitError(XMLErrs::UnterminatedPI);
            fReaderMgr->skipPastChar(chCloseAngle);
            return;
        }
    }
    else
    {
        // Drop the rest of the whitespace that separates name and data
        fReaderMgr->skipPastSpaces();

        // True while a leading surrogate is waiting for its partner
        bool pendingLead = false;

        for (;;)
        {
            const XMLCh nextCh = fReaderMgr->getNextChar();

            // Running out of input inside a PI is always an error
            if (!nextCh)
            {
                fScanner->emitError(XMLErrs::UnterminatedPI);
                ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);
            }

            // A '?' only terminates the PI when a '>' follows directly;
            // otherwise it is ordinary data and falls through.
            if ((nextCh == chQuestion) && fReaderMgr->skippedChar(chCloseAngle))
                break;

            const bool isLead = (nextCh >= 0xD800) && (nextCh <= 0xDBFF);
            if (isLead)
            {
                // Two leading surrogates in a row are an error
                if (pendingLead)
                    fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                else
                    pendingLead = true;
            }
            else
            {
                if (pendingLead)
                {
                    // A leading surrogate must be followed by a trailing one
                    const bool isTrail = (nextCh >= 0xDC00) && (nextCh <= 0xDFFF);
                    if (!isTrail)
                        fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                }
                else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
                {
                    // Not a legal XML character; report its hex value
                    XMLCh tmpBuf[9];
                    XMLString::binToText
                    (
                        nextCh
                        , tmpBuf
                        , 8
                        , 16
                    );
                    fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                }
                pendingLead = false;
            }

            // The character is kept even when an error was reported for it
            bbTarget.append(nextCh);
        }
    }

    const XMLCh* const targetPtr = bbTarget.getRawBuffer();

    // Hand the PI to the doc type handler, if there is one
    if (fDocTypeHandler)
        fDocTypeHandler->doctypePI(namePtr, targetPtr);
}
//
//  Scans a quoted public literal into toFill. The surrounding quotes are not
//  stored. Characters that are not valid public id characters are reported
//  but still stored, since carrying on is the best recovery available.
//
//  Returns false (after emitting ExpectedQuotedString) when the input does
//  not start with a quote, true once the closing quote has been consumed.
//  Throws UnexpectedEOFException if the input ends inside the literal.
//
bool DTDScanner::scanPublicLiteral(XMLBuffer& toFill)
{
    toFill.reset();

    // The literal has to open with a single or double quote
    XMLCh openQuote;
    if (!fReaderMgr->skipIfQuote(openQuote))
    {
        fScanner->emitError(XMLErrs::ExpectedQuotedString);
        return false;
    }

    for (;;)
    {
        const XMLCh curCh = fReaderMgr->getNextChar();

        // End of input before the closing quote
        if (!curCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

        // The matching quote ends the literal
        if (curCh == openQuote)
            return true;

        // Report illegal characters with their hex value, but keep going
        if (!fReaderMgr->getCurrentReader()->isPublicIdChar(curCh))
        {
            XMLCh tmpBuf[9];
            XMLString::binToText
            (
                curCh
                , tmpBuf
                , 8
                , 16
            );
            fScanner->emitError(XMLErrs::InvalidPublicIdChar, tmpBuf);
        }

        toFill.append(curCh);
    }
}
//
//  Scans a quoted system literal into toFill. Scanning starts at the opening
//  quote and ends after the matching closing quote has been consumed; the
//  quotes themselves are not stored. No checks are made on the characters in
//  between.
//
//  Returns false (after emitting ExpectedQuotedString) when the input does
//  not start with a quote, true otherwise. Throws UnexpectedEOFException if
//  the input ends inside the literal.
//
bool DTDScanner::scanSystemLiteral(XMLBuffer& toFill)
{
    toFill.reset();

    // The literal has to open with a single or double quote
    XMLCh openQuote;
    if (!fReaderMgr->skipIfQuote(openQuote))
    {
        fScanner->emitError(XMLErrs::ExpectedQuotedString);
        return false;
    }

    for (;;)
    {
        const XMLCh curCh = fReaderMgr->getNextChar();

        // End of input before the closing quote
        if (!curCh)
            ThrowXML(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF);

        // The matching quote ends the literal
        if (curCh == openQuote)
            return true;

        toFill.append(curCh);
    }
}
//
// This method is called to scan a text decl line, which can be the first
// line in an external entity or external subset.
//
// On entry the <? has been scanned, and next should be 'xml' followed by
// some whitespace, version string, etc...
// [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'
//
void DTDScanner::scanTextDecl()
{
// Skip any subsequent whitespace before the version string
fReaderMgr->skipPastSpaces();
// Next should be the version string
XMLBufBid bbVersion(fBufMgr);
if (fReaderMgr->skippedString(XMLUni::fgVersionString))
{
if (!scanEq())
{
fScanner->emitError(XMLErrs::ExpectedEqSign);
fReaderMgr->skipPastChar(chCloseAngle);
return;
}
//
// Followed by a single or double quoted version. Get a buffer for
// the string.
//
if (!getQuotedString(bbVersion.getBuffer()))
{
fScanner->emitError(XMLErrs::BadXMLVersion);
fReaderMgr->skipPastChar(chCloseAngle);
return;
}
// If its not our supported version, issue an error but continue
if (XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgVersion1_1)) {
if (fScanner->getXMLVersion() != XMLReader::XMLV1_1)
fScanner->emitError(XMLErrs::UnsupportedXMLVersion, bbVersion.getRawBuffer());
}
else if (!XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgVersion1_0))
fScanner->emitError(XMLErrs::UnsupportedXMLVersion, bbVersion.getRawBuffer());
}
// Ok, now we must have an encoding string
XMLBufBid bbEncoding(fBufMgr);
fReaderMgr->skipPastSpaces();
bool gotEncoding = false;
if (fReaderMgr->skippedString(XMLUni::fgEncodingString))
{
// There must be a equal sign next
if (!scanEq())
{
fScanner->emitError(XMLErrs::ExpectedEqSign);
fReaderMgr->skipPastChar(chCloseAngle);
return;
}
// Followed by a single or double quoted version string
getQuotedString(bbEncoding.getBuffer());
if (bbEncoding.isEmpty() || !XMLString::isValidEncName(bbEncoding.getRawBuffer()))
{
fScanner->emitError(XMLErrs::BadXMLEncoding, bbEncoding.getRawBuffer());
fReaderMgr->skipPastChar(chCloseAngle);
return;
}
// Indicate that we got an encoding
gotEncoding = true;
}
//
// Encoding declarations are required in the external entity
// if there is a text declaration present
//
if (!gotEncoding)
{
fScanner->emitError(XMLErrs::EncodingRequired);
fReaderMgr->skipPastChar(chCloseAngle);
return;
}
fReaderMgr->skipPastSpaces();
if (!fReaderMgr->skippedChar(chQuestion))
{
fScanner->emitError(XMLErrs::UnterminatedXMLDecl);
fReaderMgr->skipPastChar(chCloseAngle);
}
else if (!fReaderMgr->skippedChar(chCloseAngle))
{
fScanner->emitError(XMLErrs::UnterminatedXMLDecl);
fReaderMgr->skipPastChar(chCloseAngle);
}
//
// If we have a document type handler and advanced callbacks are on,
// then call the TextDecl callback
//
if (fDocTypeHandler)
{
fDocTypeHandler->TextDecl
(
bbVersion.getRawBuffer()
, bbEncoding.getRawBuffer()
);
}
//
// If we got an encoding string, then we have to call back on the reader
// to tell it what the encoding is.
//
if (!bbEncoding.isEmpty())
{
if (!fReaderMgr->getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))
fScanner->emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());
}
}
XERCES_CPP_NAMESPACE_END