ParserForXMLSchema.cpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:18k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 2001, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- /*
- * $Log: ParserForXMLSchema.cpp,v $
- * Revision 1.6 2003/05/15 18:42:54 knoaman
- * Partial implementation of the configurable memory manager.
- *
- * Revision 1.5 2003/03/18 19:38:28 knoaman
- * Schema Errata E2-18 + misc. regex fixes.
- *
- * Revision 1.4 2003/01/13 19:02:23 knoaman
- * [Bug 14390] C++ Indentifier collision with Python.
- *
- * Revision 1.3 2002/11/04 15:17:00 tng
- * C++ Namespace Support.
- *
- * Revision 1.2 2002/03/18 19:29:53 knoaman
- * Change constant names to eliminate possible conflict with user defined ones.
- *
- * Revision 1.1.1.1 2002/02/01 22:22:29 peiyongz
- * sane_include
- *
- * Revision 1.6 2001/09/20 13:11:42 knoaman
- * Regx + misc. fixes
- *
- * Revision 1.5 2001/06/01 14:15:37 knoaman
- * Add a return value to satisfy compilers that complain about
- * no return value, although that code will not be executed.
- *
- * Revision 1.4 2001/05/11 21:50:56 knoaman
- * Schema updates and fixes.
- *
- * Revision 1.3 2001/05/11 13:26:44 tng
- * Copyright update.
- *
- * Revision 1.2 2001/05/03 18:17:30 knoaman
- * Some design changes:
- * o Changed the TokenFactory from a single static instance, to a
- * normal class. Each RegularExpression object will have its own
- * instance of TokenFactory, and that instance will be passed to
- * other classes that need to use a TokenFactory to create Token
- * objects (with the exception of RangeTokenMap).
- * o Added a new class RangeTokenMap to map the different ranges
- * in a given category to a specific RangeFactory object. In the old
- * design RangeFactory had dual functionality (act as a Map, and as
- * a factory for creating RangeToken(s)). The RangeTokenMap will
- * have its own copy of the TokenFactory. There will be only one
- * instance of the RangeTokenMap class, and that instance will be
- * lazily deleted when XPlatformUtils::Terminate is called.
- *
- * Revision 1.1 2001/03/02 19:26:43 knoaman
- * Schema: Regular expression handling part II
- *
- */
- // ---------------------------------------------------------------------------
- // Includes
- // ---------------------------------------------------------------------------
- #include <xercesc/util/regx/ParserForXMLSchema.hpp>
- #include <xercesc/util/regx/TokenFactory.hpp>
- #include <xercesc/util/regx/RangeToken.hpp>
- #include <xercesc/util/regx/TokenInc.hpp>
- #include <xercesc/util/regx/RegxDefs.hpp>
- #include <xercesc/util/ParseException.hpp>
- #include <xercesc/util/RuntimeException.hpp>
- #include <xercesc/util/PlatformUtils.hpp>
- XERCES_CPP_NAMESPACE_BEGIN
- // ---------------------------------------------------------------------------
- // ParserForXMLSchema: Constructors and Destructors
- // ---------------------------------------------------------------------------
- ParserForXMLSchema::ParserForXMLSchema(MemoryManager* const manager)
- : RegxParser(manager)
- {
- }
- ParserForXMLSchema::~ParserForXMLSchema() {
- }
- // ---------------------------------------------------------------------------
- // ParserForXMLSchema: Parsing/Processing methods
- // ---------------------------------------------------------------------------
- Token* ParserForXMLSchema::processCaret() {
- processNext();
- return getTokenFactory()->createChar(chCaret);
- }
- Token* ParserForXMLSchema::processDollar() {
- processNext();
- return getTokenFactory()->createChar(chDollarSign);
- }
- Token* ParserForXMLSchema::processPlus(Token* const tok) {
- processNext();
- return getTokenFactory()->createConcat(tok,
- getTokenFactory()->createClosure(tok));
- }
- Token* ParserForXMLSchema::processStar(Token* const tok) {
- processNext();
- return getTokenFactory()->createClosure(tok);
- }
- Token* ParserForXMLSchema::processQuestion(Token* const tok) {
- processNext();
- TokenFactory* tokFactory = getTokenFactory();
- Token* retTok = tokFactory->createUnion();
- retTok->addChild(tok, tokFactory);
- retTok->addChild(tokFactory->createToken(Token::T_EMPTY), tokFactory);
- return retTok;
- }
- Token* ParserForXMLSchema::processParen() {
- processNext();
- Token* retTok = getTokenFactory()->createParenthesis(parseRegx(true), 0);
- if (getState() != REGX_T_RPAREN) {
- ThrowXML(ParseException, XMLExcepts::Parser_Factor1);
- }
- processNext();
- return retTok;
- }
- RangeToken* ParserForXMLSchema::parseCharacterClass(const bool useNRange) {
- setParseContext(S_INBRACKETS);
- processNext();
- RangeToken* base = 0;
- RangeToken* tok = 0;
- bool isNRange = false;
- if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
- isNRange = true;
- processNext();
- base = getTokenFactory()->createRange();
- base->addRange(0, Token::UTF16_MAX);
- tok = getTokenFactory()->createRange();
- }
- else {
- tok= getTokenFactory()->createRange();
- }
- int type;
- bool firstLoop = true;
- while ( (type = getState()) != REGX_T_EOF) {
- // single range | from-to-range | subtraction
- if (type == REGX_T_CHAR && getCharData() == chCloseSquare && !firstLoop) {
- if (isNRange) {
- base->subtractRanges(tok);
- tok = base;
- }
- break;
- }
- XMLInt32 ch = getCharData();
- bool end = false;
- if (type == REGX_T_BACKSOLIDUS) {
- switch(ch) {
- case chLatin_d:
- case chLatin_D:
- case chLatin_w:
- case chLatin_W:
- case chLatin_s:
- case chLatin_S:
- {
- tok->mergeRanges(getTokenForShorthand(ch));
- end = true;
- }
- break;
- case chLatin_i:
- case chLatin_I:
- case chLatin_c:
- case chLatin_C:
- {
- ch = processCInCharacterClass(tok, ch);
- if (ch < 0) {
- end = true;
- }
- }
- break;
- case chLatin_p:
- case chLatin_P:
- {
- int start = getOffset();
- RangeToken* tok2 = processBacksolidus_pP(ch);
- if (tok2 == 0) {
- ThrowXML(ParseException,XMLExcepts::Parser_Atom5);
- }
- tok->mergeRanges(tok2);
- end = true;
- }
- break;
- default:
- ch = decodeEscaped();
- }
- } // end if REGX_T_BACKSOLIDUS
- else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {
- if (isNRange) {
- base->subtractRanges(tok);
- tok = base;
- }
- RangeToken* rangeTok = parseCharacterClass(false);
- tok->subtractRanges(rangeTok);
- if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
- ThrowXML(ParseException,XMLExcepts::Parser_CC5);
- }
- break;
- } // end if REGX_T_XMLSCHEMA...
- processNext();
- if (!end) {
- if (type == REGX_T_CHAR
- && (ch == chOpenSquare
- || ch == chCloseSquare
- || ch == chDash)) {
- // '[', ']', '-' not allowed and should be esacaped
- XMLCh chStr[] = { ch, chNull };
- ThrowXML2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr);
- }
- if (getState() != REGX_T_CHAR || getCharData() != chDash) {
- tok->addRange(ch, ch);
- }
- else {
- processNext();
- if ((type = getState()) == REGX_T_EOF)
- ThrowXML(ParseException,XMLExcepts::Parser_CC2);
- if ((type == REGX_T_CHAR && getCharData() == chCloseSquare)
- || type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
- static const XMLCh dashStr[] = { chDash, chNull};
- ThrowXML2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr);
- }
- else {
- XMLInt32 rangeEnd = getCharData();
- XMLCh rangeEndStr[] = { rangeEnd, chNull };
- if (type == REGX_T_CHAR) {
- if (rangeEnd == chOpenSquare
- || rangeEnd == chCloseSquare
- || rangeEnd == chDash)
- // '[', ']', '-' not allowed and should be esacaped
- ThrowXML2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr);
- }
- else if (type == REGX_T_BACKSOLIDUS) {
- rangeEnd = decodeEscaped();
- }
- processNext();
- if (ch > rangeEnd) {
- XMLCh chStr[] = { ch, chNull };
- ThrowXML2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr);
- }
- tok->addRange(ch, rangeEnd);
- }
- }
- }
- firstLoop = false;
- }
- if (getState() == REGX_T_EOF)
- ThrowXML(ParseException,XMLExcepts::Parser_CC2);
- tok->sortRanges();
- tok->compactRanges();
- setParseContext(S_NORMAL);
- processNext();
- return tok;
- }
- XMLInt32 ParserForXMLSchema::processCInCharacterClass(RangeToken* const tok,
- const XMLInt32 ch)
- {
- tok->mergeRanges(getTokenForShorthand(ch));
- return -1;
- }
- Token* ParserForXMLSchema::processLook(const unsigned short tokType) {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_A() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_B() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_b() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_C() {
- processNext();
- return getTokenForShorthand(chLatin_C);
- }
- Token* ParserForXMLSchema::processBacksolidus_c() {
- processNext();
- return getTokenForShorthand(chLatin_c);
- }
- Token* ParserForXMLSchema::processBacksolidus_g() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_gt() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_I() {
- processNext();
- return getTokenForShorthand(chLatin_I);
- }
- Token* ParserForXMLSchema::processBacksolidus_i() {
- processNext();
- return getTokenForShorthand(chLatin_i);
- }
- Token* ParserForXMLSchema::processBacksolidus_lt() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_X() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_Z() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBacksolidus_z() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processBackReference() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processCondition() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processIndependent() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processModifiers() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- Token* ParserForXMLSchema::processParen2() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- RangeToken* ParserForXMLSchema::parseSetOperations() {
- ThrowXML(RuntimeException, XMLExcepts::Regex_NotSupported);
- return 0; // for compilers that complain about no return value
- }
- // ---------------------------------------------------------------------------
- // ParserForXMLSchema: Getter methods
- // ---------------------------------------------------------------------------
- Token* ParserForXMLSchema::getTokenForShorthand(const XMLInt32 ch) {
- switch(ch) {
- case chLatin_d:
- return getTokenFactory()->getRange(fgXMLDigit);
- case chLatin_D:
- return getTokenFactory()->getRange(fgXMLDigit, true);
- case chLatin_w:
- return getTokenFactory()->getRange(fgXMLWord);
- case chLatin_W:
- return getTokenFactory()->getRange(fgXMLWord, true);
- case chLatin_s:
- return getTokenFactory()->getRange(fgXMLSpace);
- case chLatin_S:
- return getTokenFactory()->getRange(fgXMLSpace, true);
- case chLatin_c:
- return getTokenFactory()->getRange(fgXMLNameChar);
- case chLatin_C:
- return getTokenFactory()->getRange(fgXMLNameChar, true);
- case chLatin_i:
- return getTokenFactory()->getRange(fgXMLInitialNameChar);
- case chLatin_I:
- return getTokenFactory()->getRange(fgXMLInitialNameChar, true);
- }
- return 0;
- }
- // ---------------------------------------------------------------------------
- // ParserForXMLSchema: Helper methods
- // ---------------------------------------------------------------------------
- bool ParserForXMLSchema::checkQuestion(const int off) {
- return false;
- }
- XMLInt32 ParserForXMLSchema::decodeEscaped() {
- if (getState() != REGX_T_BACKSOLIDUS)
- ThrowXML(ParseException,XMLExcepts::Parser_Next1);;
- XMLInt32 ch = getCharData();
- switch (ch) {
- case chLatin_n:
- ch = chLF;
- break;
- case chLatin_r:
- ch = chCR;
- break;
- case chLatin_t:
- ch = chHTab;
- break;
- case chBackSlash:
- case chPipe:
- case chPeriod:
- case chCaret:
- case chDash:
- case chQuestion:
- case chAsterisk:
- case chPlus:
- case chOpenCurly:
- case chCloseCurly:
- case chOpenParen:
- case chCloseParen:
- case chOpenSquare:
- case chCloseSquare:
- break;
- default:
- {
- XMLCh chString[] = {chBackSlash, ch, chNull};
- chString[1] = ch;
- ThrowXML1(ParseException,XMLExcepts::Parser_Process2, chString);
- }
- }
- return ch;
- }
- XERCES_CPP_NAMESPACE_END
- /**
- * End of file ParserForXMLSchema.cpp
- */