RegxParser.hpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:12k
源码类别:

词法分析

开发平台:

Visual C++

  1. /*
  2.  * The Apache Software License, Version 1.1
  3.  *
  4.  * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
  5.  * reserved.
  6.  *
  7.  * Redistribution and use in source and binary forms, with or without
  8.  * modification, are permitted provided that the following conditions
  9.  * are met:
  10.  *
  11.  * 1. Redistributions of source code must retain the above copyright
  12.  *    notice, this list of conditions and the following disclaimer.
  13.  *
  14.  * 2. Redistributions in binary form must reproduce the above copyright
  15.  *    notice, this list of conditions and the following disclaimer in
  16.  *    the documentation and/or other materials provided with the
  17.  *    distribution.
  18.  *
  19.  * 3. The end-user documentation included with the redistribution,
  20.  *    if any, must include the following acknowledgment:
  21.  *       "This product includes software developed by the
  22.  *        Apache Software Foundation (http://www.apache.org/)."
  23.  *    Alternately, this acknowledgment may appear in the software itself,
  24.  *    if and wherever such third-party acknowledgments normally appear.
  25.  *
  26.  * 4. The names "Xerces" and "Apache Software Foundation" must
  27.  *    not be used to endorse or promote products derived from this
  28.  *    software without prior written permission. For written
  29.  *    permission, please contact apache@apache.org.
  30.  *
  31.  * 5. Products derived from this software may not be called "Apache",
  32.  *    nor may "Apache" appear in their name, without prior written
  33.  *    permission of the Apache Software Foundation.
  34.  *
  35.  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  36.  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  37.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  38.  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  39.  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  40.  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  41.  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  42.  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  43.  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  44.  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  45.  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  46.  * SUCH DAMAGE.
  47.  * ====================================================================
  48.  *
  49.  * This software consists of voluntary contributions made by many
  50.  * individuals on behalf of the Apache Software Foundation, and was
  51.  * originally based on software copyright (c) 2001, International
  52.  * Business Machines, Inc., http://www.ibm.com .  For more information
  53.  * on the Apache Software Foundation, please see
  54.  * <http://www.apache.org/>.
  55.  */
  56. /*
  57.  * $Id: RegxParser.hpp,v 1.6 2003/05/22 02:10:52 knoaman Exp $
  58.  */
  59. /*
  60.  * A regular expression parser
  61.  */
  62. #if !defined(REGXPARSER_HPP)
  63. #define REGXPARSER_HPP
  64. // ---------------------------------------------------------------------------
  65. //  Includes
  66. // ---------------------------------------------------------------------------
  67. #include <xercesc/util/RefVectorOf.hpp>
  68. #include <xercesc/util/XMLUniDefs.hpp>
  69. #include <xercesc/util/Mutexes.hpp>
  70. XERCES_CPP_NAMESPACE_BEGIN
  71. // ---------------------------------------------------------------------------
  72. //  Forward Declaration
  73. // ---------------------------------------------------------------------------
  74. class Token;
  75. class RangeToken;
  76. class TokenFactory;
  77. class XMLUTIL_EXPORT RegxParser : public XMemory
  78. {
  79. public:
  80. // -----------------------------------------------------------------------
  81.     //  Public constant data
  82.     // -----------------------------------------------------------------------
  83.     // Parse tokens
  84. enum {
  85. REGX_T_CHAR                     = 0,
  86. REGX_T_EOF                      = 1,
  87. REGX_T_OR                       = 2,
  88. REGX_T_STAR                     = 3,
  89. REGX_T_PLUS                     = 4,
  90. REGX_T_QUESTION                 = 5,
  91. REGX_T_LPAREN                   = 6,
  92. REGX_T_RPAREN                   = 7,
  93. REGX_T_DOT                      = 8,
  94. REGX_T_LBRACKET                 = 9,
  95. REGX_T_BACKSOLIDUS              = 10,
  96. REGX_T_CARET                    = 11,
  97. REGX_T_DOLLAR                   = 12,
  98. REGX_T_LPAREN2                  = 13,
  99. REGX_T_LOOKAHEAD                = 14,
  100. REGX_T_NEGATIVELOOKAHEAD        = 15,
  101. REGX_T_LOOKBEHIND               = 16,
  102. REGX_T_NEGATIVELOOKBEHIND       = 17,
  103. REGX_T_INDEPENDENT              = 18,
  104. REGX_T_SET_OPERATIONS           = 19,
  105. REGX_T_POSIX_CHARCLASS_START    = 20,
  106. REGX_T_COMMENT                  = 21,
  107. REGX_T_MODIFIERS                = 22,
  108. REGX_T_CONDITION                = 23,
  109. REGX_T_XMLSCHEMA_CC_SUBTRACTION = 24
  110. };
  111. static const unsigned short S_NORMAL;
  112. static const unsigned short S_INBRACKETS;
  113. static const unsigned short S_INXBRACKETS;
  114. // -----------------------------------------------------------------------
  115.     //  Public Constructors and Destructor
  116.     // -----------------------------------------------------------------------
  117. RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
  118. virtual ~RegxParser();
  119.     // -----------------------------------------------------------------------
  120.     //  Getter methods
  121.     // -----------------------------------------------------------------------
  122.     unsigned short getParseContext() const;
  123.     unsigned short getState() const;
  124.     XMLInt32       getCharData() const;
  125.     int            getNoParen() const;
  126. int            getOffset() const;
  127. bool           hasBackReferences() const;
  128.     TokenFactory*  getTokenFactory() const;
  129. // -----------------------------------------------------------------------
  130.     //  Setter methods
  131.     // -----------------------------------------------------------------------
  132. void setParseContext(const unsigned short value);
  133.     void setTokenFactory(TokenFactory* const tokFactory);
  134. // -----------------------------------------------------------------------
  135.     //  Public Parsing methods
  136.     // -----------------------------------------------------------------------
  137. Token* parse(const XMLCh* const regxStr, const int options);
  138. protected:
  139.     // -----------------------------------------------------------------------
  140.     //  Protected Helper methods
  141.     // -----------------------------------------------------------------------
  142.     virtual bool        checkQuestion(const int off);
  143. virtual XMLInt32    decodeEscaped();
  144.     // -----------------------------------------------------------------------
  145.     //  Protected Parsing/Processing methods
  146.     // -----------------------------------------------------------------------
  147. void                processNext();
  148. Token*              parseRegx(const bool matchingRParen = false);
  149. virtual Token*      processCaret();
  150.     virtual Token*      processDollar();
  151. virtual Token*      processLook(const unsigned short tokType);
  152.     virtual Token*      processBacksolidus_A();
  153.     virtual Token*      processBacksolidus_z();
  154.     virtual Token*      processBacksolidus_Z();
  155.     virtual Token*      processBacksolidus_b();
  156.     virtual Token*      processBacksolidus_B();
  157.     virtual Token*      processBacksolidus_lt();
  158.     virtual Token*      processBacksolidus_gt();
  159.     virtual Token*      processBacksolidus_c();
  160.     virtual Token*      processBacksolidus_C();
  161.     virtual Token*      processBacksolidus_i();
  162.     virtual Token*      processBacksolidus_I();
  163.     virtual Token*      processBacksolidus_g();
  164.     virtual Token*      processBacksolidus_X();
  165.     virtual Token*      processBackReference();
  166. virtual Token*      processStar(Token* const tok);
  167. virtual Token*      processPlus(Token* const tok);
  168. virtual Token*      processQuestion(Token* const tok);
  169.     virtual Token*      processParen();
  170.     virtual Token*      processParen2();
  171.     virtual Token*      processCondition();
  172.     virtual Token*      processModifiers();
  173.     virtual Token*      processIndependent();
  174.     virtual RangeToken* parseCharacterClass(const bool useNRange);
  175.     virtual RangeToken* parseSetOperations();
  176. virtual XMLInt32    processCInCharacterClass(RangeToken* const tok,
  177.                                                  const XMLInt32 ch);
  178.     RangeToken*         processBacksolidus_pP(const XMLInt32 ch);
  179.     // -----------------------------------------------------------------------
  180.     //  Protected PreCreated RangeToken access methods
  181.     // -----------------------------------------------------------------------
  182. virtual Token*      getTokenForShorthand(const XMLInt32 ch);
  183. private:
  184.     // -----------------------------------------------------------------------
  185.     //  Private parsing/processing methods
  186.     // -----------------------------------------------------------------------
  187.     Token* parseTerm(const bool matchingRParen = false);
  188. Token* parseFactor();
  189. Token* parseAtom();
  190. // -----------------------------------------------------------------------
  191.     //  Private data types
  192.     // -----------------------------------------------------------------------
  193.     class ReferencePosition : public XMemory
  194.     {
  195.         public :
  196.             ReferencePosition(const int refNo, const int position);
  197.             int fReferenceNo;
  198. int fPosition;
  199.     };
  200.     // -----------------------------------------------------------------------
  201.     //  Private Helper methods
  202.     // -----------------------------------------------------------------------
  203.     bool isSet(const int flag);
  204. int hexChar(const XMLInt32 ch);
  205. // -----------------------------------------------------------------------
  206.     //  Private data members
  207. // -----------------------------------------------------------------------
  208.     MemoryManager*                  fMemoryManager;
  209. bool                            fHasBackReferences;
  210. int                             fOptions;
  211. int                             fOffset;
  212. int                             fNoGroups;
  213. int                             fParseContext;
  214. int                             fStringLen;
  215. unsigned short                  fState;
  216. XMLInt32                        fCharData;
  217. XMLCh*                          fString;
  218. RefVectorOf<ReferencePosition>* fReferences;
  219.     TokenFactory*                   fTokenFactory;
  220. XMLMutex fMutex;
  221. };
  222. // ---------------------------------------------------------------------------
  223. //  RegxParser: Getter Methods
  224. // ---------------------------------------------------------------------------
  225. inline unsigned short RegxParser::getParseContext() const {
  226.     return fParseContext;
  227. }
  228. inline unsigned short RegxParser::getState() const {
  229. return fState;
  230. }
  231. inline XMLInt32 RegxParser::getCharData() const {
  232.     return fCharData;
  233. }
  234. inline int RegxParser::getNoParen() const {
  235.     return fNoGroups;
  236. }
  237. inline int RegxParser::getOffset() const {
  238. return fOffset;
  239. }
  240. inline bool RegxParser::hasBackReferences() const {
  241. return fHasBackReferences;
  242. }
  243. inline TokenFactory* RegxParser::getTokenFactory() const {
  244.     return fTokenFactory;
  245. }
  246. // ---------------------------------------------------------------------------
  247. //  RegxParser: Setter Methods
  248. // ---------------------------------------------------------------------------
  249. inline void RegxParser::setParseContext(const unsigned short value) {
  250. fParseContext = value;
  251. }
  252. inline void RegxParser::setTokenFactory(TokenFactory* const tokFactory) {
  253.     fTokenFactory = tokFactory;
  254. }
  255. // ---------------------------------------------------------------------------
  256. //  RegxParser: Helper Methods
  257. // ---------------------------------------------------------------------------
  258. inline bool RegxParser::isSet(const int flag) {
  259.     return (fOptions & flag) == flag;
  260. }
  261. inline int RegxParser::hexChar(const XMLInt32 ch) {
  262. if (ch < chDigit_0 || ch > chLatin_f)
  263. return -1;
  264. if (ch <= chDigit_9)
  265. return ch - chDigit_0;
  266. if (ch < chLatin_A)
  267. return -1;
  268. if (ch <= chLatin_F)
  269. return ch - chLatin_A + 10;
  270. if (ch < chLatin_a)
  271. return -1;
  272. return ch - chLatin_a + 10;
  273. }
  274. XERCES_CPP_NAMESPACE_END
  275. #endif
  276. /**
  277.   * End file RegxParser.hpp
  278.   */