DFAContentModel.hpp
上传用户:zhuqijet
上传日期:2013-06-25
资源大小:10074k
文件大小:14k
- /*
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 1999-2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Xerces" and "Apache Software Foundation" must
- * not be used to endorse or promote products derived from this
- * software without prior written permission. For written
- * permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * nor may "Apache" appear in their name, without prior written
- * permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation, and was
- * originally based on software copyright (c) 1999, International
- * Business Machines, Inc., http://www.ibm.com . For more information
- * on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
- /*
- * $Log: DFAContentModel.hpp,v $
- * Revision 1.5 2003/05/16 21:43:20 knoaman
- * Memory manager implementation: Modify constructors to pass in the memory manager.
- *
- * Revision 1.4 2003/05/15 18:48:27 knoaman
- * Partial implementation of the configurable memory manager.
- *
- * Revision 1.3 2003/03/07 18:16:57 tng
- * Return a reference instead of void for operator=
- *
- * Revision 1.2 2002/11/04 14:54:58 tng
- * C++ Namespace Support.
- *
- * Revision 1.1.1.1 2002/02/01 22:22:38 peiyongz
- * sane_include
- *
- * Revision 1.13 2001/11/21 14:30:13 knoaman
- * Fix for UPA checking.
- *
- * Revision 1.12 2001/08/24 12:48:48 tng
- * Schema: AllContentModel
- *
- * Revision 1.11 2001/08/21 16:06:11 tng
- * Schema: Unique Particle Attribution Constraint Checking.
- *
- * Revision 1.10 2001/08/13 15:06:39 knoaman
- * update <any> validation.
- *
- * Revision 1.9 2001/06/13 20:50:55 peiyongz
- * fIsMixed: to handle mixed Content Model
- *
- * Revision 1.8 2001/05/11 13:27:18 tng
- * Copyright update.
- *
- * Revision 1.7 2001/05/03 21:02:30 tng
- * Schema: Add SubstitutionGroupComparator and update exception messages. By Pei Yong Zhang.
- *
- * Revision 1.6 2001/04/19 18:17:30 tng
- * Schema: SchemaValidator update, and use QName in Content Model
- *
- * Revision 1.5 2001/03/21 21:56:27 tng
- * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
- *
- * Revision 1.4 2001/03/21 19:29:55 tng
- * Schema: Content Model Updates, by Pei Yong Zhang.
- *
- * Revision 1.3 2001/02/27 18:32:32 tng
- * Schema: Use XMLElementDecl instead of DTDElementDecl in Content Model.
- *
- * Revision 1.2 2001/02/27 14:48:52 tng
- * Schema: Add CMAny and ContentLeafNameTypeVector, by Pei Yong Zhang
- *
- * Revision 1.1 2001/02/16 14:17:29 tng
- * Schema: Move the common Content Model files that are shared by DTD
- * and schema from 'DTD' folder to 'common' folder. By Pei Yong Zhang.
- *
- * Revision 1.4 2000/03/02 19:55:38 roddey
- * This checkin includes many changes done while waiting for the
- * 1.1.0 code to be finished. I can't list them all here, but a list is
- * available elsewhere.
- *
- * Revision 1.3 2000/02/24 20:16:48 abagchi
- * Swat for removing Log from API docs
- *
- * Revision 1.2 2000/02/09 21:42:37 abagchi
- * Copyright swat
- *
- * Revision 1.1.1.1 1999/11/09 01:03:19 twl
- * Initial checkin
- *
- * Revision 1.2 1999/11/08 20:45:38 rahul
- * Swat for adding in Product name and CVS comment log variable.
- *
- */
- #if !defined(DFACONTENTMODEL_HPP)
- #define DFACONTENTMODEL_HPP
- #include <xercesc/util/XercesDefs.hpp>
- #include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
- #include <xercesc/framework/XMLContentModel.hpp>
- #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
- XERCES_CPP_NAMESPACE_BEGIN
- class ContentSpecNode;
- class CMLeaf;
- class CMNode;
- class CMStateSet;
- //
- // DFAContentModel is the heavy weight derivative of ContentModel that does
- // all of the non-trivial element content validation. This guy does the full
- // bore regular expression to DFA conversion to create a DFA that it then
- // uses in its validation algorithm.
- //
- // NOTE: Upstream work insures that this guy will never see a content model
- // with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
- // via the MixedContentModel class, since mixed models are very
- // constrained in form and easily handled via a special case. This
- // also makes our life much easier here.
- //
- class DFAContentModel : public XMLContentModel
- {
- public:
- // -----------------------------------------------------------------------
- // Constructors and Destructor
- // -----------------------------------------------------------------------
- DFAContentModel
- (
- const bool dtd
- , ContentSpecNode* const elemContentSpec
- , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
- );
- DFAContentModel
- (
- const bool dtd
- , ContentSpecNode* const elemContentSpec
- , const bool isMixed
- , MemoryManager* const manager
- );
- virtual ~DFAContentModel();
- // -----------------------------------------------------------------------
- // Implementation of the virtual content model interface
- // -----------------------------------------------------------------------
- virtual int validateContent
- (
- QName** const children
- , const unsigned int childCount
- , const unsigned int emptyNamespaceId
- ) const;
- virtual int validateContentSpecial
- (
- QName** const children
- , const unsigned int childCount
- , const unsigned int emptyNamespaceId
- , GrammarResolver* const pGrammarResolver
- , XMLStringPool* const pStringPool
- ) const;
- virtual void checkUniqueParticleAttribution
- (
- SchemaGrammar* const pGrammar
- , GrammarResolver* const pGrammarResolver
- , XMLStringPool* const pStringPool
- , XMLValidator* const pValidator
- , unsigned int* const pContentSpecOrgURI
- ) ;
- virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ;
- virtual unsigned int getNextState(const unsigned int currentState,
- const unsigned int elementIndex) const;
- private :
- // -----------------------------------------------------------------------
- // Unimplemented constructors and operators
- // -----------------------------------------------------------------------
- DFAContentModel();
- DFAContentModel(const DFAContentModel&);
- DFAContentModel& operator=(const DFAContentModel&);
- // -----------------------------------------------------------------------
- // Private helper methods
- // -----------------------------------------------------------------------
- void buildDFA(ContentSpecNode* const curNode);
- CMNode* buildSyntaxTree(ContentSpecNode* const curNode);
- void calcFollowList(CMNode* const curNode);
- unsigned int* makeDefStateList() const;
- int postTreeBuildInit
- (
- CMNode* const nodeCur
- , const unsigned int curIndex
- );
- // -----------------------------------------------------------------------
- // Private data members
- //
- // fElemMap
- // fElemMapSize
- // This is the map of unique input symbol elements to indices into
- // each state's per-input symbol transition table entry. This is part
- // of the built DFA information that must be kept around to do the
- // actual validation.
- //
- // fElemMapType
- // This is a map of whether the element map contains information
- // related to ANY models.
- //
- // fEmptyOk
- // This is an optimization. While building the transition table we
- // can see whether this content model would approve of an empty
- // content (which could happen if everything was optional.) So we
- // set this flag and short circuit that check, which would otherwise
- // be ugly and time consuming if we tried to determine it at each
- // validation call.
- //
- // fEOCPos
- // The NFA position of the special EOC (end of content) node. This
- // is saved away since its used during the DFA build.
- //
- // fFinalStateFlags
- // This is an array of booleans, one per state (there are
- // fTransTableSize states in the DFA) that indicates whether that
- // state is a final state.
- //
- // fFollowList
- // The list of follow positions for each NFA position (i.e. for each
- // non-epsilon leaf node.) This is only used during the building of
- // the DFA, and is let go afterwards.
- //
- // fHeadNode
- // This is the head node of our intermediate representation. It is
- // only non-null during the building of the DFA (just so that it
- // does not have to be passed all around.) Once the DFA is built,
- // this is no longer required so its deleted.
- //
- // fLeafCount
- // The count of leaf nodes. This is an important number that set some
- // limits on the sizes of data structures in the DFA process.
- //
- // fLeafList
- // An array of non-epsilon leaf nodes, which is used during the DFA
- // build operation, then dropped. These are just references to nodes
- // pointed to by fHeadNode, so we don't have to clean them up, just
- // the actually leaf list array itself needs cleanup.
- //
- // fLeafListType
- // Array mapping ANY types to the leaf list.
- //
- // fTransTable
- // fTransTableSize
- // This is the transition table that is the main by product of all
- // of the effort here. It is an array of arrays of ints. The first
- // dimension is the number of states we end up with in the DFA. The
- // second dimensions is the number of unique elements in the content
- // model (fElemMapSize). Each entry in the second dimension indicates
- // the new state given that input for the first dimension's start
- // state.
- //
- // The fElemMap array handles mapping from element indexes to
- // positions in the second dimension of the transition table.
- //
- // fTransTableSize is the number of valid entries in the transition
- // table, and in the other related tables such as fFinalStateFlags.
- //
- // fDTD
- // Boolean to allow DTDs to validate even with namespace support.
- //
- // fIsMixed
- // DFA ContentModel with mixed PCDATA.
- // -----------------------------------------------------------------------
- QName** fElemMap;
- ContentSpecNode::NodeTypes *fElemMapType;
- unsigned int fElemMapSize;
- bool fEmptyOk;
- unsigned int fEOCPos;
- bool* fFinalStateFlags;
- CMStateSet** fFollowList;
- CMNode* fHeadNode;
- unsigned int fLeafCount;
- CMLeaf** fLeafList;
- ContentSpecNode::NodeTypes *fLeafListType;
- unsigned int** fTransTable;
- unsigned int fTransTableSize;
- bool fDTD;
- bool fIsMixed;
- ContentLeafNameTypeVector *fLeafNameTypeVector;
- MemoryManager* fMemoryManager;
- };
- inline unsigned int
- DFAContentModel::getNextState(const unsigned int currentState,
- const unsigned int elementIndex) const {
- if (currentState == XMLContentModel::gInvalidTrans) {
- return XMLContentModel::gInvalidTrans;
- }
- if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) {
- ThrowXML(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex);
- }
- return fTransTable[currentState][elementIndex];
- }
- XERCES_CPP_NAMESPACE_END
- #endif