blob: c7f2e01c9fff731ff88a492d890aba79a67e73c4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: DFAContentModel.hpp 568078 2007-08-21 11:43:25Z amassari $
*/
#if !defined(DFACONTENTMODEL_HPP)
#define DFACONTENTMODEL_HPP
#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
#include <xercesc/framework/XMLContentModel.hpp>
#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
XERCES_CPP_NAMESPACE_BEGIN
class ContentSpecNode;
class CMLeaf;
class CMNode;
class CMStateSet;
//
// DFAContentModel is the heavy weight derivative of ContentModel that does
// all of the non-trivial element content validation. This guy does the full
// bore regular expression to DFA conversion to create a DFA that it then
// uses in its validation algorithm.
//
// NOTE: Upstream work insures that this guy will never see a content model
// with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
// via the MixedContentModel class, since mixed models are very
// constrained in form and easily handled via a special case. This
// also makes our life much easier here.
//
class DFAContentModel : public XMLContentModel
{
public:
// -----------------------------------------------------------------------
// Constructors and Destructor
// -----------------------------------------------------------------------
DFAContentModel
(
const bool dtd
, ContentSpecNode* const elemContentSpec
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
);
DFAContentModel
(
const bool dtd
, ContentSpecNode* const elemContentSpec
, const bool isMixed
, MemoryManager* const manager
);
virtual ~DFAContentModel();
// -----------------------------------------------------------------------
// Implementation of the virtual content model interface
// -----------------------------------------------------------------------
virtual int validateContent
(
QName** const children
, const unsigned int childCount
, const unsigned int emptyNamespaceId
) const;
virtual int validateContentSpecial
(
QName** const children
, const unsigned int childCount
, const unsigned int emptyNamespaceId
, GrammarResolver* const pGrammarResolver
, XMLStringPool* const pStringPool
) const;
virtual void checkUniqueParticleAttribution
(
SchemaGrammar* const pGrammar
, GrammarResolver* const pGrammarResolver
, XMLStringPool* const pStringPool
, XMLValidator* const pValidator
, unsigned int* const pContentSpecOrgURI
, const XMLCh* pComplexTypeName = 0
) ;
virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ;
virtual unsigned int getNextState(const unsigned int currentState,
const unsigned int elementIndex) const;
private :
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
DFAContentModel();
DFAContentModel(const DFAContentModel&);
DFAContentModel& operator=(const DFAContentModel&);
// -----------------------------------------------------------------------
// Private helper methods
// -----------------------------------------------------------------------
void buildDFA(ContentSpecNode* const curNode);
CMNode* buildSyntaxTree(ContentSpecNode* const curNode);
void calcFollowList(CMNode* const curNode);
unsigned int* makeDefStateList() const;
int postTreeBuildInit
(
CMNode* const nodeCur
, const unsigned int curIndex
);
// -----------------------------------------------------------------------
// Private data members
//
// fElemMap
// fElemMapSize
// This is the map of unique input symbol elements to indices into
// each state's per-input symbol transition table entry. This is part
// of the built DFA information that must be kept around to do the
// actual validation.
//
// fElemMapType
// This is a map of whether the element map contains information
// related to ANY models.
//
// fEmptyOk
// This is an optimization. While building the transition table we
// can see whether this content model would approve of an empty
// content (which could happen if everything was optional.) So we
// set this flag and short circuit that check, which would otherwise
// be ugly and time consuming if we tried to determine it at each
// validation call.
//
// fEOCPos
// The NFA position of the special EOC (end of content) node. This
// is saved away since its used during the DFA build.
//
// fFinalStateFlags
// This is an array of booleans, one per state (there are
// fTransTableSize states in the DFA) that indicates whether that
// state is a final state.
//
// fFollowList
// The list of follow positions for each NFA position (i.e. for each
// non-epsilon leaf node.) This is only used during the building of
// the DFA, and is let go afterwards.
//
// fHeadNode
// This is the head node of our intermediate representation. It is
// only non-null during the building of the DFA (just so that it
// does not have to be passed all around.) Once the DFA is built,
// this is no longer required so its deleted.
//
// fLeafCount
// The count of leaf nodes. This is an important number that set some
// limits on the sizes of data structures in the DFA process.
//
// fLeafList
// An array of non-epsilon leaf nodes, which is used during the DFA
// build operation, then dropped. These are just references to nodes
// pointed to by fHeadNode, so we don't have to clean them up, just
// the actually leaf list array itself needs cleanup.
//
// fLeafListType
// Array mapping ANY types to the leaf list.
//
// fTransTable
// fTransTableSize
// This is the transition table that is the main by product of all
// of the effort here. It is an array of arrays of ints. The first
// dimension is the number of states we end up with in the DFA. The
// second dimensions is the number of unique elements in the content
// model (fElemMapSize). Each entry in the second dimension indicates
// the new state given that input for the first dimension's start
// state.
//
// The fElemMap array handles mapping from element indexes to
// positions in the second dimension of the transition table.
//
// fTransTableSize is the number of valid entries in the transition
// table, and in the other related tables such as fFinalStateFlags.
//
// fDTD
// Boolean to allow DTDs to validate even with namespace support.
//
// fIsMixed
// DFA ContentModel with mixed PCDATA.
// -----------------------------------------------------------------------
QName** fElemMap;
ContentSpecNode::NodeTypes *fElemMapType;
unsigned int fElemMapSize;
bool fEmptyOk;
unsigned int fEOCPos;
bool* fFinalStateFlags;
CMStateSet** fFollowList;
CMNode* fHeadNode;
unsigned int fLeafCount;
CMLeaf** fLeafList;
ContentSpecNode::NodeTypes *fLeafListType;
unsigned int** fTransTable;
unsigned int fTransTableSize;
bool fDTD;
bool fIsMixed;
ContentLeafNameTypeVector *fLeafNameTypeVector;
MemoryManager* fMemoryManager;
};
inline unsigned int
DFAContentModel::getNextState(const unsigned int currentState,
const unsigned int elementIndex) const {
if (currentState == XMLContentModel::gInvalidTrans) {
return XMLContentModel::gInvalidTrans;
}
if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) {
ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
}
return fTransTable[currentState][elementIndex];
}
XERCES_CPP_NAMESPACE_END
#endif