| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * $Id: DTDValidator.cpp 568078 2007-08-21 11:43:25Z amassari $ |
| */ |
| |
| |
| // --------------------------------------------------------------------------- |
| // Includes |
| // --------------------------------------------------------------------------- |
| #include <xercesc/util/Janitor.hpp> |
| #include <xercesc/util/XMLUniDefs.hpp> |
| #include <xercesc/util/XMLUni.hpp> |
| #include <xercesc/internal/ReaderMgr.hpp> |
| #include <xercesc/internal/XMLScanner.hpp> |
| #include <xercesc/validators/DTD/DTDValidator.hpp> |
| |
| XERCES_CPP_NAMESPACE_BEGIN |
| |
| // --------------------------------------------------------------------------- |
| // DTDValidator: Constructors and Destructor |
| // --------------------------------------------------------------------------- |
| DTDValidator::DTDValidator(XMLErrorReporter* const errReporter) : |
| |
| XMLValidator(errReporter) |
| , fDTDGrammar(0) |
| { |
| reset(); |
| } |
| |
| DTDValidator::~DTDValidator() |
| { |
| } |
| |
| |
| // --------------------------------------------------------------------------- |
| // DTDValidator: Implementation of the XMLValidator interface |
| // --------------------------------------------------------------------------- |
| int DTDValidator::checkContent(XMLElementDecl* const elemDecl |
| , QName** const children |
| , const unsigned int childCount) |
| { |
| // |
| // Look up the element id in our element decl pool. This will get us |
| // the element decl in our own way of looking at them. |
| // |
| if (!elemDecl) |
| ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_InvalidElemId, getScanner()->getMemoryManager()); |
| |
| // |
| // Get the content spec type of this element. This will tell us what |
| // to do to validate it. |
| // |
| const DTDElementDecl::ModelTypes modelType = ((DTDElementDecl*) elemDecl)->getModelType(); |
| |
| if (modelType == DTDElementDecl::Empty) |
| { |
| // |
| // We can do this one here. It cannot have any children. If it does |
| // we return 0 as the index of the first bad child. |
| // |
| if (childCount) |
| return 0; |
| } |
| else if (modelType == DTDElementDecl::Any) |
| { |
| // We pass no judgement on this one, anything goes |
| } |
| else if ((modelType == DTDElementDecl::Mixed_Simple) |
| || (modelType == DTDElementDecl::Children)) |
| { |
| // Get the element's content model or fault it in |
| const XMLContentModel* elemCM = elemDecl->getContentModel(); |
| |
| // Ask it to validate and return its return |
| return elemCM->validateContent(children, childCount, getScanner()->getEmptyNamespaceId()); |
| } |
| else |
| { |
| ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::CM_UnknownCMType, getScanner()->getMemoryManager()); |
| } |
| |
| // Went ok, so return success |
| return -1; |
| } |
| |
| |
| void DTDValidator::faultInAttr(XMLAttr& toFill, const XMLAttDef& attDef) const |
| { |
| toFill.set(0, attDef.getFullName(), attDef.getValue(), attDef.getType()); |
| } |
| |
| void DTDValidator::reset() |
| { |
| } |
| |
| |
| bool DTDValidator::requiresNamespaces() const |
| { |
| // Namespaces are not supported for DTDs |
| return false; |
| } |
| |
| |
| void |
| DTDValidator::validateAttrValue(const XMLAttDef* attDef |
| , const XMLCh* const attrValue |
| , bool preValidation |
| , const XMLElementDecl*) |
| { |
| // |
| // Get quick refs to lost of of the stuff in the passed objects in |
| // order to simplify the code below, which will reference them very |
| // often. |
| // |
| const XMLAttDef::AttTypes type = attDef->getType(); |
| const XMLAttDef::DefAttTypes defType = attDef->getDefaultType(); |
| const XMLCh* const valueText = attDef->getValue(); |
| const XMLCh* const fullName = attDef->getFullName(); |
| const XMLCh* const enumList = attDef->getEnumeration(); |
| |
| // |
| // If the default type is fixed, then make sure the passed value maps |
| // to the fixed value. |
| // If during preContentValidation, the value we are validating is the fixed value itself |
| // so no need to compare. |
| // Only need to do this for regular attribute value validation |
| // |
| if (defType == XMLAttDef::Fixed && !preValidation) |
| { |
| if (!XMLString::equals(attrValue, valueText)) |
| emitError(XMLValid::NotSameAsFixedValue, fullName, attrValue, valueText); |
| } |
| |
| // |
| // If its a CDATA attribute, then we are done with any DTD level |
| // validation else do the rest. |
| // |
| if (type == XMLAttDef::CData) |
| return; |
| |
| |
| |
| // An empty string cannot be valid for any of the other types |
| if (!attrValue[0]) |
| { |
| emitError(XMLValid::InvalidEmptyAttValue, fullName); |
| return; |
| } |
| |
| // See whether we are doing multiple values or not |
| const bool multipleValues = |
| ( |
| (type == XMLAttDef::IDRefs) |
| || (type == XMLAttDef::Entities) |
| || (type == XMLAttDef::NmTokens) |
| || (type == XMLAttDef::Notation) |
| || (type == XMLAttDef::Enumeration) |
| ); |
| |
| // And whether we must check for a first name char |
| const bool firstNameChar = |
| ( |
| (type == XMLAttDef::ID) |
| || (type == XMLAttDef::IDRef) |
| || (type == XMLAttDef::IDRefs) |
| || (type == XMLAttDef::Entity) |
| || (type == XMLAttDef::Entities) |
| || (type == XMLAttDef::Notation) |
| ); |
| |
| // Whether it requires ref checking stuff |
| const bool isARefType |
| ( |
| (type == XMLAttDef::ID) |
| || (type == XMLAttDef::IDRef) |
| || (type == XMLAttDef::IDRefs) |
| ); |
| |
| // Some trigger flags to avoid issuing redundant errors and whatnot |
| bool alreadyCapped = false; |
| |
| // |
| // Make a copy of the text that we can mangle and get a pointer we can |
| // move through the value |
| // |
| |
| // Use a stack-based buffer, when possible... |
| XMLCh tempBuffer[100]; |
| |
| XMLCh* pszTmpVal = 0; |
| |
| ArrayJanitor<XMLCh> janTmpVal(0); |
| |
| if (XMLString::stringLen(attrValue) < sizeof(tempBuffer) / sizeof(tempBuffer[0])) |
| { |
| XMLString::copyString(tempBuffer, attrValue); |
| pszTmpVal = tempBuffer; |
| } |
| else |
| { |
| janTmpVal.reset(XMLString::replicate(attrValue, getScanner()->getMemoryManager()), getScanner()->getMemoryManager()); |
| pszTmpVal = janTmpVal.get(); |
| } |
| |
| XMLCh* valPtr = pszTmpVal; |
| |
| bool doNamespace = getScanner()->getDoNamespaces(); |
| |
| while (true) |
| { |
| // |
| // Make sure the first character is a valid first name char, i.e. |
| // if its a Name value. For NmToken values we don't treat the first |
| // char any differently. |
| // |
| if (firstNameChar) |
| { |
| // If its not, emit and error but try to keep going |
| if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr)) |
| emitError(XMLValid::AttrValNotName, valPtr, fullName); |
| valPtr++; |
| } |
| |
| // Make sure all the remaining chars are valid name chars |
| while (*valPtr) |
| { |
| // |
| // If we hit a whitespace, its either a break between two |
| // or more values, or an error if we have a single value. |
| // |
| // |
| // XML1.0-3rd |
| // |
| // [6] Names ::= Name (#x20 Name)* |
| // [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* |
| // |
| // only and only ONE #x20 is allowed to be the delimiter |
| // |
| if (*valPtr==chSpace) |
| { |
| if (!multipleValues) |
| { |
| emitError(XMLValid::NoMultipleValues, fullName); |
| return; |
| } |
| |
| break; |
| } |
| |
| // Now this attribute can be of type |
| // ID, IDREF, IDREFS, ENTITY, ENTITIES, NOTATION, NMTOKEN, NMTOKENS, ENUMERATION |
| // All these must be valid XMLName |
| // If namespace is enabled, colon is not allowed in the first 6 |
| |
| if (doNamespace && *valPtr == chColon && firstNameChar) |
| emitError(XMLValid::ColonNotValidWithNS); |
| |
| if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr)) |
| { |
| emitError(XMLValid::AttrValNotName, valPtr, fullName); |
| return; |
| } |
| valPtr++; |
| } |
| |
| // |
| // Cap it off at the current non-name char. If already capped, |
| // then remember this. |
| // |
| if (!(*valPtr)) |
| alreadyCapped = true; |
| *valPtr = 0; |
| |
| // |
| // If this type of attribute requires that we track reference |
| // stuff, then handle that. |
| // |
| if (isARefType) |
| { |
| if ((type == XMLAttDef::ID) |
| || (type == XMLAttDef::IDRef) |
| || (type == XMLAttDef::IDRefs)) |
| { |
| XMLRefInfo* find = getScanner()->getIDRefList()->get(pszTmpVal); |
| if (find) |
| { |
| if (find->getDeclared() && (type == XMLAttDef::ID)) |
| emitError(XMLValid::ReusedIDValue, pszTmpVal); |
| } |
| else |
| { |
| find = new (getScanner()->getMemoryManager()) XMLRefInfo |
| ( |
| pszTmpVal |
| , false |
| , false |
| , getScanner()->getMemoryManager() |
| ); |
| getScanner()->getIDRefList()->put((void*)find->getRefName(), find); |
| } |
| |
| // |
| // Mark it declared or used, which might be redundant in some cases |
| // but not worth checking |
| // |
| if (type == XMLAttDef::ID) |
| find->setDeclared(true); |
| else { |
| if (!preValidation) { |
| find->setUsed(true); |
| } |
| } |
| } |
| } |
| else if (!preValidation && ((type == XMLAttDef::Entity) || (type == XMLAttDef::Entities))) |
| { |
| // |
| // If its refering to a entity, then look up the name in the |
| // general entity pool. If not there, then its an error. If its |
| // not an external unparsed entity, then its an error. |
| // |
| // In case of pre-validation, the above errors should be ignored. |
| // |
| const XMLEntityDecl* decl = fDTDGrammar->getEntityDecl(pszTmpVal); |
| if (decl) |
| { |
| if (!decl->isUnparsed()) |
| emitError(XMLValid::BadEntityRefAttr, pszTmpVal, fullName); |
| } |
| else |
| { |
| emitError |
| ( |
| XMLValid::UnknownEntityRefAttr |
| , fullName |
| , pszTmpVal |
| ); |
| } |
| } |
| else if ((type == XMLAttDef::Notation) || (type == XMLAttDef::Enumeration)) |
| { |
| // |
| // Make sure that this value maps to one of the enumeration or |
| // notation values in the enumList parameter. We don't have to |
| // look it up in the notation pool (if a notation) because we |
| // will look up the enumerated values themselves. If they are in |
| // the notation pool (after the DTD is parsed), then obviously |
| // this value will be legal since it matches one of them. |
| // |
| if (!XMLString::isInList(pszTmpVal, enumList)) |
| emitError(XMLValid::DoesNotMatchEnumList, pszTmpVal, fullName); |
| } |
| |
| // If not doing multiple values, then we are done |
| if (!multipleValues) |
| break; |
| |
| // |
| // If we are at the end, then break out now, else move up to the |
| // next char and update the base pointer. |
| // |
| if (alreadyCapped) |
| break; |
| |
| valPtr++; |
| pszTmpVal = valPtr; |
| } |
| |
| } |
| |
| void DTDValidator::preContentValidation(bool reuseGrammar, |
| bool validateDefAttr) |
| { |
| // |
| // Lets enumerate all of the elements in the element decl pool |
| // and put out an error for any that did not get declared. |
| // We also check all of the attributes as well. |
| // |
| NameIdPoolEnumerator<DTDElementDecl> elemEnum = fDTDGrammar->getElemEnumerator(); |
| fDTDGrammar->setValidated(true); |
| while (elemEnum.hasMoreElements()) |
| { |
| const DTDElementDecl& curElem = elemEnum.nextElement(); |
| const DTDElementDecl::CreateReasons reason = curElem.getCreateReason(); |
| |
| // |
| // See if this element decl was ever marked as declared. If |
| // not, then put out an error. In some cases its just |
| // a warning, such as being referenced in a content model. |
| // |
| if (reason != XMLElementDecl::Declared) |
| { |
| if (reason == XMLElementDecl::AttList) |
| { |
| getScanner()->emitError |
| ( |
| XMLErrs::UndeclaredElemInAttList |
| , curElem.getFullName() |
| ); |
| } |
| else if (reason == XMLElementDecl::AsRootElem) |
| { |
| // It's ok that the root element is not declared in the DTD |
| /* |
| emitError |
| ( |
| XMLValid::UndeclaredElemInDocType |
| , curElem.getFullName() |
| );*/ |
| } |
| else if (reason == XMLElementDecl::InContentModel) |
| { |
| getScanner()->emitError |
| ( |
| XMLErrs::UndeclaredElemInCM |
| , curElem.getFullName() |
| ); |
| } |
| else |
| { |
| #if defined(XERCES_DEBUG) |
| if(reuseGrammar && reason == XMLElementDecl::JustFaultIn){ |
| } |
| else |
| ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::DTD_UnknownCreateReason, getScanner()->getMemoryManager()); |
| #endif |
| } |
| } |
| |
| // |
| // Check all of the attributes of the current element. |
| // We check for: |
| // |
| // 1) Multiple ID attributes |
| // 2) That all of the default values of attributes are |
| // valid for their type. |
| // 3) That for any notation types, that their lists |
| // of possible values refer to declared notations. |
| // |
| // 4) XML1.0(3rd edition) |
| // |
| // Validity constraint: One Notation Per Element Type |
| // An element type MUST NOT have more than one NOTATION attribute specified. |
| // |
| // Validity constraint: No Notation on Empty Element |
| // For compatibility, an attribute of type NOTATION MUST NOT be declared on an element declared EMPTY. |
| // |
| // Validity constraint: No Duplicate Tokens |
| // The notation names in a single NotationType attribute declaration, as well as |
| // the NmTokens in a single Enumeration attribute declaration, MUST all be distinct. |
| // |
| |
| XMLAttDefList& attDefList = curElem.getAttDefList(); |
| bool seenId = false; |
| bool seenNOTATION = false; |
| bool elemEmpty = (curElem.getModelType() == DTDElementDecl::Empty); |
| |
| for(unsigned int i=0; i<attDefList.getAttDefCount(); i++) |
| { |
| const XMLAttDef& curAttDef = attDefList.getAttDef(i); |
| |
| if (curAttDef.getType() == XMLAttDef::ID) |
| { |
| if (seenId) |
| { |
| emitError |
| ( |
| XMLValid::MultipleIdAttrs |
| , curElem.getFullName() |
| ); |
| break; |
| } |
| |
| seenId = true; |
| } |
| else if (curAttDef.getType() == XMLAttDef::Notation) |
| { |
| if (seenNOTATION) |
| { |
| emitError |
| ( |
| XMLValid::ElemOneNotationAttr |
| , curElem.getFullName() |
| ); |
| |
| break; |
| } |
| |
| seenNOTATION = true; |
| |
| // no notation attribute on empty element |
| if (elemEmpty) |
| { |
| emitError |
| ( |
| XMLValid::EmptyElemNotationAttr |
| , curElem.getFullName() |
| , curAttDef.getFullName() |
| ); |
| |
| break; |
| } |
| |
| //go through enumeration list to check |
| // distinct |
| // notation declaration |
| if (curAttDef.getEnumeration()) |
| { |
| checkTokenList(curAttDef, true); |
| } |
| } |
| else if (curAttDef.getType() == XMLAttDef::Enumeration ) |
| { |
| //go through enumeration list to check |
| // distinct only |
| if (curAttDef.getEnumeration()) |
| { |
| checkTokenList(curAttDef, false); |
| } |
| } |
| |
| // If it has a default/fixed value, then validate it |
| if (validateDefAttr && curAttDef.getValue()) |
| { |
| validateAttrValue |
| ( |
| &curAttDef |
| , curAttDef.getValue() |
| , true |
| , &curElem |
| ); |
| } |
| } |
| } |
| |
| // |
| // And enumerate all of the general entities. If any of them |
| // reference a notation, then make sure the notation exists. |
| // |
| NameIdPoolEnumerator<DTDEntityDecl> entEnum = fDTDGrammar->getEntityEnumerator(); |
| while (entEnum.hasMoreElements()) |
| { |
| const DTDEntityDecl& curEntity = entEnum.nextElement(); |
| |
| if (!curEntity.getNotationName()) |
| continue; |
| |
| // It has a notation name, so look it up |
| if (!fDTDGrammar->getNotationDecl(curEntity.getNotationName())) |
| { |
| emitError |
| ( |
| XMLValid::NotationNotDeclared |
| , curEntity.getNotationName() |
| ); |
| } |
| } |
| } |
| |
| void DTDValidator::postParseValidation() |
| { |
| // |
| // At this time, there is nothing to do here. The scanner itself handles |
| // ID/IDREF validation, since that is the same no matter what kind of |
| // validator. |
| // |
| } |
| |
| // |
| // We need to verify that all of its possible values |
| // (in the enum list) |
| // is distinct and |
| // refer to valid notations if toValidateNotation is set on |
| // |
| void DTDValidator::checkTokenList(const XMLAttDef& curAttDef |
| , bool toValidateNotation) |
| { |
| |
| XMLCh* list = XMLString::replicate(curAttDef.getEnumeration(), getScanner()->getMemoryManager()); |
| ArrayJanitor<XMLCh> janList(list, getScanner()->getMemoryManager()); |
| |
| // |
| // Search forward for a space or a null. If a null, |
| // we are done. If a space, cap it and look it up. |
| // |
| bool breakFlag = false; |
| XMLCh* listPtr = list; |
| XMLCh* lastPtr = listPtr; |
| while (true) |
| { |
| while (*listPtr && (*listPtr != chSpace)) |
| listPtr++; |
| |
| // |
| // If at the end, indicate we need to break after |
| // this one. Else, cap it off here. |
| // |
| if (!*listPtr) |
| breakFlag = true; |
| else |
| *listPtr++ = chNull; |
| |
| //distinction check |
| //there should be no same token found in the remaining list |
| if (XMLString::isInList(lastPtr, listPtr)) |
| { |
| emitError |
| ( |
| XMLValid::AttrDupToken |
| , curAttDef.getFullName() |
| , lastPtr |
| ); |
| } |
| |
| if (toValidateNotation && !fDTDGrammar->getNotationDecl(lastPtr)) |
| { |
| emitError |
| ( |
| XMLValid::UnknownNotRefAttr |
| , curAttDef.getFullName() |
| , lastPtr |
| ); |
| } |
| |
| // Break out if we hit the end last time |
| if (breakFlag) |
| break; |
| |
| // Else move upwards and try again |
| lastPtr = listPtr; |
| } |
| } |
| |
| XERCES_CPP_NAMESPACE_END |