// src/xercesc/validators/DTD/DTDScanner.cpp - platform/external/xerces-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * $Id: DTDScanner.cpp 568078 2007-08-21 11:43:25Z amassari $
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/util/BinMemInputStream.hpp>
 #include <xercesc/util/FlagJanitor.hpp>
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
 #include <xercesc/util/UnexpectedEOFException.hpp>
 #include <xercesc/sax/InputSource.hpp>
 #include <xercesc/framework/XMLDocumentHandler.hpp>
 #include <xercesc/framework/XMLEntityHandler.hpp>
 #include <xercesc/framework/XMLValidator.hpp>
 #include <xercesc/internal/EndOfEntityException.hpp>
 #include <xercesc/internal/XMLScanner.hpp>
 #include <xercesc/validators/common/ContentSpecNode.hpp>
 #include <xercesc/validators/common/MixedContentModel.hpp>
 #include <xercesc/validators/DTD/DTDEntityDecl.hpp>
 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
 #include <xercesc/validators/DTD/DTDScanner.hpp>
 #include <xercesc/util/OutOfMemoryException.hpp>

 XERCES_CPP_NAMESPACE_BEGIN

 // ---------------------------------------------------------------------------
 //  Local methods
 // ---------------------------------------------------------------------------
 //
 //  This method automates the grunt work of looking at a char and seeing if
 //  it's a repetition suffix. If so, it creates a new correct rep node and
 //  wraps the passed node in it. Otherwise, it returns the previous node.
 //
 static ContentSpecNode* makeRepNode(const XMLCh testCh,
                                     ContentSpecNode* const prevNode,
                                     MemoryManager* const manager)
 {
     //
     //  Work out which repetition node type, if any, the candidate suffix
     //  character stands for. Anything other than '?', '+' or '*' is not a
     //  suffix, so the incoming node is handed back untouched.
     //
     ContentSpecNode::NodeTypes repType;
     if (testCh == chQuestion)
         repType = ContentSpecNode::ZeroOrOne;
     else if (testCh == chPlus)
         repType = ContentSpecNode::OneOrMore;
     else if (testCh == chAsterisk)
         repType = ContentSpecNode::ZeroOrMore;
     else
         return prevNode;

     // Wrap the incoming node as the first child of a new repetition node
     return new (manager) ContentSpecNode
     (
         repType
         , prevNode
         , 0
         , true
         , true
         , manager
     );
 }

 // ---------------------------------------------------------------------------
 //  DTDScanner: Constructors and Destructor
 // ---------------------------------------------------------------------------
 DTDScanner::DTDScanner(DTDGrammar*           dtdGrammar,
                        DocTypeHandler* const docTypeHandler,
                        MemoryManager* const  grammarPoolMemoryManager,
                        MemoryManager* const  manager)
     : fMemoryManager(manager),
       fGrammarPoolMemoryManager(grammarPoolMemoryManager),
       fDocTypeHandler(docTypeHandler),
       fDumAttDef(0),
       fDumElemDecl(0),
       fDumEntityDecl(0),
       fInternalSubset(false),
       fNextAttrId(1),
       fDTDGrammar(dtdGrammar),
       fBufMgr(0),
       fReaderMgr(0),
       fScanner(0),
       fPEntityDeclPool(0),
       fEmptyNamespaceId(0),
       fDocTypeReaderId(0)
 {
     //
     //  Everything else starts out null or zero; the scanner, reader manager
     //  and buffer manager are supplied later through setScannerInfo(). The
     //  only thing built up front is the parameter entity declaration pool,
     //  which is allocated from the scanner's own memory manager.
     //
     fPEntityDeclPool = new (fMemoryManager) NameIdPool<DTDEntityDecl>
     (
         109
         , 128
         , fMemoryManager
     );
 }

 DTDScanner::~DTDScanner()
 {
     // The dummy declarations are initialized to null by the constructor, so
     // deleting any that were never allocated is a no-op.
     delete fDumAttDef;
     delete fDumElemDecl;
     delete fDumEntityDecl;

     // The parameter entity declaration pool is allocated in the constructor
     delete fPEntityDeclPool;
 }

 // -----------------------------------------------------------------------
 //  Setter methods
 // -----------------------------------------------------------------------
 void DTDScanner::setScannerInfo(XMLScanner* const      owningScanner
                             , ReaderMgr* const      readerMgr
                             , XMLBufferMgr* const   bufMgr)
 {
     // These are only referenced, never owned (the destructor leaves them be)
     fScanner   = owningScanner;
     fReaderMgr = readerMgr;
     fBufMgr    = bufMgr;

     // The scanner's empty namespace id is only used when namespaces are on
     fEmptyNamespaceId = fScanner->getDoNamespaces()
                       ? fScanner->getEmptyNamespaceId()
                       : 0;

     // Remember which reader was current when the scanner info was supplied
     fDocTypeReaderId = fReaderMgr->getCurrentReaderNum();
 }


 // ---------------------------------------------------------------------------
 //  DTDScanner: Private scanning methods
 // ---------------------------------------------------------------------------
 bool DTDScanner::checkForPERef(   const bool    inLiteral
                                 , const bool    inMarkup)
 {
     //
     //  Skips optional whitespace and expands any run of parameter entity
     //  references found at the current position. The two flags are passed
     //  straight through to expandPERef(). The return value says whether
     //  any whitespace was skipped, either up front or after an expansion.
     //
     bool sawSpace = fReaderMgr->skippedSpace();
     if (sawSpace)
         fReaderMgr->skipPastSpaces();

     // No percent sign means there is no PE reference to expand
     if (!fReaderMgr->skippedChar(chPercent))
         return sawSpace;

     // One pass per percent sign that has been eaten
     do
     {
         if (!expandPERef(false, inLiteral, inMarkup, false))
             fScanner->emitError(XMLErrs::ExpectedEntityRefName);

         // Skip any further spaces, which may come from the expanded value
         if (fReaderMgr->skippedSpace())
         {
             fReaderMgr->skipPastSpaces();
             sawSpace = true;
         }
     } while (fReaderMgr->skippedChar(chPercent));

     return sawSpace;
 }


 bool DTDScanner::expandPERef( const   bool    scanExternal
                                 , const bool    inLiteral
                                 , const bool    inMarkup
                                 , const bool    throwEndOfExt)
 {
     fScanner->setHasNoDTD(false);
     XMLBufBid bbName(fBufMgr);

     //
     //  If we are in the internal subset and in markup, then this is
     //  an error but we go ahead and do it anyway.
     //
     if (fInternalSubset && inMarkup)
         fScanner->emitError(XMLErrs::PERefInMarkupInIntSubset);

     if (!fReaderMgr->getName(bbName.getBuffer()))
     {
         fScanner->emitError(XMLErrs::ExpectedPEName);

         // Skip the semicolon if that's what we ended up on
         fReaderMgr->skippedChar(chSemiColon);
         return false;
     }

     // If no terminating semicolon, emit an error but try to keep going
     if (!fReaderMgr->skippedChar(chSemiColon))
         fScanner->emitError(XMLErrs::UnterminatedEntityRef, bbName.getRawBuffer());

     //
     //  Look it up in the PE decl pool and see if it exists. If not, just
     //  emit an error and continue.
     //
     XMLEntityDecl* decl = fPEntityDeclPool->getByKey(bbName.getRawBuffer());
     if (!decl)
     {
         // XML 1.0 Section 4.1
         if (fScanner->getStandalone()) {
             // no need to check fScanner->fHasNoDTD which is for sure false
             // since we are in expandPERef already
             fScanner->emitError(XMLErrs::EntityNotFound, bbName.getRawBuffer());
         }
         else {
             if (fScanner->getDoValidation())
                 fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, bbName.getRawBuffer());
         }

         return false;
     }

     //
     // XML 1.0 Section 2.9
     //  If we are a standalone document, then it has to have been declared
     //  in the internal subset. Keep going though.
     //
     if (fScanner->getDoValidation() && fScanner->getStandalone() && !decl->getDeclaredInIntSubset())
         fScanner->getValidator()->emitError(XMLValid::VC_IllegalRefInStandalone, bbName.getRawBuffer());

     //
     //  Okee dokee, we found it. So create either a memory stream with
     //  the entity value contents, or a file stream if its an external
     //  entity.
     //
     if (decl->isExternal())
     {
         // And now create a reader to read this entity
         InputSource* srcUsed;
         XMLReader* reader = fReaderMgr->createReader
         (
             decl->getBaseURI()
             , decl->getSystemId()
             , decl->getPublicId()
             , false
             , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral
             , XMLReader::Type_PE
             , XMLReader::Source_External
             , srcUsed
             , fScanner->getCalculateSrcOfs()
             , fScanner->getDisableDefaultEntityResolution()
         );

         // Put a janitor on the source so its cleaned up on exit
         Janitor<InputSource> janSrc(srcUsed);

         // If the creation failed then throw an exception
         if (!reader)
             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : decl->getSystemId(), fMemoryManager);

         // Set the 'throw at end' flag, to the one we were given
         reader->setThrowAtEnd(throwEndOfExt);

         //
         //  Push the reader. If its a recursive expansion, then emit an error
         //  and return an failure.
         //
         if (!fReaderMgr->pushReader(reader, decl))
         {
             fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());
             return false;
         }

         //
         //  If the caller wants us to scan the external entity, then lets
         //  do that now.
         //
         if (scanExternal)
         {
             XMLEntityHandler* entHandler = fScanner->getEntityHandler();

             // If we have an entity handler, tell it we are starting this entity
             if (entHandler)
                 entHandler->startInputSource(*srcUsed);

             //
             //  Scan the external entity now. The parameter tells it that
             //  it is not in an include section. Get the current reader
             //  level so we can catch partial markup errors and be sure
             //  to get back to here if we get an exception out of the
             //  ext subset scan.
             //
             const unsigned int readerNum = fReaderMgr->getCurrentReaderNum();
             try
             {
                 scanExtSubsetDecl(false, false);
             }
             catch(const OutOfMemoryException&)
             {
                 throw;
             }
             catch(...)
             {
                 // Pop the reader back to the original level
                 fReaderMgr->cleanStackBackTo(readerNum);

                 // End the input source, even though its not happy
                 if (entHandler)
                     entHandler->endInputSource(*srcUsed);
                 throw;
             }

             // If we have an entity handler, tell it we are ending this entity
             if (entHandler)
                 entHandler->endInputSource(*srcUsed);
         }
         else {
             // If it starts with the XML string, then parse a text decl
             if (fScanner->checkXMLDecl(true))
                 scanTextDecl();
         }
     }
      else
     {
         // Create a reader over a memory stream over the entity value
         XMLReader* valueReader = fReaderMgr->createIntEntReader
         (
             decl->getName()
             , inLiteral ? XMLReader::RefFrom_Literal : XMLReader::RefFrom_NonLiteral
             , XMLReader::Type_PE
             , decl->getValue()
             , decl->getValueLen()
             , false
         );

         //
         //  Trt to push the entity reader onto the reader manager stack,
         //  where it will become the subsequent input. If it fails, that
         //  means the entity is recursive, so issue an error. The reader
         //  will have just been discarded, but we just keep going.
         //
         if (!fReaderMgr->pushReader(valueReader, decl))
             fScanner->emitError(XMLErrs::RecursiveEntity, decl->getName());
     }

     return true;
 }


 bool DTDScanner::getQuotedString(XMLBuffer& toFill)
 {
     //
     //  Reads a single or double quoted string into toFill, without the
     //  quotes. Returns false if the next char is not a quote, or if a null
     //  char turns up before the closing quote; toFill then holds whatever
     //  was collected up to that point.
     //
     toFill.reset();

     // The opening quote decides which char closes the string
     XMLCh quoteCh;
     if (!fReaderMgr->skipIfQuote(quoteCh))
         return false;

     for (XMLCh curCh = fReaderMgr->getNextChar();
          curCh != quoteCh;
          curCh = fReaderMgr->getNextChar())
     {
         //
         //  A null char should never show up inside the string. If it
         //  does, just fail and leave the reporting to the calling code.
         //
         if (!curCh)
             return false;

         toFill.append(curCh);
     }
     return true;
 }


 //
 //  Scans one attribute definition inside an attribute list declaration:
 //  the attribute name, its type (including any enumeration), and its
 //  default declaration.
 //
 //  parentElem  The element declaration the attribute belongs to. An
 //              attribute not yet known to it is added to it.
 //  bufToUse    Scratch buffer; receives the attribute name and, for
 //              NOTATION and enumerated types, the enumeration text.
 //
 //  Returns the definition that was scanned into. This is the shared dummy
 //  definition when parentElem already had an attribute of that name.
 //  Returns zero when the name, the type keyword, or the enumeration could
 //  not be scanned; note that on the type and enumeration failures a newly
 //  created definition has already been added to parentElem.
 //
 XMLAttDef*
 DTDScanner::scanAttDef(DTDElementDecl& parentElem, XMLBuffer& bufToUse)
 {
     // Check for PE ref or optional whitespace
     checkForPERef(false, true);

     // Get the name of the attribute
     if (!fReaderMgr->getName(bufToUse))
     {
         fScanner->emitError(XMLErrs::ExpectedAttrName);
         return 0;
     }

     //
     //  Look up this attribute in the parent element's attribute list. If
     //  it already exists, then use the dummy.
     //
     DTDAttDef* decl = parentElem.getAttDef(bufToUse.getRawBuffer());
     if (decl)
     {
         // It already exists, so put out a warning
         fScanner->emitError
         (
             XMLErrs::AttListAlreadyExists
             , bufToUse.getRawBuffer()
             , parentElem.getFullName()
         );

         // Use the dummy decl to parse into and set its name to the name we got
         // (the dummy is created on first use and gets its id only then)
         if (!fDumAttDef)
         {
             fDumAttDef = new (fMemoryManager) DTDAttDef(fMemoryManager);
             fDumAttDef->setId(fNextAttrId++);
         }
         fDumAttDef->setName(bufToUse.getRawBuffer());
         decl = fDumAttDef;
     }
      else
     {
         //
         //  It does not already exist so create a new one, give it the next
         //  available unique id, and add it
         //
         decl = new (fGrammarPoolMemoryManager) DTDAttDef
         (
             bufToUse.getRawBuffer()
             , XMLAttDef::CData
             , XMLAttDef::Implied
             , fGrammarPoolMemoryManager
         );
         decl->setId(fNextAttrId++);
         decl->setExternalAttDeclaration(isReadingExternalEntity());
         parentElem.addAttDef(decl);
     }

     // Set a flag to indicate whether we are doing a dummy parse
     const bool isIgnored = (decl == fDumAttDef);

     // Space is required here, so check for PE ref, and require space
     if (!checkForPERef(false, true))
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     //
     //  Next has to be one of the attribute type strings. This tells us what
     //  is to follow. Each test below consumes input when it matches, so
     //  the order of the tests matters.
     //
     if (fReaderMgr->skippedString(XMLUni::fgCDATAString))
     {
         decl->setType(XMLAttDef::CData);
     }
      else if (fReaderMgr->skippedString(XMLUni::fgIDString))
     {
         // "ID" may continue as "IDREF" and then as "IDREFS"
         if (!fReaderMgr->skippedString(XMLUni::fgRefString))
             decl->setType(XMLAttDef::ID);
         else if (!fReaderMgr->skippedChar(chLatin_S))
             decl->setType(XMLAttDef::IDRef);
         else
             decl->setType(XMLAttDef::IDRefs);
     }
      else if (fReaderMgr->skippedString(XMLUni::fgEntitString))
     {
         // The common prefix must be completed to ENTITY or ENTITIES
         if (fReaderMgr->skippedChar(chLatin_Y))
         {
             decl->setType(XMLAttDef::Entity);
         }
          else if (fReaderMgr->skippedString(XMLUni::fgIESString))
         {
             decl->setType(XMLAttDef::Entities);
         }
          else
         {
             fScanner->emitError
             (
                 XMLErrs::ExpectedAttributeType
                 , decl->getFullName()
                 , parentElem.getFullName()
             );
             return 0;
         }
     }
      else if (fReaderMgr->skippedString(XMLUni::fgNmTokenString))
     {
         // A trailing 'S' turns NMTOKEN into NMTOKENS
         if (fReaderMgr->skippedChar(chLatin_S))
             decl->setType(XMLAttDef::NmTokens);
         else
             decl->setType(XMLAttDef::NmToken);
     }
      else if (fReaderMgr->skippedString(XMLUni::fgNotationString))
     {
         // Check for PE ref and require space
         if (!checkForPERef(false, true))
             fScanner->emitError(XMLErrs::ExpectedWhitespace);

         decl->setType(XMLAttDef::Notation);
         if (!scanEnumeration(*decl, bufToUse, true))
             return 0;

         // Set the value as the enumeration for this decl
         decl->setEnumeration(bufToUse.getRawBuffer());
     }
      else if (fReaderMgr->skippedChar(chOpenParen))
     {
         // An open paren with no keyword starts a plain enumerated type
         decl->setType(XMLAttDef::Enumeration);
         if (!scanEnumeration(*decl, bufToUse, false))
             return 0;

         // Set the value as the enumeration for this decl
         decl->setEnumeration(bufToUse.getRawBuffer());
     }
      else
     {
         fScanner->emitError
         (
             XMLErrs::ExpectedAttributeType
             , decl->getFullName()
             , parentElem.getFullName()
         );
         return 0;
     }

     // Space is required here, so check for PE ref, and require space
     if (!checkForPERef(false, true))
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     // And then scan for the optional default value declaration
     scanDefaultDecl(*decl);

     // If validating, then do a couple of validation constraints
     if (fScanner->getDoValidation())
     {
         // An ID attribute may only be declared as implied or required
         if (decl->getType() == XMLAttDef::ID)
         {
             if ((decl->getDefaultType() != XMLAttDef::Implied)
             &&  (decl->getDefaultType() != XMLAttDef::Required))
             {
                 fScanner->getValidator()->emitError(XMLValid::BadIDAttrDefType, decl->getFullName());
             }
         }

         // if attdef is xml:space, check correct enumeration (default|preserve)
         const XMLCh fgXMLSpace[] = { chLatin_x, chLatin_m, chLatin_l, chColon, chLatin_s, chLatin_p, chLatin_a, chLatin_c, chLatin_e, chNull };

         if (XMLString::equals(decl->getFullName(),fgXMLSpace)) {
             const XMLCh fgPreserve[] = { chLatin_p, chLatin_r, chLatin_e, chLatin_s, chLatin_e, chLatin_r, chLatin_v, chLatin_e, chNull };
             const XMLCh fgDefault[] = { chLatin_d, chLatin_e, chLatin_f, chLatin_a, chLatin_u, chLatin_l, chLatin_t, chNull };
             bool ok = false;
             if (decl->getType() == XMLAttDef::Enumeration) {
                 //
                 //  Accepted forms: a single value that is either "default"
                 //  or "preserve", or exactly those two values in either
                 //  order. Any other type or value set is rejected.
                 //
                 BaseRefVectorOf<XMLCh>* enumVector = XMLString::tokenizeString(decl->getEnumeration(), fMemoryManager);
                 int size = enumVector->size();
                 ok = (size == 1 &&
                      (XMLString::equals(enumVector->elementAt(0), fgDefault) ||
                       XMLString::equals(enumVector->elementAt(0), fgPreserve))) ||
                      (size == 2 &&
                      (XMLString::equals(enumVector->elementAt(0), fgDefault) &&
                       XMLString::equals(enumVector->elementAt(1), fgPreserve))) ||
                      (size == 2 &&
                      (XMLString::equals(enumVector->elementAt(1), fgDefault) &&
                       XMLString::equals(enumVector->elementAt(0), fgPreserve)));
                 // The token vector is only needed for the check above
                 delete enumVector;
             }
             if (!ok)
                 fScanner->getValidator()->emitError(XMLValid::IllegalXMLSpace);
         }
     }

     // If we have a doc type handler, tell it about this attdef.
     // The last argument flags a definition parsed into the dummy decl.
     if (fDocTypeHandler)
         fDocTypeHandler->attDef(parentElem, *decl, isIgnored);
     return decl;
 }


 //
 //  Scans the remainder of an attribute list declaration: the owning
 //  element's name followed by attribute definitions up to the closing
 //  angle bracket. An element that is not yet known to the grammar is
 //  faulted in. If a doc type handler is installed, it is told about the
 //  start and end of the list and about whitespace in between. An
 //  UnexpectedEOFException is thrown if the input ends inside the list.
 //
 void DTDScanner::scanAttListDecl()
 {
     // Whitespace (possibly via a PE ref) must come first
     if (!checkForPERef(false, true))
     {
         fScanner->emitError(XMLErrs::ExpectedWhitespace);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // Then the name of the element that the attributes belong to
     XMLBufBid bbName(fBufMgr);
     if (!fReaderMgr->getName(bbName.getBuffer()))
     {
         fScanner->emitError(XMLErrs::ExpectedElementName);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // Look the element up in the grammar
     DTDElementDecl* elemDecl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
     if (!elemDecl)
     {
         //
         //  Not there yet, so fault in a declaration and add it to the
         //  grammar. Its create reason records that it only exists because
         //  of an attribute list.
         //
         elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
         (
             bbName.getRawBuffer()
             , fEmptyNamespaceId
             , DTDElementDecl::Any
             , fGrammarPoolMemoryManager
         );
         elemDecl->setCreateReason(XMLElementDecl::AttList);
         elemDecl->setExternalElemDeclaration(isReadingExternalEntity());
         fDTDGrammar->putElemDecl((XMLElementDecl*) elemDecl);
     }

     // Announce the start of the list to the doc type handler, if any
     if (fDocTypeHandler)
         fDocTypeHandler->startAttList(*elemDecl);

     // A scratch buffer shared by the whitespace and attribute scanning
     XMLBufBid   bbTmp(fBufMgr);
     XMLBuffer&  tmpBuf = bbTmp.getBuffer();
     bool        seenAnId = false;
     for (;;)
     {
         // Peek at the next char to decide what to do
         const XMLCh nextCh = fReaderMgr->peekNextChar();

         // Running out of input inside the declaration is fatal
         if (!nextCh)
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

         // The closing bracket ends the list; eat it and stop
         if (nextCh == chCloseAngle)
         {
             fReaderMgr->getNextChar();
             break;
         }

         if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
         {
             //
             //  With a doc type handler installed the whitespace is
             //  gathered and passed on to it, otherwise it is skipped.
             //
             if (fDocTypeHandler)
             {
                 fReaderMgr->getSpaces(tmpBuf);
                 fDocTypeHandler->doctypeWhitespace
                 (
                     tmpBuf.getRawBuffer()
                     , tmpBuf.getLen()
                 );
             }
              else
             {
                 fReaderMgr->skipPastSpaces();
             }
             continue;
         }

         if (nextCh == chPercent)
         {
             // Eat the percent sign and expand the parameter entity ref
             fReaderMgr->getNextChar();
             expandPERef(false, false, true);
             continue;
         }

         // Anything else has to start an attribute definition
         XMLAttDef* attDef = scanAttDef(*elemDecl, tmpBuf);
         if (!attDef)
         {
             // Give up on this declaration and resync past the '>'
             fReaderMgr->skipPastChar(chCloseAngle);
             break;
         }

         //
         //  When validating, an ID typed attribute is only allowed once
         //  within this list. Complain about any further one, and record
         //  that one has now been seen.
         //
         if (fScanner->getDoValidation() && (attDef->getType() == XMLAttDef::ID))
         {
             if (seenAnId)
                 fScanner->getValidator()->emitError(XMLValid::MultipleIdAttrs, elemDecl->getFullName());
             seenAnId = true;
         }
     }

     // Announce the end of the list to the doc type handler, if any
     if (fDocTypeHandler)
         fDocTypeHandler->endAttList(*elemDecl);
 }


 //
 //  This method is called to scan the value of an attribute in content. This
 //  involves some normalization and replacement of general entity and
 //  character references.
 //
 //  End-of-entity conditions must be dealt with here. During DTD scan, they can come
 //  from external entities. During content, they can come from any entity.
 //  We just eat the end of entity and continue with our scan until we come
 //  to the closing quote. If an unterminated value causes us to go through
 //  subsequent entities, that will cause errors back in the calling code,
 //  but there's little we can do about it here.
 //
 bool DTDScanner::scanAttValue(const   XMLCh* const        attrName
                                 ,       XMLBuffer&          toFill
                                 , const XMLAttDef::AttTypes type)
 {
     //
     //  attrName    Name of the attribute, used only in error messages.
     //  toFill      Receives the normalized value, without the quotes.
     //  type        CData gets simple tab/newline replacement; every other
     //              type gets whitespace collapsing and trimming.
     //
     //  Returns true once the closing quote is found in the reader the
     //  opening quote came from. Returns false if there is no opening
     //  quote, or if the value spills over into an earlier reader. Throws
     //  UnexpectedEOFException if a null char is read.
     //

     // Whitespace normalization states, only used for non-CDATA types
     enum States
     {
         InWhitespace
         , InContent
     };

     // Reset the target buffer
     toFill.reset();

     // Get the next char which must be a single or double quote
     XMLCh quoteCh;
     if (!fReaderMgr->skipIfQuote(quoteCh))
         return false;

     //
     //  We have to get the current reader because we have to ignore closing
     //  quotes until we hit the same reader again.
     //
     const unsigned int curReader = fReaderMgr->getCurrentReaderNum();

     //
     //  Loop until we get the attribute value. Note that we use a double
     //  loop here to avoid the setup/teardown overhead of the exception
     //  handler on every round.
     //
     XMLCh   nextCh;
     // Second char handed back by scanEntityRef; appended after nextCh when
     // non-zero (presumably the trailing half of a surrogate pair)
     XMLCh   secondCh = 0;
     States  curState = InContent;
     // Set once a non-whitespace char has been seen, so that a collapsed
     // space is only emitted between content and never in front of it
     bool    firstNonWS = false;
     bool    gotLeadingSurrogate = false;
     // Assigned on every round before it is read
     bool    escaped;
     while (true)
     {
     try
     {
         while(true)
         {
             nextCh = fReaderMgr->getNextChar();

             if (!nextCh)
                 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

             // Check for our ending quote in the same entity
             if (nextCh == quoteCh)
             {
                 if (curReader == fReaderMgr->getCurrentReaderNum())
                     return true;

                 // Watch for spillover into a previous entity
                 if (curReader > fReaderMgr->getCurrentReaderNum())
                 {
                     fScanner->emitError(XMLErrs::PartialMarkupInEntity);
                     return false;
                 }

                 // Otherwise the quote belongs to a nested reader and is
                 // treated as ordinary content below
             }

             //
             //  Check for an entity ref now, before we let it affect our
             //  whitespace normalization logic below. We ignore the empty flag
             //  in this one.
             //
             escaped = false;
             if (nextCh == chAmpersand)
             {
                 // Any result other than a returned char means there is
                 // nothing to add on this round
                 if (scanEntityRef(nextCh, secondCh, escaped) != EntityExp_Returned)
                 {
                     gotLeadingSurrogate = false;
                     continue;
                 }
             }
             else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
             {
                 // Check for correct surrogate pairs
                 // (two leading surrogates in a row is an error)
                 if (gotLeadingSurrogate)
                     fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                 else
                     gotLeadingSurrogate = true;
             }
              else
             {
                 if (gotLeadingSurrogate)
                 {
                     // A leading surrogate must be followed by a char in
                     // the 0xDC00 to 0xDFFF range
                     if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                         fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                 }
                 // Its got to at least be a valid XML character
                 else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
                 {
                     // Format the offending char in radix 16 for the message
                     XMLCh tmpBuf[9];
                     XMLString::binToText
                     (
                         nextCh
                         , tmpBuf
                         , 8
                         , 16
                         , fMemoryManager
                     );
                     fScanner->emitError
                     (
                         XMLErrs::InvalidCharacterInAttrValue
                         , attrName
                         , tmpBuf
                     );
                 }

                 gotLeadingSurrogate = false;
             }

             //
             //  If its not escaped, then make sure its not a < character, which
             //  is not allowed in attribute values.
             //
             if (!escaped && (nextCh == chOpenAngle))
                 fScanner->emitError(XMLErrs::BracketInAttrValue, attrName);

             //
             //  If the attribute is a CDATA type we do simple replacement of
             //  tabs and new lines with spaces, if the character is not escaped
             //  by way of a char ref.
             //
             //  Otherwise, we do the standard non-CDATA normalization of
             //  compressing whitespace to single spaces and getting rid of
             //  leading and trailing whitespace.
             //
             if (type == XMLAttDef::CData)
             {
                 if (!escaped)
                 {
                     if ((nextCh == 0x09) || (nextCh == 0x0A) || (nextCh == 0x0D))
                         nextCh = chSpace;
                 }
             }
              else
             {
                 if (curState == InWhitespace)
                 {
                     if (!fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
                     {
                         // Leaving a whitespace run: emit the single
                         // separating space unless this is leading space
                         if (firstNonWS)
                             toFill.append(chSpace);
                         curState = InContent;
                         firstNonWS = true;
                     }
                      else
                     {
                         // Still inside the whitespace run, drop the char
                         continue;
                     }
                 }
                  else if (curState == InContent)
                 {
                     if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
                     {
                         // Entering a whitespace run; nothing is emitted
                         // until content follows, which trims trailing space
                         curState = InWhitespace;
                         continue;
                     }
                     firstNonWS = true;
                 }
             }

             // Else add it to the buffer
             toFill.append(nextCh);

             // And add the second char, if scanEntityRef gave us one
             if (secondCh)
             {
                 toFill.append(secondCh);
                 secondCh=0;
             }
         }
     }

     catch(const EndOfEntityException&)
     {
         // Just eat it and continue.
         gotLeadingSurrogate = false;
         escaped = false;
     }
     }
     // Not reached: both loops above are only left by a return or a throw
     return true;
 }


 //
 //  Scans the numeric part of a character reference, up to and including
 //  the terminating semicolon. (Presumably the caller has already consumed
 //  the leading '&#' - confirm against the call sites.) An 'x' selects hex
 //  digits, otherwise the digits are decimal. An upper case 'X' is accepted
 //  for recovery purposes but reported as an error.
 //
 //  On success, 'first' holds the character, or the leading surrogate if the
 //  value is above 0xFFFF, and 'second' holds the trailing surrogate or zero.
 //  Neither is modified when false is returned.
 //
 //  Returns false, after emitting an error, if a non-digit is seen before the
 //  semicolon or if the value is not a legal character. Throws an
 //  UnexpectedEOFException if the input ends inside the reference.
 //
 bool DTDScanner::scanCharRef(XMLCh& first, XMLCh& second)
 {
     bool gotOne = false;
     unsigned int value = 0;

     //
     //  Set the radix. Its supposed to be a lower case x if hex. But, in
     //  order to recover well, we check for an upper and put out an error
     //  for that.
     //
     unsigned int radix = 10;

     if (fReaderMgr->skippedChar(chLatin_x))
     {
         radix = 16;
     }
      else if (fReaderMgr->skippedChar(chLatin_X))
     {
         fScanner->emitError(XMLErrs::HexRadixMustBeLowerCase);
         radix = 16;
     }

     while (true)
     {
         const XMLCh nextCh = fReaderMgr->peekNextChar();

         // Watch for EOF
         if (!nextCh)
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

         // Break out on the terminating semicolon
         if (nextCh == chSemiColon)
         {
             fReaderMgr->getNextChar();
             break;
         }

         //
         //  Convert this char to a binary value, or bail out if its not
         //  one.
         //
         unsigned int nextVal;
         if ((nextCh >= chDigit_0) && (nextCh <= chDigit_9))
             nextVal = (unsigned int)(nextCh - chDigit_0);
         else if ((nextCh >= chLatin_A) && (nextCh <= chLatin_F))
             nextVal= (unsigned int)(10 + (nextCh - chLatin_A));
         else if ((nextCh >= chLatin_a) && (nextCh <= chLatin_f))
             nextVal = (unsigned int)(10 + (nextCh - chLatin_a));
         else
         {
             //
             //  If we got at least a digit, then do an unterminated ref
             //  error. Else, do an expected a numerical ref thing.
             //
             if (gotOne)
                 fScanner->emitError(XMLErrs::UnterminatedCharRef);
             else
                 fScanner->emitError(XMLErrs::ExpectedNumericalCharRef);

             return false;
         }

         //
         //  Make sure its valid for the radix. If not, then just eat the
         //  digit and go on after issuing an error. Else, update the
         //  running value with this new digit.
         //
         if (nextVal >= radix)
         {
             XMLCh tmpStr[2];
             tmpStr[0] = nextCh;
             tmpStr[1] = chNull;
             fScanner->emitError(XMLErrs::BadDigitForRadix, tmpStr);
         }
          else
         {
             value = (value * radix) + nextVal;

             //
             //  Guard against overflow. Without this check a long enough
             //  digit string wraps the accumulator around and can end up
             //  looking like a perfectly legal character. Since the value
             //  never exceeds 0x10FFFF on entry to the multiply above, the
             //  arithmetic itself cannot overflow either.
             //
             if (value > 0x10FFFF)
             {
                 // Character reference was not in the valid range
                 fScanner->emitError(XMLErrs::InvalidCharacterRef);
                 return false;
             }
         }

         // Indicate that we got at least one good digit
         gotOne = true;

         // Eat the char we just processed
         fReaderMgr->getNextChar();
     }

     // Return the char (or chars)
     // And check if the character expanded is valid or not
     if (value >= 0x10000 && value <= 0x10FFFF)
     {
         // Above the BMP, so split it into a surrogate pair
         value -= 0x10000;
         first  = XMLCh((value >> 10) + 0xD800);
         second = XMLCh((value & 0x3FF) + 0xDC00);
     }
     else if (value <= 0xFFFD)
     {
         first  = XMLCh(value);
         second = 0;
         if (!fReaderMgr->getCurrentReader()->isXMLChar(first) && !fReaderMgr->getCurrentReader()->isControlChar(first)) {
             // Character reference was not in the valid range
             fScanner->emitError(XMLErrs::InvalidCharacterRef);
             return false;
         }
     }
     else {
         // Character reference was not in the valid range
         fScanner->emitError(XMLErrs::InvalidCharacterRef);
         return false;
     }

     return true;
 }


 //
 //  Recursively scans one parenthesized group of a 'children' content model
 //  and builds the content spec node tree for it. The caller is expected to
 //  have consumed the opening parenthesis already; this method reads through
 //  the matching closing parenthesis and any repetition character after it.
 //
 //  elemDecl is passed along on recursion and otherwise only supplies the
 //  element name for one of the error messages. bufToUse is a scratch buffer
 //  that receives each child element name as it is scanned.
 //
 //  Returns the top node of the tree for this group, or 0 if an error was
 //  reported. On the failure paths below the partially built tree is deleted
 //  before returning.
 //
 ContentSpecNode*
 DTDScanner::scanChildren(const DTDElementDecl& elemDecl, XMLBuffer& bufToUse)
 {
     // Check for a PE ref here, but don't require spaces
     checkForPERef(false, true);

     // We have to check entity nesting here
     unsigned int curReader;

     //
     //  We know that the caller just saw an opening parenthesis, so we need
     //  to parse until we hit the end of it, recursing for other nested
     //  parentheses we see.
     //
     //  We have to check for one up front, since it could be something like
     //  (((a)*)) etc...
     //
     ContentSpecNode* curNode = 0;
     if (fReaderMgr->skippedChar(chOpenParen))
     {
         // Remember the reader so we can tell if the group closes elsewhere
         curReader = fReaderMgr->getCurrentReaderNum();

         // Lets call ourself and get back the resulting node
         curNode = scanChildren(elemDecl, bufToUse);

         // If that failed, no need to go further, return failure
         if (!curNode)
             return 0;

         // The nested group must open and close within the same reader
         if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
             fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
     }
      else
     {
         // Not a nested paren, so it must be a leaf node
         if (!fReaderMgr->getName(bufToUse))
         {
             fScanner->emitError(XMLErrs::ExpectedElementName);
             return 0;
         }

         //
         //  Create a leaf node for it. If we can find the element id for
         //  this element, then use it. Else, we have to fault in an element
         //  decl, marked as created because of being in a content model.
         //
         XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
         if (!decl)
         {
             decl = new (fGrammarPoolMemoryManager) DTDElementDecl
             (
                 bufToUse.getRawBuffer()
                 , fEmptyNamespaceId
                 , DTDElementDecl::Any
                 , fGrammarPoolMemoryManager
             );
             decl->setCreateReason(XMLElementDecl::InContentModel);
             decl->setExternalElemDeclaration(isReadingExternalEntity());
             fDTDGrammar->putElemDecl(decl);
         }
         curNode = new (fGrammarPoolMemoryManager) ContentSpecNode
         (
             decl->getElementName()
             , fGrammarPoolMemoryManager
         );

         // Check for a PE ref here, but don't require spaces
         const bool gotSpaces = checkForPERef(false, true);

         //
         //  Check for a repetition character after the leaf. A node other
         //  than the leaf itself coming back means a repetition node was
         //  wrapped around it, so the character has to be consumed.
         //
         const XMLCh repCh = fReaderMgr->peekNextChar();
         ContentSpecNode* tmpNode = makeRepNode(repCh, curNode, fGrammarPoolMemoryManager);
         if (tmpNode != curNode)
         {
             // Whitespace between the name and its repetition char is an error
             if (gotSpaces)
             {
                 // If the error is going to throw, clean up the node first
                 if (fScanner->emitErrorWillThrowException(XMLErrs::UnexpectedWhitespace))
                 {
                     delete tmpNode;
                 }
                 fScanner->emitError(XMLErrs::UnexpectedWhitespace);
             }
             fReaderMgr->getNextChar();
             curNode = tmpNode;
         }
     }

     // Check for a PE ref here, but don't require spaces
     checkForPERef(false, true);

     //
     //  Ok, the next character tells us what kind of content model this
     //  particular parenthesized section is. Its either a sequence if we
     //  see ',', a choice if we see '|', or a single leaf node if we see
     //  a closing paren.
     //
     const XMLCh opCh = fReaderMgr->peekNextChar();

     if ((opCh != chComma)
     &&  (opCh != chPipe)
     &&  (opCh != chCloseParen))
     {
         // Not a legal char, so delete our node and return failure
         delete curNode;
         fScanner->emitError(XMLErrs::ExpectedSeqChoiceLeaf);
         return 0;
     }

     //
     //  Create the head node of the correct type. We need this to remember
     //  the top of the local tree. If it was a single subexpr, then just
     //  set the head node to the current node. For the others, we'll build
     //  the tree off the second child as we move across.
     //
     //  Note that the operator character is only peeked at above. For a
     //  sequence or choice it is consumed by the loop below, while for a
     //  single subexpr the closing paren is consumed right here.
     //
     ContentSpecNode* headNode = 0;
     ContentSpecNode::NodeTypes curType = ContentSpecNode::UnknownType;
     if (opCh == chComma)
     {
         curType = ContentSpecNode::Sequence;
         headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
         (
             curType
             , curNode
             , 0
             , true
             , true
             , fGrammarPoolMemoryManager
         );
         curNode = headNode;
     }
      else if (opCh == chPipe)
     {
         curType = ContentSpecNode::Choice;
         headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
         (
             curType
             , curNode
             , 0
             , true
             , true
             , fGrammarPoolMemoryManager
         );
         curNode = headNode;
     }
      else
     {
         headNode = curNode;
         fReaderMgr->getNextChar();
     }

     //
     //  If it was a sequence or choice, we just loop until we get to the
     //  end of our section, adding each new leaf or sub expression to the
     //  right child of the current node, and making that new node the current
     //  node.
     //
     if ((opCh == chComma) || (opCh == chPipe))
     {
         // The node whose second child is the current node, once there is one
         ContentSpecNode* lastNode = 0;
         while (true)
         {
             //
             //  The next thing must either be another | or , character followed
             //  by another leaf or subexpression, or a closing parenthesis, or a
             //  PE ref.
             //
             if (fReaderMgr->lookingAtChar(chPercent))
             {
                 checkForPERef(false, true);
             }
              else if (fReaderMgr->skippedSpace())
             {
                 // Just skip whitespace
                 fReaderMgr->skipPastSpaces();
             }
              else if (fReaderMgr->skippedChar(chCloseParen))
             {
                 //
                 //  We've hit the end of this section, so break out. But, we
                 //  need to see if we left a partial sequence or choice node
                 //  without a second node. If so, we have to undo that and
                 //  put its left child into the right node of the previous
                 //  node.
                 //
                 if ((curNode->getType() == ContentSpecNode::Choice)
                 ||  (curNode->getType() == ContentSpecNode::Sequence))
                 {
                     if (!curNode->getSecond())
                     {
                         ContentSpecNode* saveFirst = curNode->orphanFirst();
                         lastNode->setSecond(saveFirst);
                         curNode = lastNode;
                     }
                 }
                 break;
             }
              else if (fReaderMgr->skippedChar(opCh))
             {
                 // Check for a PE ref here, but don't require spaces
                 checkForPERef(false, true);

                 if (fReaderMgr->skippedChar(chOpenParen))
                 {
                     // Remember the reader so we can check the nesting below
                     curReader = fReaderMgr->getCurrentReaderNum();

                     // Recurse to handle this new guy
                     ContentSpecNode* subNode;
                     try {
                         subNode = scanChildren(elemDecl, bufToUse);
                     }
                     catch (const XMLErrs::Codes)
                     {
                         // Don't leak the tree built so far, then pass it on
                         delete headNode;
                         throw;
                     }

                     // If it failed, we are done, clean up here and return failure
                     if (!subNode)
                     {
                         delete headNode;
                         return 0;
                     }

                     if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
                         fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);

                     // Else patch it in and make it the new current
                     ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode
                     (
                         curType
                         , subNode
                         , 0
                         , true
                         , true
                         , fGrammarPoolMemoryManager
                     );
                     curNode->setSecond(newCur);
                     lastNode = curNode;
                     curNode = newCur;
                 }
                  else
                 {
                     //
                     //  Got to be a leaf node, so get a name. If we cannot get
                     //  one, then clean up and get outa here.
                     //
                     if (!fReaderMgr->getName(bufToUse))
                     {
                         delete headNode;
                         fScanner->emitError(XMLErrs::ExpectedElementName);
                         return 0;
                     }

                     //
                     //  Create a leaf node for it. If we can find the element
                     //  id for this element, then use it. Else, we have to
                     //  fault in an element decl, marked as created because
                     //  of being in a content model.
                     //
                     XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bufToUse.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
                     if (!decl)
                     {
                         decl = new (fGrammarPoolMemoryManager) DTDElementDecl
                         (
                             bufToUse.getRawBuffer()
                             , fEmptyNamespaceId
                             , DTDElementDecl::Any
                             , fGrammarPoolMemoryManager
                         );
                         decl->setCreateReason(XMLElementDecl::InContentModel);
                         decl->setExternalElemDeclaration(isReadingExternalEntity());
                         fDTDGrammar->putElemDecl(decl);
                     }

                     ContentSpecNode* tmpLeaf = new (fGrammarPoolMemoryManager) ContentSpecNode
                     (
                         decl->getElementName()
                         , fGrammarPoolMemoryManager
                     );

                     //
                     //  Check for a repetition character after the leaf. Unlike
                     //  the first leaf of the group, no whitespace or PE ref
                     //  check is made before it here.
                     //
                     const XMLCh repCh = fReaderMgr->peekNextChar();
                     ContentSpecNode* tmpLeaf2 = makeRepNode(repCh, tmpLeaf, fGrammarPoolMemoryManager);
                     if (tmpLeaf != tmpLeaf2)
                         fReaderMgr->getNextChar();

                     //
                     //  Create a new sequence or choice node, with the leaf
                     //  (or rep surrounding it) we just got as its first node.
                     //  Make the new node the second node of the current node,
                     //  and then make it the current node.
                     //
                     ContentSpecNode* newCur = new (fGrammarPoolMemoryManager) ContentSpecNode
                     (
                         curType
                         , tmpLeaf2
                         , 0
                         , true
                         , true
                         , fGrammarPoolMemoryManager
                     );
                     curNode->setSecond(newCur);
                     lastNode = curNode;
                     curNode = newCur;
                 }
             }
              else
             {
                 // Cannot be valid
                 delete headNode;  // emitError may do a throw so need to clean-up first

                 //
                 //  NOTE(review): a comma operator (a sequence) reports
                 //  ExpectedChoiceOrCloseParen while a pipe (a choice) reports
                 //  ExpectedSeqOrCloseParen. Going by the names alone these
                 //  look swapped - confirm against the message catalog.
                 //
                 if (opCh == chComma)
                 {
                     fScanner->emitError(XMLErrs::ExpectedChoiceOrCloseParen);
                 }
                  else
                 {
                     fScanner->emitError
                     (
                         XMLErrs::ExpectedSeqOrCloseParen
                         , elemDecl.getFullName()
                     );
                 }
                 return 0;
             }
         }
     }

     //
     //  We saw the terminating parenthesis so lets check for any repetition
     //  character, and create a node for that, making the head node the child
     //  of it. The character is only consumed if a new node came back.
     //
     XMLCh repCh = fReaderMgr->peekNextChar();
     ContentSpecNode* retNode = makeRepNode(repCh, headNode, fGrammarPoolMemoryManager);
     if (retNode != headNode)
         fReaderMgr->getNextChar();

     return retNode;
 }


 //
 //  We get here after the '<!--' part of the comment. We scan past the
 //  terminating '-->' It will calls the appropriate handler with the comment
 //  text, if one is provided. A comment can be in either the document or
 //  the DTD, so the fInDocument flag is used to know which handler to send
 //  it to.
 //
 void DTDScanner::scanComment()
 {
     enum States
     {
         InText
         , OneDash
         , TwoDashes
     };

     // Get a buffer for this
     XMLBufBid bbComment(fBufMgr);

     //
     //  Get the comment text into a temp buffer. Be sure to use temp buffer
     //  two here, since its to be used for stuff that is potentially longer
     //  than just a name.
     //
     bool   gotLeadingSurrogate = false;
     States curState = InText;
     while (true)
     {
         // Get the next character
         const XMLCh nextCh = fReaderMgr->getNextChar();

         //  Watch for an end of file
         if (!nextCh)
         {
             fScanner->emitError(XMLErrs::UnterminatedComment);
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
         }

         // Check for correct surrogate pairs
         if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
         {
             if (gotLeadingSurrogate)
                 fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
             else
                 gotLeadingSurrogate = true;
         }
         else
         {
             if (gotLeadingSurrogate)
             {
                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                     fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
             }
             // Its got to at least be a valid XML character
             else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) {

                 XMLCh tmpBuf[9];
                 XMLString::binToText
                 (
                     nextCh
                     , tmpBuf
                     , 8
                     , 16
                     , fMemoryManager
                 );
                 fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
             }

             gotLeadingSurrogate = false;
         }

         if (curState == InText)
         {
             // If its a dash, go to OneDash state. Otherwise take as text
             if (nextCh == chDash)
                 curState = OneDash;
             else
                 bbComment.append(nextCh);
         }
          else if (curState == OneDash)
         {
             //
             //  If its another dash, then we change to the two dashes states.
             //  Otherwise, we have to put in the deficit dash and the new
             //  character and go back to InText.
             //
             if (nextCh == chDash)
             {
                 curState = TwoDashes;
             }
              else
             {
                 bbComment.append(chDash);
                 bbComment.append(nextCh);
                 curState = InText;
             }
         }
          else if (curState == TwoDashes)
         {
             // The next character must be the closing bracket
             if (nextCh != chCloseAngle)
             {
                 fScanner->emitError(XMLErrs::IllegalSequenceInComment);
                 fReaderMgr->skipPastChar(chCloseAngle);
                 return;
             }
             break;
         }
     }

     // If there is a doc type handler, then pass on the comment stuff
     if (fDocTypeHandler)
         fDocTypeHandler->doctypeComment(bbComment.getRawBuffer());
 }


 //
 //  Scans the content spec part of an element declaration and stores the
 //  result in the passed element decl. The spec is either one of the
 //  keywords for an empty or any model, or a parenthesized expression
 //  which is then either a mixed model (it starts with the PCDATA keyword)
 //  or a children model.
 //
 //  Returns true if the content spec was scanned successfully. Returns
 //  false if there was no opening parenthesis (after emitting an error) or
 //  if the mixed/children scan reported failure.
 //
 bool DTDScanner::scanContentSpec(DTDElementDecl& toFill)
 {
     //
     //  Check for for a couple of the predefined content type strings. If
     //  its not one of these, its got to be a parenthesized reg ex type
     //  expression.
     //
     if (fReaderMgr->skippedString(XMLUni::fgEmptyString))
     {
         toFill.setModelType(DTDElementDecl::Empty);
         return true;
     }

     if (fReaderMgr->skippedString(XMLUni::fgAnyString))
     {
         toFill.setModelType(DTDElementDecl::Any);
         return true;
     }

     // Its got to be a parenthesized regular expression
     if (!fReaderMgr->skippedChar(chOpenParen))
     {
         fScanner->emitError
         (
             XMLErrs::ExpectedContentSpecExpr
             , toFill.getFullName()
         );
         return false;
     }

     // Get the current reader id, so we can test for partial markup
     const unsigned int curReader = fReaderMgr->getCurrentReaderNum();

     // We could have a PE ref here, but don't require space
     checkForPERef(false, true);

     //
     //  Now we look for a PCDATA string. If its PCDATA, then it must be a
     //  MIXED model. Otherwise, it must be a regular list of children in
     //  a regular expression perhaps.
     //
     bool status;
     if (fReaderMgr->skippedString(XMLUni::fgPCDATAString))
     {
         // Set the model to mixed
         toFill.setModelType(DTDElementDecl::Mixed_Simple);
         status = scanMixed(toFill);

         //
         //  If we are validating we have to check that there are no multiple
         //  uses of any child elements. This is only done if the mixed model
         //  was scanned successfully, since after a failed scan there is no
         //  guarantee that a content model is available to be checked.
         //
         if (status && fScanner->getDoValidation())
         {
             if (((const MixedContentModel*)toFill.getContentModel())->hasDups())
                 fScanner->getValidator()->emitError(XMLValid::RepElemInMixed);
         }
     }
      else
     {
         //
         //  We have to do a recursive scan of the content model. Create a
         //  buffer for it to use, for efficiency. It returns the top of the
         //  content spec node tree, which we set if successful.
         //
         toFill.setModelType(DTDElementDecl::Children);
         XMLBufBid bbTmp(fBufMgr);
         ContentSpecNode* resNode = scanChildren(toFill, bbTmp.getBuffer());
         status = (resNode != 0);
         if (status)
             toFill.setContentSpec(resNode);
     }

     // Make sure we are on the same reader as where we started
     if (curReader != fReaderMgr->getCurrentReaderNum() && fScanner->getDoValidation())
         fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);

     return status;
 }


 //
 //  Scans the default declaration part of an attribute definition and
 //  records the outcome in the passed attribute def. The required and
 //  implied keywords carry no value, so for those only the default type is
 //  set. Otherwise the default type becomes fixed or default, and the
 //  attribute value that follows is scanned and stored as the value.
 //
 void DTDScanner::scanDefaultDecl(DTDAttDef& toFill)
 {
     // These two keywords stand on their own, so we are done right away
     if (fReaderMgr->skippedString(XMLUni::fgRequiredString))
     {
         toFill.setDefaultType(XMLAttDef::Required);
         return;
     }
     if (fReaderMgr->skippedString(XMLUni::fgImpliedString))
     {
         toFill.setDefaultType(XMLAttDef::Implied);
         return;
     }

     //
     //  Anything else has a value. See if it is flagged as fixed, in which
     //  case whitespace must separate the keyword from the value. If the
     //  whitespace is missing, complain but carry on.
     //
     const bool isFixed = fReaderMgr->skippedString(XMLUni::fgFixedString);
     if (isFixed)
     {
         if (fReaderMgr->skippedSpace())
             fReaderMgr->skipPastSpaces();
         else
             fScanner->emitError(XMLErrs::ExpectedWhitespace);
     }
     toFill.setDefaultType(isFixed ? XMLAttDef::Fixed : XMLAttDef::Default);

     // Check for PE ref or optional whitespace
     checkForPERef(false, true);

     //
     //  Now get the value itself. If that fails, an error is emitted, but
     //  whatever ended up in the buffer is still stored as the value so
     //  that we can try to keep going.
     //
     XMLBufBid bbValue(fBufMgr);
     const bool gotValue = scanAttValue(toFill.getFullName(), bbValue.getBuffer(), toFill.getType());
     if (!gotValue)
         fScanner->emitError(XMLErrs::ExpectedDefAttrDecl);

     toFill.setValue(bbValue.getRawBuffer());
 }


 //
 //  This is called after seeing '<!ELEMENT' which indicates that an element
 //  markup is starting. This guy scans the rest of it and adds it to the
 //  element decl pool if it has not already been declared.
 //
 //  If the element is already declared, the new declaration is scanned into
 //  a dummy element decl instead, so that the existing declaration is left
 //  untouched. The doc type handler, if there is one, is still called in
 //  that case, with the flag set that marks the declaration as ignored.
 //
 void DTDScanner::scanElementDecl()
 {
     //
     //  Space is legal (required actually) here so check for a PE ref. If
     //  we don't get our whitespace, then issue and error, but try to keep
     //  going.
     //
     if (!checkForPERef(false, true))
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     // Get a buffer for the element name and scan in the name
     XMLBufBid bbName(fBufMgr);
     if (!fReaderMgr->getName(bbName.getBuffer()))
     {
         fScanner->emitError(XMLErrs::ExpectedElementName);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // Look this guy up in the element decl pool
     DTDElementDecl* decl = (DTDElementDecl*) fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, bbName.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);

     //
     //  If it does not exist, then we need to create it. If it does and
     //  its marked as declared, then that's an error, but we still need to
     //  scan over the content model so use the dummy declaration that the
     //  parsing code can fill in.
     //
     if (decl)
     {
         if (decl->isDeclared())
         {
             if (fScanner->getDoValidation())
                 fScanner->getValidator()->emitError(XMLValid::ElementAlreadyExists, bbName.getRawBuffer());

             if (!fDumElemDecl)
                 fDumElemDecl = new (fMemoryManager) DTDElementDecl
                 (
                     bbName.getRawBuffer()
                     , fEmptyNamespaceId
                     , DTDElementDecl::Any
                     , fMemoryManager
                 );
             else
                 fDumElemDecl->setElementName(bbName.getRawBuffer(),fEmptyNamespaceId);

             //
             //  From here on work on the dummy. Without this redirection
             //  the duplicate would be scanned straight over the existing
             //  declaration, and the ignored flag below could never be set.
             //
             decl = fDumElemDecl;
         }
     }
      else
     {
         //
         //  Create the new empty declaration to fill in and put it into
         //  the decl pool.
         //
         decl = new (fGrammarPoolMemoryManager) DTDElementDecl
         (
             bbName.getRawBuffer()
             , fEmptyNamespaceId
             , DTDElementDecl::Any
             , fGrammarPoolMemoryManager
         );
         fDTDGrammar->putElemDecl(decl);
     }

     // Set a flag for whether we will ignore this one
     const bool isIgnored = (decl == fDumElemDecl);

     // Mark this one if being externally declared
     decl->setExternalElemDeclaration(isReadingExternalEntity());

     // Mark this one as being declared
     decl->setCreateReason(XMLElementDecl::Declared);

     // Another check for a PE ref, with at least required whitespace
     if (!checkForPERef(false, true))
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     // And now scan the content model for this guy.
     if (!scanContentSpec(*decl))
     {
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // Another check for a PE ref, but we don't require whitespace here
     checkForPERef(false, true);

     // And we should have the ending angle bracket
     if (!fReaderMgr->skippedChar(chCloseAngle))
     {
         fScanner->emitError(XMLErrs::UnterminatedElementDecl, bbName.getRawBuffer());
         fReaderMgr->skipPastChar(chCloseAngle);
     }

     //
     //  If we have a DTD handler tell it about the new element decl. We
     //  tell it if its one that can be ignored, cause its an override of a
     //  previously existing decl. The handler is called either way, so it
     //  is up to the handler to act on the flag.
     //
     if (fDocTypeHandler)
         fDocTypeHandler->elementDecl(*decl, isIgnored);
 }


 //
 //  This method will process a general or parameter entity declaration. The
 //  entity is added to the parameter entity pool or to the grammar's general
 //  entities, according to its kind, unless an entity of that name already
 //  exists. In that case the existing one takes precedence and the new
 //  declaration is scanned into a dummy decl whose results are ignored.
 //
 //  The scanner's 'has no DTD' flag is temporarily cleared while the value
 //  of a parameter entity is scanned, and put back afterwards whether or
 //  not the scan of the value worked.
 //
 void DTDScanner::scanEntityDecl()
 {
     //
     //  Space is required here, but we cannot check for a PE Ref since
     //  there could be a legal (no-ref) percent sign here. Since any
     //  entity that ended here would be illegal, we just skip spaces
     //  and then check for a percent.
     //
     if (!fReaderMgr->lookingAtSpace())
         fScanner->emitError(XMLErrs::ExpectedWhitespace);
     else
         fReaderMgr->skipPastSpaces();
     bool isPEDecl = fReaderMgr->skippedChar(chPercent);

     //
     //  If a PE decl, then check if it is followed by a space; if it is so,
     //  eat the percent and check for spaces or a PE ref on the other side of it.
     //  Otherwise, it has to be an entity reference for a general entity.
     //
     if (isPEDecl)
     {
         if(!fReaderMgr->getCurrentReader()->isWhitespace(fReaderMgr->peekNextChar()))
         {
             // The percent started a PE ref, so this is not a PE decl after all
             isPEDecl=false;
             while (true)
             {
                if (!expandPERef(false, false, true, false))
                   fScanner->emitError(XMLErrs::ExpectedEntityRefName);
                // And skip any more spaces in the expanded value
                if (fReaderMgr->skippedSpace())
                   fReaderMgr->skipPastSpaces();
                // Keep going for as long as another PE ref follows
                if (!fReaderMgr->skippedChar(chPercent))
                   break;
             }
         }
         else if (!checkForPERef(false, true))
             fScanner->emitError(XMLErrs::ExpectedWhitespace);
     }

     //
     //  Now lets get a name, which should be the name of the entity. We
     //  have to get a buffer for this.
     //
     XMLBufBid bbName(fBufMgr);
     if (!fReaderMgr->getName(bbName.getBuffer()))
     {
         fScanner->emitError(XMLErrs::ExpectedPEName);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // If namespaces are enabled, then no colons allowed
     if (fScanner->getDoNamespaces())
     {
         if (XMLString::indexOf(bbName.getRawBuffer(), chColon) != -1)
             fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
     }

     //
     //  See if this entity already exists. If so, then the existing one
     //  takes precedence. So we use the local dummy decl to parse into
     //  and just ignore the results.
     //
     DTDEntityDecl* entityDecl;
     if (isPEDecl)
         entityDecl = fPEntityDeclPool->getByKey(bbName.getRawBuffer());
     else
         entityDecl = fDTDGrammar->getEntityDecl(bbName.getRawBuffer());

     if (entityDecl)
     {
         if (!fDumEntityDecl)
             fDumEntityDecl = new (fMemoryManager) DTDEntityDecl(fMemoryManager);
         fDumEntityDecl->setName(bbName.getRawBuffer());
         entityDecl = fDumEntityDecl;
     }
      else
     {
         // Its not in existence already, then create an entity decl for it
         entityDecl = new (fGrammarPoolMemoryManager) DTDEntityDecl(bbName.getRawBuffer(), false, fGrammarPoolMemoryManager);

         //
         //  Set the declaration location. The parameter indicates whether its
         //  declared in the content/internal subset, so we know whether or not
         //  its in the external subset.
         //
         entityDecl->setDeclaredInIntSubset(fInternalSubset);

         // Add it to the appropriate entity decl pool
         if (isPEDecl)
             fPEntityDeclPool->put(entityDecl);
          else
             fDTDGrammar->putEntityDecl(entityDecl);
     }

     // Set a flag that indicates whether we are ignoring this one
     const bool isIgnored = (entityDecl == fDumEntityDecl);

     // Set the PE flag on it
     entityDecl->setIsParameter(isPEDecl);

     //
     //  Space is legal (required actually) here so check for a PE ref. If
     //  we don't get our whitespace, then issue an error, but try to keep
     //  going.
     //
     if (!checkForPERef(false, true))
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     // save the hasNoDTD status for Entity Constraint Checking
     bool hasNoDTD = fScanner->getHasNoDTD();
     if (hasNoDTD && isPEDecl)
         fScanner->setHasNoDTD(false);

     // According to the type call the value scanning method
     if (!scanEntityDef(*entityDecl, isPEDecl))
     {
         fReaderMgr->skipPastChar(chCloseAngle);

         //
         //  Put the flag back the way we found it, just as is done below
         //  when the scan works. Forcing it on here would turn the flag on
         //  even if it was off when we got here.
         //
         if (hasNoDTD)
             fScanner->setHasNoDTD(true);
         fScanner->emitError(XMLErrs::ExpectedEntityValue);
         return;
     }
     if (hasNoDTD)
         fScanner->setHasNoDTD(true);

     // Space is legal (but not required) here so check for a PE ref
     checkForPERef(false, true);

     // And then we have to have the closing angle bracket
     if (!fReaderMgr->skippedChar(chCloseAngle))
     {
         fScanner->emitError(XMLErrs::UnterminatedEntityDecl, entityDecl->getName());
         fReaderMgr->skipPastChar(chCloseAngle);
     }

     //
     //  If we have a doc type handler, then call it. The ignored flag tells
     //  it whether this decl was a duplicate of an existing entity.
     //
     if (fDocTypeHandler)
         fDocTypeHandler->entityDecl(*entityDecl, isPEDecl, isIgnored);
 }


 //
 //  Scans either a character reference or a general entity reference.
 //  Scanning begins with an optional '#'; presumably the caller has already
 //  consumed the leading '&'.
 //
 //  Result:
 //    EntityExp_Returned - the value is handed back directly in firstCh (and
 //                         secondCh, which stays 0 unless the char ref scan
 //                         fills it in).
 //    EntityExp_Pushed   - the entity was handed to the reader manager as a
 //                         new reader (also returned when an internal entity
 //                         turns out to be recursive, after the error).
 //    EntityExp_Failed   - the reference could not be processed.
 //
 //  'escaped' is set to true only on the EntityExp_Returned paths, i.e. for
 //  character references and special-char entities; it only matters to the
 //  caller when the value was returned directly.
 //
 //  NOTE: This is only called when scanning attribute values, so general
 //  entities are always expanded.
 //
 DTDScanner::EntityExpRes
 DTDScanner::scanEntityRef(XMLCh& firstCh, XMLCh& secondCh, bool& escaped)
 {
     // Defaults: no second char and nothing escaped
     secondCh = 0;
     escaped = false;

     // The complete reference has to live inside the reader we start in
     const unsigned int startReaderNum = fReaderMgr->getCurrentReaderNum();

     // A pound sign means a character reference, which is always expanded
     if (fReaderMgr->skippedChar(chPound))
     {
         // Scan the numeric value; on failure give up right away
         if (!scanCharRef(firstCh, secondCh))
             return EntityExp_Failed;

         if (fReaderMgr->getCurrentReaderNum() != startReaderNum)
             fScanner->emitError(XMLErrs::PartialMarkupInEntity);

         // A char ref always counts as escaped
         escaped = true;
         return EntityExp_Returned;
     }

     // Otherwise it must be a named general entity
     XMLBufBid nameBid(fBufMgr);
     if (!fReaderMgr->getName(nameBid.getBuffer()))
     {
         fScanner->emitError(XMLErrs::ExpectedEntityRefName);
         return EntityExp_Failed;
     }
     const XMLCh* const entityName = nameBid.getRawBuffer();

     // The terminating semicolon is required; complain but carry on without it
     if (!fReaderMgr->skippedChar(chSemiColon))
         fScanner->emitError(XMLErrs::UnterminatedEntityRef, entityName);

     // Name and semicolon must have come from the reader we started in
     if (fReaderMgr->getCurrentReaderNum() != startReaderNum)
         fScanner->emitError(XMLErrs::PartialMarkupInEntity);

     // Find the declaration in the general entity pool
     XMLEntityDecl* const entity = fDTDGrammar->getEntityDecl(entityName);
     if (!entity)
     {
         //
         //  XML 1.0 Section 4.1: an undeclared entity is a scanner error for
         //  standalone documents or when there is no DTD, and otherwise a
         //  validity error (reported only when validating).
         //
         if (fScanner->getStandalone() || fScanner->getHasNoDTD())
             fScanner->emitError(XMLErrs::EntityNotFound, entityName);
         else if (fScanner->getDoValidation())
             fScanner->getValidator()->emitError(XMLValid::VC_EntityNotFound, entityName);

         return EntityExp_Failed;
     }

     //
     //  XML 1.0 Section 4.1: a standalone document may only reference
     //  entities that were declared in the internal subset.
     //
     if (fScanner->getStandalone() && !entity->getDeclaredInIntSubset())
         fScanner->emitError(XMLErrs::IllegalRefInStandalone, entityName);

     // Special char entities are returned directly, marked as escaped
     if (entity->getIsSpecialChar())
     {
         firstCh = entity->getValue()[0];
         escaped = true;
         return EntityExp_Returned;
     }

     if (!entity->isExternal())
     {
         //
         //  Internal entity: create a reader over the in-memory entity value.
         //
         XMLReader* valueReader = fReaderMgr->createIntEntReader
         (
             entity->getName()
             , XMLReader::RefFrom_NonLiteral
             , XMLReader::Type_General
             , entity->getValue()
             , entity->getValueLen()
             , false
         );

         //
         //  Try to make it the current input. A refused push means the
         //  entity is recursive; report it, but still return
         //  EntityExp_Pushed so the caller keeps going.
         //
         if (!fReaderMgr->pushReader(valueReader, entity))
             fScanner->emitError(XMLErrs::RecursiveEntity, entity->getName());

         return EntityExp_Pushed;
     }

     //
     //  External entity from here on.
     //  XML 1.0 Section 4.4.4: a reference to an unparsed entity is forbidden.
     //
     if (entity->isUnparsed())
     {
         fScanner->emitError(XMLErrs::NoUnparsedEntityRefs, entityName);
         return EntityExp_Failed;
     }

     //
     //  XML 1.0 Section 4.4.4: a reference to an external entity inside an
     //  attribute value is forbidden as well. Report it, then expand anyway.
     //
     fScanner->emitError(XMLErrs::NoExtRefsInAttValue);

     // Create a reader for the external entity
     InputSource* srcUsed;
     XMLReader* reader = fReaderMgr->createReader
     (
         entity->getBaseURI()
         , entity->getSystemId()
         , entity->getPublicId()
         , false
         , XMLReader::RefFrom_NonLiteral
         , XMLReader::Type_General
         , XMLReader::Source_External
         , srcUsed
         , fScanner->getCalculateSrcOfs()
         , fScanner->getDisableDefaultEntityResolution()
     );

     // The janitor cleans up the input source on every exit path below
     Janitor<InputSource> janSrc(srcUsed);

     // No reader means the entity could not be opened
     if (!reader)
         ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Gen_CouldNotOpenExtEntity, srcUsed ? srcUsed->getSystemId() : entity->getSystemId(), fMemoryManager);

     // A refused push means a recursive expansion, which is a failure here
     if (!fReaderMgr->pushReader(reader, entity))
     {
         fScanner->emitError(XMLErrs::RecursiveEntity, entity->getName());
         return EntityExp_Failed;
     }

     // If the entity starts with the XML string, parse its text decl
     if (fScanner->checkXMLDecl(true))
         scanTextDecl();

     return EntityExp_Pushed;
 }


 //
 //  Scans the quoted literal of an entity value into toFill (which is reset
 //  first). PE references are expanded and character references are replaced
 //  by their value; since this is a DTD scanner, all such literals are in
 //  entity decls, so general entity references are not expanded but copied
 //  through as '&name;' text.
 //
 //  Returns false if the next character is not a quote, or if an invalid
 //  character is found (in which case input is skipped past the next quote
 //  char). Returns true otherwise. Throws UnexpectedEOFException if the
 //  input ends before the closing quote.
 //
 bool DTDScanner::scanEntityLiteral(XMLBuffer& toFill)
 {
     toFill.reset();

     // The next char must be a single or double quote; remember which one
     XMLCh quoteCh;
     if (!fReaderMgr->skipIfQuote(quoteCh))
         return false;

     // Get a buffer for pulling in entity names when we see GE refs
     XMLBufBid bbName(fBufMgr);
     XMLBuffer& nameBuf = bbName.getBuffer();

     // Remember the reader the opening quote was seen in
     const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();

     //
     //  Loop until we see the ending quote character, handling any references
     //  in the process.
     //
     //  secondCh is only non-zero after a char ref scan filled it in; it is
     //  appended after nextCh and cleared again at the bottom of the loop.
     //
     XMLCh   nextCh;
     XMLCh   secondCh = 0;
     bool    gotLeadingSurrogate = false;
     while (true)
     {
         nextCh = fReaderMgr->getNextChar();

         //
         //  Watch specifically for EOF and issue a more meaningful error
         //  if that occurs (since an unterminated quoted literal can cause
         //  this easily.)
         //
         if (!nextCh)
         {
             fScanner->emitError(XMLErrs::UnterminatedEntityLiteral);
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
         }

         //
         //  Break out on our terminating quote char only when we are back in
         //  the same reader. Otherwise, we might trigger on a nested quote
         //  char in an expanded entity.
         //
         if ((nextCh == quoteCh)
         &&  (fReaderMgr->getCurrentReaderNum() == orgReader))
         {
             break;
         }

         if (nextCh == chPercent)
         {
             //
             //  Put the PE's value on the reader stack and then jump back
             //  to the top to start processing it. Per the original note,
             //  the arguments tell expandPERef not to scan the reference's
             //  content as an external subset (confirm against expandPERef).
             //
             expandPERef(false, true, true);
             continue;
         }

         //
         //  Ok, now that all the other special stuff is checked, we can
         //  look for a general entity. In here, we cannot have a naked &
         //  and will only expand numerical char refs. Others will be left
         //  alone.
         //
         if (nextCh == chAmpersand)
         {
             //
             //  Here, we only expand numeric char refs, but not any general
             //  entities. However, the XML spec requires that we check
             //  and make sure it does refer to a general entity if it is not
             //  a char ref (i.e. no naked '&' chars.)
             //
             if (fReaderMgr->skippedChar(chPound))
             {
                 // If it failed, then just jump back to the top and try to pick up
                 // NOTE(review): on success gotLeadingSurrogate is left as it
                 // was; confirm that is intended.
                 if (!scanCharRef(nextCh, secondCh))
                 {
                     gotLeadingSurrogate = false;
                     continue;
                 }
             }
              else
             {
                 if (!fReaderMgr->getName(nameBuf))
                 {
                     // No name follows the '&'. nextCh is still '&' here, so
                     // after the error the '&' itself is appended below.
                     fScanner->emitError(XMLErrs::ExpectedEntityRefName);
                 }
                  else
                 {
                     //
                     //  Since we are not expanding any of this, we have to
                     //  put the amp and name into the target buffer as data.
                     //
                     toFill.append(chAmpersand);
                     toFill.append(nameBuf.getRawBuffer());

                     // Make sure we skipped a trailing semicolon
                     if (!fReaderMgr->skippedChar(chSemiColon))
                     {
                         fScanner->emitError
                         (
                             XMLErrs::UnterminatedEntityRef
                             , nameBuf.getRawBuffer()
                         );
                     }

                     //
                     //  Make the semicolon the character that gets appended
                     //  below; this happens even if it was missing in the
                     //  input.
                     //
                     nextCh = chSemiColon;
                 }

                 // Either way here we reset the surrogate flag
                 gotLeadingSurrogate = false;
             }
         }
         else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
         {
             // Leading surrogate: two in a row is an error, else remember it
             if (gotLeadingSurrogate)
                 fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
             else
                 gotLeadingSurrogate = true;
         }
          else
         {
             if (gotLeadingSurrogate)
             {
                 // A leading surrogate must be followed by a trailing one
                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                     fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
             }
              else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
             {
                 //
                 //  Not a legal XML char: report its value as hex text, skip
                 //  past the next quote char and fail the scan.
                 //
                 XMLCh tmpBuf[9];
                 XMLString::binToText
                 (
                     nextCh
                     , tmpBuf
                     , 8
                     , 16
                     , fMemoryManager
                 );
                 fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                 fReaderMgr->skipPastChar(quoteCh);
                 return false;
             }
             gotLeadingSurrogate = false;
         }

         // Looks ok, so add it to the literal
         toFill.append(nextCh);

         // Append the second unit from a char ref, if any, and clear it
         if (secondCh)
         {
             toFill.append(secondCh);
             secondCh=0;
         }
     }

     //
     //  If we got here and did not get back to the original reader level,
     //  then we propagated some entity out of the literal, so issue a
     //  validity error (only when validating), but don't fail.
     //
     //  NOTE(review): the loop above only breaks when the current reader
     //  number already equals orgReader, so it looks like this check cannot
     //  fire unless the reader number changes in between; confirm.
     //
     if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
         fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);

     return true;
 }


 //
 //  Scans the definition part of an entity declaration, i.e. what follows
 //  the entity name (and any PE reference after it), and stores the result
 //  in 'decl':
 //
 //    - a quoted literal becomes the decl's value;
 //    - otherwise an external id is required, which fills in the public id,
 //      system id and base URI, optionally followed by NDATA and a notation
 //      name.
 //
 //  Returns false when the literal, the external id or the notation name
 //  could not be scanned; true otherwise.
 //
 bool DTDScanner::scanEntityDef(DTDEntityDecl& decl, const bool isPEDecl)
 {
     // A quote at this point means the definition is an entity literal
     const bool startsWithQuote = fReaderMgr->lookingAtChar(chSingleQuote)
                               || fReaderMgr->lookingAtChar(chDoubleQuote);
     if (startsWithQuote)
     {
         XMLBufBid literalBid(fBufMgr);
         if (scanEntityLiteral(literalBid.getBuffer()))
         {
             // Store the scanned text as the entity's value
             decl.setValue(literalBid.getRawBuffer());
             return true;
         }
         return false;
     }

     //
     //  No literal, so this has to be an external entity and an external
     //  id must follow.
     //
     XMLBufBid pubIdBid(fBufMgr);
     XMLBufBid sysIdBid(fBufMgr);
     if (!scanId(pubIdBid.getBuffer(), sysIdBid.getBuffer(), IDType_External))
         return false;

     ReaderMgr::LastExtEntityInfo extInfo;
     fReaderMgr->getLastExtEntityInfo(extInfo);

     // Copy the ids over to the decl; an empty string is stored as null
     const XMLCh* const pubId = pubIdBid.getRawBuffer();
     const XMLCh* const sysId = sysIdBid.getRawBuffer();
     decl.setPublicId((pubId && *pubId) ? pubId : 0);
     decl.setSystemId((sysId && *sysId) ? sysId : 0);
     decl.setBaseURI((extInfo.systemId && *extInfo.systemId) ? extInfo.systemId : 0);

     // Whitespace (possibly via a PE ref) may follow the external id
     const bool sawSpace = checkForPERef(false, true);

     if (isPEDecl)
     {
         // For a PE decl without trailing space there is nothing more to scan
         if (!sawSpace)
             return true;

         //
         //  NDATA is a common mistake on PE decls, where it is not allowed.
         //  Give a meaningful error and keep scanning so that the rest of
         //  the NDATA text gets eaten below.
         //
         if (fReaderMgr->skippedString(XMLUni::fgNDATAString))
             fScanner->emitError(XMLErrs::NDATANotValidForPE);
     }

     // A closing angle bracket here ends the definition
     if (fReaderMgr->lookingAtChar(chCloseAngle))
         return true;

     // Anything else must have been separated by whitespace
     if (!sawSpace)
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     // The only thing allowed now is the NDATA keyword
     if (!fReaderMgr->skippedString(XMLUni::fgNDATAString))
         fScanner->emitError(XMLErrs::ExpectedNDATA);

     // Whitespace is required after it, but try to go on without
     if (!checkForPERef(false, true))
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

     // Finally the notation name, which is stored on the decl
     XMLBufBid notationBid(fBufMgr);
     if (fReaderMgr->getName(notationBid.getBuffer()))
     {
         decl.setNotationName(notationBid.getRawBuffer());
         return true;
     }

     fScanner->emitError(XMLErrs::ExpectedNotationName);
     return false;
 }


 //
 //  This method is called after an attribute decl name or a notation decl has
 //  been scanned and then an opening parenthesis was see, indicating the list
 //  of values. It scans the enumeration values and creates a single string
 //  which has a single space between each value.
 //
 //  The terminating close paren ends this scan.
 //
 bool DTDScanner::scanEnumeration( const   DTDAttDef&  attDef
                                     ,       XMLBuffer&  toFill
                                     , const bool        notation)
 {
     // Reset the passed buffer
     toFill.reset();

     // Check for PE ref but don't require space
     checkForPERef(false, true);

     // If this is a notation, we need an opening paren
     if (notation)
     {
         if (!fReaderMgr->skippedChar(chOpenParen))
             fScanner->emitError(XMLErrs::ExpectedOpenParen);
     }

     // We need a local buffer to use as well
     XMLBufBid bbTmp(fBufMgr);

     while (true)
     {
         // Space is allowed here for either type so check for PE ref
         checkForPERef(false, true);

         // And then get either a name or a name token
         bool success;
         if (notation)
             success = fReaderMgr->getName(bbTmp.getBuffer());
         else
             success = fReaderMgr->getNameToken(bbTmp.getBuffer());

         if (!success)
         {
             fScanner->emitError
             (
                 XMLErrs::ExpectedEnumValue
                 , attDef.getFullName()
             );
             return false;
         }

         // Append this value to the target value
         toFill.append(bbTmp.getRawBuffer(), bbTmp.getLen());

         // Space is allowed here for either type so check for PE ref
         checkForPERef(false, true);

         // Check for the terminating paren
         if (fReaderMgr->skippedChar(chCloseParen))
             break;

         // And append a space separator
         toFill.append(chSpace);

         // Check for the pipe character separator
         if (!fReaderMgr->skippedChar(chPipe))
         {
             fScanner->emitError(XMLErrs::ExpectedEnumSepOrParen);
             return false;
         }
     }
     return true;
 }


 //
 //  Scans an equal sign with optional spaces on either side. Returns true
 //  if the equal sign was found; spaces in front of it are skipped either
 //  way.
 //
 bool DTDScanner::scanEq()
 {
     fReaderMgr->skipPastSpaces();

     if (!fReaderMgr->skippedChar(chEqual))
         return false;

     fReaderMgr->skipPastSpaces();
     return true;
 }


 //
 //  This method is called when an external entity reference is seen in the
 //  DTD or an external DTD subset is encountered, and their contents pushed
 //  onto the reader stack. This method will scan that contents.
 //
 void DTDScanner::scanExtSubsetDecl(const bool inIncludeSect, const bool isDTD)
 {
     // Indicate we are in the external subset now
     FlagJanitor<bool> janContentFlag(&fInternalSubset, false);


     bool bAcceptDecl = !inIncludeSect;

     // Get a buffer for whitespace
     XMLBufBid bbSpace(fBufMgr);

     //
     //  If we have a doc type handler and we are not being called recursively
     //  to handle an include section, tell it the ext subset starts
     //
     if (fDocTypeHandler && !inIncludeSect)
         fDocTypeHandler->startExtSubset();

     //
     //  We have to play a trick here if the current entity we are parsing
     //  is a PE. Because the spooling code will put out a whitespace before
     //  and after an expanded PE if its being scanned outside the context of
     //  a literal entity, this will confuse this external subset code.
     //
     //  So, we see if that is what is happening and, if so, eat the single
     //  space, a check for the <?xml string. If we find it, we parse that
     //  markup right now and put the space back.
     //
     if (fReaderMgr->isScanningPERefOutOfLiteral())
     {
         if (fReaderMgr->skippedSpace())
         {
             if (fScanner->checkXMLDecl(true))
             {
                 scanTextDecl();
                 bAcceptDecl = false;

                 // <TBD> Figure out how to do this
                 // fReaderMgr->unGet(chSpace);
             }
         }
     }

     // Get the current reader number
     const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();

     //
     //  Loop until we hit the end of the external subset entity. Note that
     //  we use a double loop here in order to avoid the overhead of doing
     //  the exception setup/teardown work on every loop.
     //
     bool inMarkup = false;
     bool inCharData = false;
     while (true)
     {
         bool bDoBreak=false;    // workaround for Borland bug with 'break' in 'catch'
         try
         {
             while (true)
             {
                 const XMLCh nextCh = fReaderMgr->peekNextChar();

                 if (nextCh == chOpenAngle)
                 {
                     // Get the reader we started this on
                     // XML 1.0 P28a Well-formedness constraint: PE Between Declarations
                     const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
                     bool wasInPE = (fReaderMgr->getCurrentReader()->getType() == XMLReader::Type_PE);

                     //
                     //  Now scan the markup. Set the flag so that we will know that
                     //  we were in markup if an end of entity exception occurs.
                     //
                     fReaderMgr->getNextChar();
                     inMarkup = true;
                     scanMarkupDecl(bAcceptDecl);
                     inMarkup = false;

                     //
                     //  And see if we got back to the same level. If not, then its
                     //  a partial markup error.
                     //
                     if (fReaderMgr->getCurrentReaderNum() != orgReader){
                         if (wasInPE)
                             fScanner->emitError(XMLErrs::PEBetweenDecl);
                         else if (fScanner->getDoValidation())
                             fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
                     }

                 }
                  else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
                 {
                     //
                     //  If we have a doc type handler, and advanced callbacks are
                     //  enabled, then gather up whitespace and call back. Otherwise
                     //  just skip whitespaces.
                     //
                     if (fDocTypeHandler)
                     {
                         inCharData = true;
                         fReaderMgr->getSpaces(bbSpace.getBuffer());
                         inCharData = false;

                         fDocTypeHandler->doctypeWhitespace
                         (
                             bbSpace.getRawBuffer()
                             , bbSpace.getLen()
                         );
                     }
                      else
                     {
                         //
                         //  If we hit an end of entity in the middle of white
                         //  space, that's fine. We'll just come back in here
                         //  again on the next round and skip some more.
                         //
                         fReaderMgr->skipPastSpaces();
                     }
                 }
                  else if (nextCh == chPercent)
                 {
                     //
                     //  Expand (and scan if external) the reference value. Tell
                     //  it to throw an end of entity exception at the end of the
                     //  entity.
                     //
                     fReaderMgr->getNextChar();
                     expandPERef(true, false, false, true);
                 }
                  else if (inIncludeSect && (nextCh == chCloseSquare))
                 {
                     //
                     //  Its the end of a conditional include section. So scan it and
                     //  decrement the include depth counter.
                     //
                     fReaderMgr->getNextChar();
                     if (!fReaderMgr->skippedChar(chCloseSquare))
                     {
                         fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                         fReaderMgr->skipPastChar(chCloseAngle);
                     }
                      else if (!fReaderMgr->skippedChar(chCloseAngle))
                     {
                         fScanner->emitError(XMLErrs::ExpectedEndOfConditional);
                         fReaderMgr->skipPastChar(chCloseAngle);
                     }
                     return;
                 }
                  else if (!nextCh)
                 {
                     return; // nothing left
                 }
                 else
                 {
                     fReaderMgr->getNextChar();
                     if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
                     {
                         XMLCh tmpBuf[9];
                         XMLString::binToText
                         (
                             nextCh
                             , tmpBuf
                             , 8
                             , 16
                             , fMemoryManager
                         );
                         fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                     }
                     else
                     {
                         fScanner->emitError(XMLErrs::InvalidDocumentStructure);
                     }

                     // Try to get realigned
                     static const XMLCh toSkip[] =
                     {
                         chPercent, chCloseSquare, chOpenAngle, chNull
                     };
                     fReaderMgr->skipUntilInOrWS(toSkip);
                 }
                 bAcceptDecl = false;
             }
         }

         catch(const EndOfEntityException& toCatch)
         {
             //
             //  If the external entity ended while we were in markup, then that's
             //  a partial markup error.
             //
             if (inMarkup)
             {
                 fScanner->emitError(XMLErrs::PartialMarkupInEntity);
                 inMarkup = false;
             }

             // If we were in char data, then send what we got
             if (inCharData)
             {
                 // Send what we got, then rethrow
                 if (fDocTypeHandler)
                 {
                     fDocTypeHandler->doctypeWhitespace
                     (
                         bbSpace.getRawBuffer()
                         , bbSpace.getLen()
                     );
                 }
                 inCharData = false;
             }

             //
             //  If the entity that just ended was the entity that we started
             //  on, then this is the end of the external subset.
             //
             if (orgReader == toCatch.getReaderNum())
                 bDoBreak=true;
         }
         if(bDoBreak)
             break;
     }

     // If we have a doc type handler, tell it the ext subset ends
     if (fDocTypeHandler && isDTD)
         fDocTypeHandler->endExtSubset();
 }


 //
 //  Scans a public or external id into the two buffers, which are both
 //  reset first. 'whatKind' says what the caller accepts: IDType_Public for
 //  a public id only, IDType_External when a system literal is required,
 //  and any other value when the system literal is optional.
 //
 // [75] ExternalID ::= 'SYSTEM' S SystemLiteral
 //                     | 'PUBLIC' S PubidLiteral S SystemLiteral
 // [83] PublicID ::= 'PUBLIC' S PubidLiteral
 //
 //  Returns false if no acceptable id could be scanned. Missing whitespace
 //  in front of a quote is reported as an error but does not stop the scan.
 //
 bool DTDScanner::scanId(          XMLBuffer&  pubIdToFill
                             ,       XMLBuffer&  sysIdToFill
                             , const IDTypes     whatKind)
 {
     // Both results start out empty
     pubIdToFill.reset();
     sysIdToFill.reset();

     const bool wantPublicOnly = (whatKind == IDType_Public);
     const bool wantExternal = (whatKind == IDType_External);

     // First alternative: 'SYSTEM' S SystemLiteral
     if (fReaderMgr->skippedString(XMLUni::fgSysIDString))
     {
         // Not acceptable when only a public id was asked for
         if (wantPublicOnly)
         {
             fScanner->emitError(XMLErrs::ExpectedPublicId);
             return false;
         }

         // Whitespace is mandatory after the keyword
         if (!fReaderMgr->skipPastSpaces())
         {
             fScanner->emitError(XMLErrs::ExpectedWhitespace);
             return false;
         }

         // The result is whatever the system literal scan gives
         return scanSystemLiteral(sysIdToFill);
     }

     //
     //  Otherwise it has to be one of
     //      'PUBLIC' S PubidLiteral S SystemLiteral
     //      'PUBLIC' S PubidLiteral
     //  so without the PUBLIC keyword there is no id at all.
     //
     if (!fReaderMgr->skippedString(XMLUni::fgPubIDString))
     {
         fScanner->emitError(XMLErrs::ExpectedSystemOrPublicId);
         return false;
     }

     // Whitespace must separate the keyword from the public literal
     if (!fReaderMgr->skipPastSpaces())
     {
         fScanner->emitError(XMLErrs::ExpectedWhitespace);

         //
         //  If only the whitespace was forgotten and a quote comes next,
         //  keep going; otherwise give up.
         //
         const XMLCh peeked = fReaderMgr->peekNextChar();
         if (!((peeked == chDoubleQuote) || (peeked == chSingleQuote)))
             return false;
     }

     if (!scanPublicLiteral(pubIdToFill))
         return false;

     // A public id is complete at this point
     if (wantPublicOnly)
         return true;

     // See whether whitespace follows the public literal
     const bool sawSpace = fReaderMgr->skipPastSpaces();

     // For the best recovery we need to know if a quote comes next
     const XMLCh upcoming = fReaderMgr->peekNextChar();
     const bool nextIsQuote = (upcoming == chDoubleQuote)
                           || (upcoming == chSingleQuote);

     if (!sawSpace)
     {
         //
         //  The separating whitespace is missing. That is an error when a
         //  system id is required, and also when a quote follows, since
         //  then most likely just the space was forgotten.
         //
         if (wantExternal || nextIsQuote)
             fScanner->emitError(XMLErrs::ExpectedWhitespace);

         //
         //  Without a quote there is no system id to scan. That is a
         //  failure for an external id and a legal end otherwise.
         //
         if (!nextIsQuote)
             return !wantExternal;
     }

     // A quote is coming, so scan the system literal
     if (nextIsQuote)
         return scanSystemLiteral(sysIdToFill);

     // No quote: an error if an external id is expected, but not a failure
     if (wantExternal)
         fScanner->emitError(XMLErrs::ExpectedQuotedString);

     return true;
 }


 //
 //  Skips over the body of an ignored conditional section. On entry the
 //  opening <![IGNORE[ has already been consumed; on return the ]]> that
 //  closes that section has been consumed as well. Nested '<![' ... ']]>'
 //  pairs are counted so that only the matching close ends the scan.
 //
 //  Although the content is ignored, each character is still checked: an
 //  unpaired or misordered surrogate, or a character the current reader does
 //  not accept as an XML character, is reported through the scanner.
 //
 //  Throws UnexpectedEOFException if the input ends before the section is
 //  closed.
 //
 void DTDScanner::scanIgnoredSection()
 {
     //
     //  Depth starts at one because we are already in one section and want
     //  to parse until we hit its end.
     //
     unsigned long depth = 1;

     // Set while the previous character was a leading surrogate
     bool gotLeadingSurrogate = false;

     while (true)
     {
         const XMLCh nextCh = fReaderMgr->getNextChar();

         if (!nextCh)
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

         if ((nextCh == chOpenAngle) || (nextCh == chCloseSquare))
         {
             //
             //  Neither of these can complete a surrogate pair. Report a
             //  pending leading surrogate here and clear the flag, so that
             //  it is not silently dropped and cannot be matched against a
             //  trailing surrogate that shows up later on.
             //
             if (gotLeadingSurrogate)
             {
                 fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                 gotLeadingSurrogate = false;
             }

             if (nextCh == chOpenAngle)
             {
                 // A '<![' opens a nested conditional section
                 if (fReaderMgr->skippedChar(chBang)
                 &&  fReaderMgr->skippedChar(chOpenSquare))
                 {
                     depth++;
                 }
             }
             else if (fReaderMgr->skippedChar(chCloseSquare))
             {
                 //
                 //  We have seen ']]'. Any further ']' characters are
                 //  skipped as well; a '>' right after them closes a level.
                 //
                 while (fReaderMgr->skippedChar(chCloseSquare))
                 {
                     // Do nothing, just skip them
                 }

                 if (fReaderMgr->skippedChar(chCloseAngle))
                 {
                     depth--;
                     if (!depth)
                         break;
                 }
             }
         }
         // Deal with surrogate pairs
         else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
         {
             //  Its a leading surrogate. If we already got one, then
             //  issue an error, else set leading flag to make sure that
             //  we look for a trailing next time.
             if (gotLeadingSurrogate)
                 fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
             else
                 gotLeadingSurrogate = true;
         }
         else
         {
             //  If its a trailing surrogate, make sure that we are
             //  prepared for that. Else, its just a regular char so make
             //  sure that we were not expecting a trailing surrogate.
             if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
             {
                 // Its trailing, so make sure we were expecting it
                 if (!gotLeadingSurrogate)
                     fScanner->emitError(XMLErrs::Unexpected2ndSurrogateChar);
             }
             else
             {
                 //  Its just a char, so make sure we were not expecting a
                 //  trailing surrogate.
                 if (gotLeadingSurrogate)
                     fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);

                 // Its got to at least be a valid XML character
                 else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh))
                 {
                     // Report the offending code unit as hex text
                     XMLCh tmpBuf[9];
                     XMLString::binToText
                     (
                         nextCh
                         , tmpBuf
                         , 8
                         , 16
                         , fMemoryManager
                     );
                     fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                 }
             }
             gotLeadingSurrogate = false;
         }
     }
 }


 //
 //  This method scans the entire internal subset. All we can have here is
 //  decl markup, and PE references. The expanded PE references must contain
 //  whole markup, so we don't have to worry about their content at this
 //  level. We just scan them, expand them, push them, and parse their content
 //  right there, via the expandERef() method.
 //
 bool DTDScanner::scanInternalSubset()
 {
     // Indicate we are in the internal subset now
     FlagJanitor<bool> janContentFlag(&fInternalSubset, true);

     // If we have a doc type handler, tell it the internal subset starts
     if (fDocTypeHandler)
         fDocTypeHandler->startIntSubset();

     // Get a buffer for whitespace
     XMLBufBid bbSpace(fBufMgr);

     bool noErrors = true;
     while (true)
     {
         const XMLCh nextCh = fReaderMgr->peekNextChar();

         //
         //  If we get an end of file marker, just unget it and return a
         //  failure status. The caller will then see the end of file and
         //  faill out correctly.
         //
         if (!nextCh)
             return false;

         // Watch for the end of internal subset marker
         if (nextCh == chCloseSquare)
         {
             fReaderMgr->getNextChar();
             break;
         }

         if (nextCh == chPercent)
         {
             //
             //  Expand (and scan if external) the reference value. Tell
             //  it to set the reader to cause an end of entity exception
             //  when this reader dies, which is what the scanExtSubset
             //  method wants (who is called to scan this.)
             //
             fReaderMgr->getNextChar();
             expandPERef(true, false, false, true);
         }
          else if (nextCh == chOpenAngle)
         {
             // Remember this reader before we start the scan, for checking
             // XML 1.0 P28a Well-formedness constraint: PE Between Declarations
             const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();
             bool wasInPE = (fReaderMgr->getCurrentReader()->getType() == XMLReader::Type_PE);

             // And scan this markup
             fReaderMgr->getNextChar();
             scanMarkupDecl(false);

             // If we did not get back to entry level, then partial markup
             if (fReaderMgr->getCurrentReaderNum() != orgReader) {
                 if (wasInPE)
                     fScanner->emitError(XMLErrs::PEBetweenDecl);
                 else if (fScanner->getDoValidation())
                     fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);
             }
         }
          else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh))
         {
             //
             //  IF we are doing advanced callbacks and have a doc type
             //  handler, then get the whitespace and call the doc type
             //  handler with it. Otherwise, just skip whitespace.
             //
             if (fDocTypeHandler)
             {
                 fReaderMgr->getSpaces(bbSpace.getBuffer());
                 fDocTypeHandler->doctypeWhitespace
                 (
                     bbSpace.getRawBuffer()
                     , bbSpace.getLen()
                 );
             }
              else
             {
                 fReaderMgr->skipPastSpaces();
             }
         }
          else
         {
             // Not valid, so emit an error
             XMLCh tmpBuf[9];
             XMLString::binToText
             (
                 fReaderMgr->getNextChar()
                 , tmpBuf
                 , 8
                 , 16
                 , fMemoryManager
             );
             fScanner->emitError
             (
                 XMLErrs::InvalidCharacterInIntSubset
                 , tmpBuf
             );

             //
             //  If an '>', then probably an abnormally terminated
             //  internal subset so just return.
             //
             if (nextCh == chCloseAngle)
             {
                 noErrors = false;
                 break;
             }

             //
             //  Otherwise, try to sync back up by scanning forward for
             //  a reasonable start character.
             //
             static const XMLCh toSkip[] =
             {
                 chPercent, chCloseSquare, chOpenAngle, chNull
             };
             fReaderMgr->skipUntilInOrWS(toSkip);
         }
     }

     // If we have a doc type handler, tell it the internal subset ends
     if (fDocTypeHandler)
         fDocTypeHandler->endIntSubset();

     return noErrors;
 }


 //
 //  This method is called once we see a < in the input of an int/ext subset,
 //  which indicates the start of some sort of markup.
 //
 void DTDScanner::scanMarkupDecl(const bool parseTextDecl)
 {
     //
     //  We only have two valid first characters here. One is a ! which opens
     //  some markup decl. The other is a ?, which could begin either a PI
     //  or a text decl. If parseTextDecl is false, we cannot accept a text
     //  decl.
     //
     const XMLCh nextCh = fReaderMgr->getNextChar();

     if (nextCh == chBang)
     {
         if (fReaderMgr->skippedChar(chDash))
         {
             if (fReaderMgr->skippedChar(chDash))
             {
                 scanComment();
             }
              else
             {
                 fScanner->emitError(XMLErrs::CommentsMustStartWith);
                 fReaderMgr->skipPastChar(chCloseAngle);
             }
         }
          else if (fReaderMgr->skippedChar(chOpenSquare))
         {
             //
             //  Its a conditional section. This is only valid in the external
             //  subset, so issue an error if we aren't there.
             //
             if (fInternalSubset)
             {
                 fScanner->emitError(XMLErrs::ConditionalSectInIntSubset);
                 fReaderMgr->skipPastChar(chCloseAngle);
                 return;
             }

             // A PE ref can happen here, but space is not required
             checkForPERef(false, true);

             if (fReaderMgr->skippedString(XMLUni::fgIncludeString))
             {
                 checkForPERef(false, true);

                 // Check for the following open square bracket
                 if (!fReaderMgr->skippedChar(chOpenSquare))
                     fScanner->emitError(XMLErrs::ExpectedINCLUDEBracket);

                 // Get the reader we started this on
                 const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();

                 checkForPERef(false, true);

                 //
                 //  Recurse back to the ext subset call again, telling it its
                 //  in an include section.
                 //
                 scanExtSubsetDecl(true, false);

                 //
                 //  And see if we got back to the same level. If not, then its
                 //  a partial markup error.
                 //
                 if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
                     fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);

             }
              else if (fReaderMgr->skippedString(XMLUni::fgIgnoreString))
             {
                 checkForPERef(false, true);

                 // Check for the following open square bracket
                 if (!fReaderMgr->skippedChar(chOpenSquare))
                     fScanner->emitError(XMLErrs::ExpectedINCLUDEBracket);

                 // Get the reader we started this on
                 const unsigned int orgReader = fReaderMgr->getCurrentReaderNum();

                 // And scan over the ignored part
                 scanIgnoredSection();

                 //
                 //  And see if we got back to the same level. If not, then its
                 //  a partial markup error.
                 //
                 if (fReaderMgr->getCurrentReaderNum() != orgReader && fScanner->getDoValidation())
                     fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE);

             }
              else
             {
                 fScanner->emitError(XMLErrs::ExpectedIncOrIgn);
                 fReaderMgr->skipPastChar(chCloseAngle);
             }
         }
          else if (fReaderMgr->skippedString(XMLUni::fgAttListString))
         {
             scanAttListDecl();
         }
          else if (fReaderMgr->skippedString(XMLUni::fgElemString))
         {
             scanElementDecl();
         }
          else if (fReaderMgr->skippedString(XMLUni::fgEntityString))
         {
             scanEntityDecl();
         }
          else if (fReaderMgr->skippedString(XMLUni::fgNotationString))
         {
             scanNotationDecl();
         }
          else
         {
             fScanner->emitError(XMLErrs::ExpectedMarkupDecl);
             fReaderMgr->skipPastChar(chCloseAngle);
         }
     }
      else if (nextCh == chQuestion)
     {
         // It could be a PI or the XML declaration. Check for Decl
         if (fScanner->checkXMLDecl(false))
         {
             // If we are not accepting text decls, its an error
             if (parseTextDecl)
             {
                 scanTextDecl();
             }
              else
             {
                 // Emit the error and skip past this markup
                 fScanner->emitError(XMLErrs::TextDeclNotLegalHere);
                 fReaderMgr->skipPastChar(chCloseAngle);
             }
         }
          else
         {
             // It has to be a PI
             scanPI();
         }
     }
      else
     {
         // Can't be valid so emit error and try to skip past end of this decl
         fScanner->emitError(XMLErrs::ExpectedMarkupDecl);
         fReaderMgr->skipPastChar(chCloseAngle);
     }
 }


 //
 //  This method is called for a mixed model element's content mode. We've
 //  already scanned past the '(PCDATA' part by the time we get here. So
 //  everything else is element names separated by | characters until we
 //  hit the end. The passed element decl's content model is filled in with
 //  the information found.
 //
 //  toFill is the element declaration being scanned. On success its content
 //  spec is set to the tree built here and true is returned. If the model is
 //  not terminated by ')' or an element name is missing after a '|', the
 //  partially built tree is deleted, an error is emitted and false is
 //  returned.
 //
 //  Element names that are not yet known to the grammar get an element decl
 //  created for them, marked as created because of a content model.
 //
 bool DTDScanner::scanMixed(DTDElementDecl& toFill)
 {
     //
     //  The terminating star is only required if there is something more
     //  than (PCDATA).
     //
     bool starRequired = false;

     // Get a buffer to be used below to get element names
     XMLBufBid bbName(fBufMgr);
     XMLBuffer& nameBuf = bbName.getBuffer();

     //
     //  Create an initial content spec node. Its just a leaf node with a
     //  PCDATA element id. This current node pointer will be pushed down the
     //  tree as we go.
     //
     ContentSpecNode* curNode = new (fGrammarPoolMemoryManager) ContentSpecNode
     (
         new (fGrammarPoolMemoryManager) QName
         (
             XMLUni::fgZeroLenString
             , XMLUni::fgZeroLenString
             , XMLElementDecl::fgPCDataElemId
             , fGrammarPoolMemoryManager
         )
         , false
         , fGrammarPoolMemoryManager
     );

     //
     //  Set the initial leaf as the temporary head. If we hit the first choice
     //  node, it will be set up here. When done, this is the node that's set
     //  as the content spec for the element.
     //
     //  Until it is handed to toFill, headNode is owned by this method, which
     //  is why every early exit below deletes it.
     //
     ContentSpecNode* headNode = curNode;

     // Remember the original node so we can sense the first choice node
     ContentSpecNode* orgNode = curNode;

     //
     //  We just loop around, getting the | character at the top and then
     //  looking for the next element name. We keep up with the last node
     //  and add each new one to its right node.
     //
     while (true)
     {
         //
         //  First of all we check for some grunt work details of skipping
         //  whitespace, expand PE refs, and catching invalid reps.
         //
         if (fReaderMgr->lookingAtChar(chPercent))
         {
             // Expand it and continue
             checkForPERef(false, true);
         }
          else if (fReaderMgr->skippedChar(chAsterisk))
         {
             //
             //  Tell them they can't have reps in mixed model, but eat
             //  it and keep going if we are allowed to.
             //
             //  The tree is deleted first when the scanner says that this
             //  error will throw, presumably because emitError() does not
             //  return in that case and the tree would otherwise be lost.
             //
             if (fScanner->emitErrorWillThrowException(XMLErrs::NoRepInMixed))
             {
                 delete headNode;
             }
             fScanner->emitError(XMLErrs::NoRepInMixed);
         }
          else if (fReaderMgr->skippedSpace())
         {
             // Spaces are ok at this point, just eat them and continue
             fReaderMgr->skipPastSpaces();
         }
          else
         {
             if (!fReaderMgr->skippedChar(chPipe))
             {
                 // Has to be the closing paren now.
                 if (!fReaderMgr->skippedChar(chCloseParen))
                 {
                     delete headNode;
                     fScanner->emitError(XMLErrs::UnterminatedContentModel, toFill.getElementName()->getLocalPart());
                     return false;
                 }

                 // Note whether the ')' is followed by the '*' repetition
                 bool starSkipped = true;
                 if (!fReaderMgr->skippedChar(chAsterisk)) {

                     starSkipped = false;

                     // A missing star is only an error if names were listed
                     if (starRequired)
                     {
                         // Same delete-before-throw handling as above
                         if (fScanner->emitErrorWillThrowException(XMLErrs::ExpectedAsterisk))
                         {
                             delete headNode;
                         }
                         fScanner->emitError(XMLErrs::ExpectedAsterisk);
                     }
                 }

                 //
                 //  Create a zero or more node and make the original head
                 //  node its first child. This is done when a star was seen,
                 //  and also when one was required but missing, so that the
                 //  model is built as if the star had been there.
                 //
                 if (starRequired || starSkipped) {
                     headNode = new (fGrammarPoolMemoryManager) ContentSpecNode
                     (
                         ContentSpecNode::ZeroOrMore
                         , headNode
                         , 0
                         , true
                         , true
                         , fGrammarPoolMemoryManager
                     );
                 }

                 // Store the head node as the content spec of the element.
                 toFill.setContentSpec(headNode);
                 break;
             }

             // Its more than just a PCDATA, so an ending star will be required now
             starRequired = true;

             // Space is legal here so check for a PE ref, but don't require space
             checkForPERef(false, true);

             // Get a name token
             if (!fReaderMgr->getName(nameBuf))
             {
                 delete headNode;
                 fScanner->emitError(XMLErrs::ExpectedElementName);
                 return false;
             }

             //
             //  Create a leaf node for it. If we can find the element id for
             //  this element, then use it. Else, we have to fault in an element
             //  decl, marked as created because of being in a content model.
             //
             XMLElementDecl* decl = fDTDGrammar->getElemDecl(fEmptyNamespaceId, 0, nameBuf.getRawBuffer(), Grammar::TOP_LEVEL_SCOPE);
             if (!decl)
             {
                 decl = new (fGrammarPoolMemoryManager) DTDElementDecl
                 (
                     nameBuf.getRawBuffer()
                     , fEmptyNamespaceId
                     , DTDElementDecl::Any
                     , fGrammarPoolMemoryManager
                 );
                 decl->setCreateReason(XMLElementDecl::InContentModel);
                 decl->setExternalElemDeclaration(isReadingExternalEntity());
                 fDTDGrammar->putElemDecl(decl);
             }

             //
             //  If the current node is the original node, this is the first choice
             //  node, so create an initial choice node with the current node and
             //  the new element id. Store this as the head node.
             //
             //  Otherwise, we have to steal the right node of the current choice
             //  and weave in another choice node there, which has the stolen
             //  node as its left and the new leaf as its right. The new choice
             //  then becomes the current node, so the tree grows down its right
             //  hand side: (PCDATA | (a | (b | c))).
             //
             if (curNode == orgNode)
             {
                 curNode = new (fGrammarPoolMemoryManager) ContentSpecNode
                 (
                     ContentSpecNode::Choice
                     , curNode
                     , new (fGrammarPoolMemoryManager) ContentSpecNode
                       (
                           decl->getElementName()
                           , fGrammarPoolMemoryManager
                       )
                     , true
                     , true
                     , fGrammarPoolMemoryManager
                 );

                 // Remember the top node
                 headNode = curNode;
             }
              else
             {
                 // Take the current right child out so it can be re-parented
                 ContentSpecNode* oldRight = curNode->orphanSecond();
                 curNode->setSecond
                 (
                     new (fGrammarPoolMemoryManager) ContentSpecNode
                     (
                         ContentSpecNode::Choice
                         , oldRight
                         , new (fGrammarPoolMemoryManager) ContentSpecNode
                           (
                               decl->getElementName()
                               , fGrammarPoolMemoryManager
                           )
                         , true
                         , true
                         , fGrammarPoolMemoryManager
                     )
                 );

                 // Make the new right node the current node
                 curNode = curNode->getSecond();
             }
         }
     }

     // Only reached via the break above, after the content spec was stored
     return true;
 }


 //
 //  This method is called when we see a '<!NOTATION' string while scanning
 //  markup decl. It parses out the notation and its id and stores a new
 //  notation decl object in the notation decl pool.
 //
 void DTDScanner::scanNotationDecl()
 {
     // Space is required here so check for a PE ref, and require space
     if (!checkForPERef(false, true))
     {
         fScanner->emitError(XMLErrs::ExpectedWhitespace);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     //
     //  And now we get a name, which is the name of the notation. Get a
     //  buffer for the name.
     //
     XMLBufBid bbName(fBufMgr);
     if (!fReaderMgr->getName(bbName.getBuffer()))
     {
         fScanner->emitError(XMLErrs::ExpectedNotationName);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // If namespaces are enabled, then no colons allowed
     if (fScanner->getDoNamespaces())
     {
         if (XMLString::indexOf(bbName.getRawBuffer(), chColon) != -1)
             fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
     }

     // Space is required here so check for a PE ref, and require space
     if (!checkForPERef(false, true))
     {
         fScanner->emitError(XMLErrs::ExpectedWhitespace);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     //
     //  And scan an external or public id. We need buffers to use for both
     //  of these.
     //
     XMLBufBid bbPubId(fBufMgr);
     XMLBufBid bbSysId(fBufMgr);
     if (!scanId(bbPubId.getBuffer(), bbSysId.getBuffer(), IDType_Either))
     {
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // We can have an optional space or PE ref here
     checkForPERef(false, true);

     //
     //  See if it already exists. If so, add it to the notatino decl pool.
     //  Otherwise, if advanced callbacks are on, create a temp one and
     //  call out for that one.
     //
     XMLNotationDecl* decl = fDTDGrammar->getNotationDecl(bbName.getRawBuffer());
     bool isIgnoring = (decl != 0);
     if (isIgnoring)
     {
         fScanner->emitError(XMLErrs::NotationAlreadyExists, bbName.getRawBuffer());
     }
      else
     {
         // Fill in a new notation declaration and add it to the pool
         const XMLCh* publicId = bbPubId.getRawBuffer();
         const XMLCh* systemId = bbSysId.getRawBuffer();
         ReaderMgr::LastExtEntityInfo lastInfo;
         fReaderMgr->getLastExtEntityInfo(lastInfo);

         decl = new (fGrammarPoolMemoryManager) XMLNotationDecl
         (
             bbName.getRawBuffer()
             , (publicId && *publicId) ? publicId : 0
             , (systemId && *systemId) ? systemId : 0
             , (lastInfo.systemId && *lastInfo.systemId) ? lastInfo.systemId : 0
             , fGrammarPoolMemoryManager
         );
         fDTDGrammar->putNotationDecl(decl);
     }

     //
     //  If we have a document type handler, then tell it about this. If we
     //  are ignoring it, only call out if advanced callbacks are enabled.
     //
     if (fDocTypeHandler)
     {
         fDocTypeHandler->notationDecl
         (
             *decl
             , isIgnoring
         );
     }

     // And one more optional space or PE ref
     checkForPERef(false, true);

     // And skip the terminating bracket
     if (!fReaderMgr->skippedChar(chCloseAngle))
         fScanner->emitError(XMLErrs::UnterminatedNotationDecl);
 }


 //
 //  Scans a PI and calls the appropriate callbacks. A PI can happen in either
 //  the document or the DTD, so it calls the appropriate handler according
 //  to the fInDocument flag.
 //
 //  At entry we have just scanned the <? part, and need to now start on the
 //  PI target name.
 //
 void DTDScanner::scanPI()
 {
     const XMLCh* namePtr = 0;
     const XMLCh* targetPtr = 0;

     //
     //  If there are any spaces here, then warn about it. If we aren't in
     //  'first error' mode, then we'll come back and can easily pick up
     //  again by just skipping them.
     //
     if (fReaderMgr->lookingAtSpace())
     {
         fScanner->emitError(XMLErrs::PINameExpected);
         fReaderMgr->skipPastSpaces();
     }

     // Get a buffer for the PI name and scan it in
     XMLBufBid bbName(fBufMgr);
     if (!fReaderMgr->getName(bbName.getBuffer()))
     {
         fScanner->emitError(XMLErrs::PINameExpected);
         fReaderMgr->skipPastChar(chCloseAngle);
         return;
     }

     // Point the name pointer at the raw data
     namePtr = bbName.getRawBuffer();

     // See if it issome form of 'xml' and emit a warning
     //if (!XMLString::compareIString(namePtr, XMLUni::fgXMLString))
     if (bbName.getLen() == 3 &&
         (((namePtr[0] == chLatin_x) || (namePtr[0] == chLatin_X)) &&
          ((namePtr[1] == chLatin_m) || (namePtr[1] == chLatin_M)) &&
          ((namePtr[2] == chLatin_l) || (namePtr[2] == chLatin_L))))
         fScanner->emitError(XMLErrs::NoPIStartsWithXML);

     // If namespaces are enabled, then no colons allowed
     if (fScanner->getDoNamespaces())
     {
         if (XMLString::indexOf(namePtr, chColon) != -1)
             fScanner->emitError(XMLErrs::ColonNotLegalWithNS);
     }

     //
     //  If we don't hit a space next, then the PI has no target. If we do
     //  then get out the target. Get a buffer for it as well
     //
     XMLBufBid bbTarget(fBufMgr);
     if (fReaderMgr->skippedSpace())
     {
         // Skip any leading spaces
         fReaderMgr->skipPastSpaces();

         bool gotLeadingSurrogate = false;

         // It does have a target, so lets move on to deal with that.
         while (1)
         {
             const XMLCh nextCh = fReaderMgr->getNextChar();

             // Watch for an end of file, which is always bad here
             if (!nextCh)
             {
                 fScanner->emitError(XMLErrs::UnterminatedPI);
                 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
             }

             // Watch for potential terminating character
             if (nextCh == chQuestion)
             {
                 // It must be followed by '>' to be a termination of the target
                 if (fReaderMgr->skippedChar(chCloseAngle))
                     break;
             }

             // Check for correct surrogate pairs
             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
             {
                 if (gotLeadingSurrogate)
                     fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                 else
                     gotLeadingSurrogate = true;
             }
              else
             {
                 if (gotLeadingSurrogate)
                 {
                     if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                         fScanner->emitError(XMLErrs::Expected2ndSurrogateChar);
                 }
                 // Its got to at least be a valid XML character
                 else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) {

                     XMLCh tmpBuf[9];
                     XMLString::binToText
                     (
                         nextCh
                         , tmpBuf
                         , 8
                         , 16
                         , fMemoryManager
                     );
                     fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf);
                 }

                 gotLeadingSurrogate = false;
             }
             bbTarget.append(nextCh);
         }
     }
      else
     {
         // No target, but make sure its terminated ok
         if (!fReaderMgr->skippedChar(chQuestion))
         {
             fScanner->emitError(XMLErrs::UnterminatedPI);
             fReaderMgr->skipPastChar(chCloseAngle);
             return;
         }

         if (!fReaderMgr->skippedChar(chCloseAngle))
         {
             fScanner->emitError(XMLErrs::UnterminatedPI);
             fReaderMgr->skipPastChar(chCloseAngle);
             return;
         }
     }

     // Point the target pointer at the raw data
     targetPtr = bbTarget.getRawBuffer();

     //
     //  If we have a handler, then call it.
     //
     if (fDocTypeHandler)
     {
         fDocTypeHandler->doctypePI
         (
             namePtr
             , targetPtr
         );
     }
 }


 //
 //  Scans a quoted public id literal into toFill, without the quotes. The
 //  literal must start with a single or double quote and runs up to the next
 //  occurrence of that same quote character.
 //
 //  Characters that are not legal public id characters are reported but
 //  still stored, since carrying on is the best way to recover.
 //
 //  Returns false, after emitting an error, if no opening quote is found,
 //  and true once the closing quote has been consumed. Throws
 //  UnexpectedEOFException if the input ends inside the literal.
 //
 bool DTDScanner::scanPublicLiteral(XMLBuffer& toFill)
 {
     toFill.reset();

     // The literal has to open with a single or double quote
     XMLCh openQuote;
     if (!fReaderMgr->skipIfQuote(openQuote))
     {
         fScanner->emitError(XMLErrs::ExpectedQuotedString);
         return false;
     }

     for (;;)
     {
         const XMLCh curCh = fReaderMgr->getNextChar();

         // Running out of input inside the literal is fatal
         if (!curCh)
         {
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
         }

         // The matching quote ends the literal
         if (curCh == openQuote)
             return true;

         // Report illegal public id characters, as hex text, but keep going
         if (!fReaderMgr->getCurrentReader()->isPublicIdChar(curCh))
         {
             XMLCh hexText[9];
             XMLString::binToText(curCh, hexText, 8, 16, fMemoryManager);
             fScanner->emitError(XMLErrs::InvalidPublicIdChar, hexText);
         }

         toFill.append(curCh);
     }
 }


 //
 //  Scans a quoted system literal into toFill, without the quotes. The
 //  literal must start with a single or double quote and runs up to the next
 //  occurrence of that same quote character. No checks are made on the
 //  characters in between.
 //
 //  Returns false, after emitting an error, if no opening quote is found,
 //  and true once the closing quote has been consumed. Throws
 //  UnexpectedEOFException if the input ends inside the literal.
 //
 bool DTDScanner::scanSystemLiteral(XMLBuffer& toFill)
 {
     toFill.reset();

     // The literal has to open with a single or double quote
     XMLCh openQuote;
     if (!fReaderMgr->skipIfQuote(openQuote))
     {
         fScanner->emitError(XMLErrs::ExpectedQuotedString);
         return false;
     }

     for (;;)
     {
         const XMLCh curCh = fReaderMgr->getNextChar();

         // Running out of input inside the literal is fatal
         if (!curCh)
         {
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
         }

         // The matching quote ends the literal
         if (curCh == openQuote)
             return true;

         toFill.append(curCh);
     }
 }


 //
 //  This method is called to scan a text decl line, which can be the first
 //  line in an external entity or external subset.
 //
 //  On entry the <? has been scanned, and next should be 'xml' followed by
 //  some whitespace, version string, etc...
 //    [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 //
 void DTDScanner::scanTextDecl()
 {
     // Skip any subsequent whitespace before the version string
     fReaderMgr->skipPastSpaces();

     // Next should be the version string
     XMLBufBid bbVersion(fBufMgr);
     if (fReaderMgr->skippedString(XMLUni::fgVersionString))
     {
         if (!scanEq())
         {
             fScanner->emitError(XMLErrs::ExpectedEqSign);
             fReaderMgr->skipPastChar(chCloseAngle);
             return;
         }

         //
         //  Followed by a single or double quoted version. Get a buffer for
         //  the string.
         //
         if (!getQuotedString(bbVersion.getBuffer()))
         {
             fScanner->emitError(XMLErrs::BadXMLVersion);
             fReaderMgr->skipPastChar(chCloseAngle);
             return;
         }

         // If its not our supported version, issue an error but continue
         if (XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgVersion1_1)) {
             if (fScanner->getXMLVersion() != XMLReader::XMLV1_1)
         	    fScanner->emitError(XMLErrs::UnsupportedXMLVersion, bbVersion.getRawBuffer());
         }
         else if (!XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgVersion1_0))
             fScanner->emitError(XMLErrs::UnsupportedXMLVersion, bbVersion.getRawBuffer());
     }

     // Ok, now we must have an encoding string
     XMLBufBid bbEncoding(fBufMgr);
     fReaderMgr->skipPastSpaces();
     bool gotEncoding = false;
     if (fReaderMgr->skippedString(XMLUni::fgEncodingString))
     {
         // There must be a equal sign next
         if (!scanEq())
         {
             fScanner->emitError(XMLErrs::ExpectedEqSign);
             fReaderMgr->skipPastChar(chCloseAngle);
             return;
         }

         // Followed by a single or double quoted version string
         getQuotedString(bbEncoding.getBuffer());
         if (bbEncoding.isEmpty() || !XMLString::isValidEncName(bbEncoding.getRawBuffer()))
         {
             fScanner->emitError(XMLErrs::BadXMLEncoding, bbEncoding.getRawBuffer());
             fReaderMgr->skipPastChar(chCloseAngle);
             return;
         }

         // Indicate that we got an encoding
         gotEncoding = true;
     }

     //
     // Encoding declarations are required in the external entity
     // if there is a text declaration present
     //
     if (!gotEncoding)
     {
       fScanner->emitError(XMLErrs::EncodingRequired);
       fReaderMgr->skipPastChar(chCloseAngle);
       return;

     }

     fReaderMgr->skipPastSpaces();
     if (!fReaderMgr->skippedChar(chQuestion))
     {
         fScanner->emitError(XMLErrs::UnterminatedXMLDecl);
         fReaderMgr->skipPastChar(chCloseAngle);
     }
      else if (!fReaderMgr->skippedChar(chCloseAngle))
     {
         fScanner->emitError(XMLErrs::UnterminatedXMLDecl);
         fReaderMgr->skipPastChar(chCloseAngle);
     }

     //
     //  If we have a document type handler and advanced callbacks are on,
     //  then call the TextDecl callback
     //
     if (fDocTypeHandler)
     {
         fDocTypeHandler->TextDecl
         (
             bbVersion.getRawBuffer()
             , bbEncoding.getRawBuffer()
         );
     }

     //
     //  If we got an encoding string, then we have to call back on the reader
     //  to tell it what the encoding is.
     //
     if (!bbEncoding.isEmpty())
     {
         if (!fReaderMgr->getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))
             fScanner->emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());
     }
 }

 XERCES_CPP_NAMESPACE_END