src/xercesc/internal/WFXMLScanner.cpp - platform/external/xerces-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
   * $Id: WFXMLScanner.cpp 568078 2007-08-21 11:43:25Z amassari $
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/internal/WFXMLScanner.hpp>
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/RuntimeException.hpp>
 #include <xercesc/util/UnexpectedEOFException.hpp>
 #include <xercesc/sax/InputSource.hpp>
 #include <xercesc/framework/XMLDocumentHandler.hpp>
 #include <xercesc/framework/XMLEntityHandler.hpp>
 #include <xercesc/framework/XMLPScanToken.hpp>
 #include <xercesc/framework/XMLValidityCodes.hpp>
 #include <xercesc/internal/EndOfEntityException.hpp>
 #include <xercesc/util/OutOfMemoryException.hpp>

 XERCES_CPP_NAMESPACE_BEGIN

 // ---------------------------------------------------------------------------
 //  WFXMLScanner: Constructors and Destructor
 // ---------------------------------------------------------------------------


 //  Janitor types used in this file. CleanupType is bound to
 //  WFXMLScanner::cleanUp by the constructors; ReaderMgrResetType is bound
 //  to ReaderMgr::reset by scanDocument() and scanNext(). In both cases the
 //  callers invoke release() on the janitor when the bound call must not
 //  be made.
 typedef JanitorMemFunCall<WFXMLScanner> CleanupType;
 typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;


 //  Constructs a scanner with no handlers installed. All member pointers
 //  start out null; the actual allocations are done by commonInit().
 WFXMLScanner::WFXMLScanner(XMLValidator* const    valToAdopt,
                            GrammarResolver* const grammarResolver,
                            MemoryManager* const   manager)
     : XMLScanner(valToAdopt, grammarResolver, manager)
     , fElementIndex(0)
     , fElements(0)
     , fEntityTable(0)
     , fAttrNameHashList(0)
     , fAttrNSList(0)
     , fElementLookup(0)
 {
     //  Guard bound to cleanUp(); it is released on every path below
     //  except when commonInit() fails with something other than an
     //  out-of-memory condition.
     CleanupType initGuard(this, &WFXMLScanner::cleanUp);

     try
     {
         commonInit();

         // Initialization succeeded, so there is nothing to undo
         initGuard.release();
     }
     catch(const OutOfMemoryException&)
     {
         //  When memory is exhausted we deliberately skip the cleanup,
         //  since running that code can itself cause problems.
         initGuard.release();
         throw;
     }
 }

 //  Constructs a scanner with the given handlers installed. All member
 //  pointers start out null; the actual allocations are done by
 //  commonInit().
 WFXMLScanner::WFXMLScanner(XMLDocumentHandler* const docHandler,
                            DocTypeHandler* const     docTypeHandler,
                            XMLEntityHandler* const   entityHandler,
                            XMLErrorReporter* const   errHandler,
                            XMLValidator* const       valToAdopt,
                            GrammarResolver* const    grammarResolver,
                            MemoryManager* const      manager)
     : XMLScanner(docHandler, docTypeHandler, entityHandler, errHandler, valToAdopt, grammarResolver, manager)
     , fElementIndex(0)
     , fElements(0)
     , fEntityTable(0)
     , fAttrNameHashList(0)
     , fAttrNSList(0)
     , fElementLookup(0)
 {
     //  Guard bound to cleanUp(); it is released on every path below
     //  except when commonInit() fails with something other than an
     //  out-of-memory condition.
     CleanupType initGuard(this, &WFXMLScanner::cleanUp);

     try
     {
         commonInit();

         // Initialization succeeded, so there is nothing to undo
         initGuard.release();
     }
     catch(const OutOfMemoryException&)
     {
         //  When memory is exhausted we deliberately skip the cleanup,
         //  since running that code can itself cause problems.
         initGuard.release();
         throw;
     }
 }

 //  Destructor. Delegates to cleanUp(), which deletes the member objects
 //  that commonInit() allocated.
 WFXMLScanner::~WFXMLScanner()
 {
     cleanUp();
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Getter methods
 // ---------------------------------------------------------------------------
 //  This scanner does not expose an entity declaration pool, so a null
 //  pointer is always returned. Callers must check the result before use.
 NameIdPool<DTDEntityDecl>* WFXMLScanner::getEntityDeclPool()
 {
     return 0;
 }

 //  Const overload of the getter above; it likewise always returns a null
 //  pointer.
 const NameIdPool<DTDEntityDecl>* WFXMLScanner::getEntityDeclPool() const
 {
     return 0;
 }

 // ---------------------------------------------------------------------------
 //  WFXMLScanner: Main entry point to scan a document
 // ---------------------------------------------------------------------------
 //  Scans a whole document from the passed input source in one call:
 //  reset, prolog, content, trailing miscellaneous, with the start/end
 //  document events sent to the document handler if one is installed.
 void WFXMLScanner::scanDocument(const InputSource& src)
 {
     //  A new sequence id invalidates any progressive scan tokens that
     //  were handed out earlier by this parser instance.
     ++fSequenceId;

     //  Bound to ReaderMgr::reset; only the out-of-memory paths below
     //  release it.
     ReaderMgrResetType  readerMgrGuard(&fReaderMgr, &ReaderMgr::reset);

     try
     {
         //  Get the scanner and everything plugged into it ready for a new
         //  run: data structures are reset, the initial reader is created
         //  and pushed, and the base document path is set up.
         scanReset(src);

         if (fDocHandler)
             fDocHandler->startDocument();

         //  The prolog is everything in front of the root element,
         //  including the DTD subsets.
         scanProlog();

         if (fReaderMgr.atEOF())
         {
             //  Input ended before any content, so it's not a valid XML file
             emitError(XMLErrs::EmptyMainEntity);
         }
         else if (scanContent() && !fReaderMgr.atEOF())
         {
             //  Content went ok and there is input left over, so scan the
             //  miscellaneous stuff that may follow the root element.
             scanMiscellaneous();
         }

         if (fDocHandler)
             fDocHandler->endDocument();
     }
     //  NOTE:
     //
     //  In all of the error processing below, the emitError() call MUST come
     //  before the flush of the reader mgr, or it will fail because it tries
     //  to find out the position in the XML source of the error.
     catch(const XMLErrs::Codes)
     {
         // A 'first failure' exit; nothing more to do
     }
     catch(const XMLValid::Codes)
     {
         // A 'first fatal error' exit; nothing more to do
     }
     catch(const XMLException& excToCatch)
     {
         //  Report the exception through the normal error path. A user
         //  exception may be thrown from there; the reader manager guard
         //  takes care of the flush in that case.
         fInException = true;
         try
         {
             //  Pick the scanner error that matches the severity of the
             //  exception, then report it once.
             const XMLErrs::Codes reportAs =
                 (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                     ? XMLErrs::XMLException_Warning
                     : (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                         ? XMLErrs::XMLException_Fatal
                         : XMLErrs::XMLException_Error;

             emitError
             (
                 reportAs
                 , excToCatch.getCode()
                 , excToCatch.getType()
                 , excToCatch.getMessage()
             );
         }
         catch(const OutOfMemoryException&)
         {
             //  Out of memory is special: resetting the ReaderMgr can be
             //  problematic, so make sure the guard does not do it.
             readerMgrGuard.release();

             throw;
         }
     }
     catch(const OutOfMemoryException&)
     {
         //  Out of memory is special: resetting the ReaderMgr can be
         //  problematic, so make sure the guard does not do it.
         readerMgrGuard.release();

         throw;
     }
 }


 //  Progressive scan entry point: scans the next piece of the document
 //  (a run of character data or one markup construct).
 //
 //  token   The progressive scan token; it must still be legal for this
 //          scanner or a RuntimeException (Scan_BadPScanToken) is thrown.
 //
 //  Returns true if the scan can continue, false when the end of input was
 //  reached or an error stopped the scan. When false is returned the
 //  reader manager is reset on the way out. An OutOfMemoryException is
 //  propagated to the caller without resetting the reader manager.
 bool WFXMLScanner::scanNext(XMLPScanToken& token)
 {
     // Make sure this token is still legal
     if (!isLegalToken(token))
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);

     // Find the next token and remember the reader id
     unsigned int orgReader;
     XMLTokens curToken;
     bool retVal = true;

     //  Bound to ReaderMgr::reset; released below when the scan can
     //  continue or when we are out of memory.
     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

     try
     {
         while (true)
         {
             //  We have to handle any end of entity exceptions that happen here.
             //  We could be at the end of X nested entities, each of which will
             //  generate an end of entity exception as we try to move forward.
             try
             {
                 curToken = senseNextToken(orgReader);
                 break;
             }
             catch(const EndOfEntityException& toCatch)
             {
                 // Send an end of entity reference event
                 if (fDocHandler)
                     fDocHandler->endEntityReference(toCatch.getEntity());
             }
         }

         if (curToken == Token_CharData)
         {
             scanCharData(fCDataBuf);
         }
         else if (curToken == Token_EOF)
         {
             //  The element stack should be empty here; if not, the input
             //  ended before all elements were closed.
             if (!fElemStack.isEmpty())
             {
                 const ElemStack::StackElem* topElem = fElemStack.popTop();
                 emitError
                 (
                     XMLErrs::EndedWithTagsOnStack
                     , topElem->fThisElement->getFullName()
                 );
             }

             retVal = false;
         }
         else
         {
             // It's some sort of markup
             bool gotData = true;
             switch(curToken)
             {
                 case Token_CData :
                     // Make sure we are within content
                     if (fElemStack.isEmpty())
                         emitError(XMLErrs::CDATAOutsideOfContent);
                     scanCDSection();
                     break;

                 case Token_Comment :
                     scanComment();
                     break;

                 case Token_EndTag :
                     scanEndTag(gotData);
                     break;

                 case Token_PI :
                     scanPI();
                     break;

                 case Token_StartTag :
                     if (fDoNamespaces)
                         scanStartTagNS(gotData);
                     else
                         scanStartTag(gotData);
                     break;

                 default :
                     fReaderMgr.skipToChar(chOpenAngle);
                     break;
             }

             //  Markup must end in the same reader (entity) it started in
             if (orgReader != fReaderMgr.getCurrentReaderNum())
                 emitError(XMLErrs::PartialMarkupInEntity);

             // If we hit the end, then do the miscellaneous part
             if (!gotData)
             {
                 // That went ok, so scan for any miscellaneous stuff
                 scanMiscellaneous();

                 if (fDocHandler)
                     fDocHandler->endDocument();
             }
         }
     }
     //  NOTE:
     //
     //  In all of the error processing below, the emitError() call MUST come
     //  before the flush of the reader mgr, or it will fail because it tries
     //  to find out the position in the XML source of the error.
     catch(const XMLErrs::Codes)
     {
         // This is a 'first failure' exception, so return failure
         retVal = false;
     }
     catch(const XMLValid::Codes)
     {
         // This is a 'first fatal error' type exit, so return failure
         retVal = false;
     }
     catch(const XMLException& excToCatch)
     {
         //  Emit the error and catch any user exception thrown from here. Make
         //  sure in all cases we flush the reader manager.
         fInException = true;
         try
         {
             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                 emitError
                 (
                     XMLErrs::XMLException_Warning
                     , excToCatch.getCode()
                     , excToCatch.getType()
                     , excToCatch.getMessage()
                 );
             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                 emitError
                 (
                     XMLErrs::XMLException_Fatal
                     , excToCatch.getCode()
                     , excToCatch.getType()
                     , excToCatch.getMessage()
                 );
             else
                 emitError
                 (
                     XMLErrs::XMLException_Error
                     , excToCatch.getCode()
                     , excToCatch.getType()
                     , excToCatch.getMessage()
                 );
         }
         catch(const OutOfMemoryException&)
         {
             // This is a special case for out-of-memory
             // conditions, because resetting the ReaderMgr
             // can be problematic.
             resetReaderMgr.release();

             throw;
         }

         // Return failure
         retVal = false;
     }
     catch(const OutOfMemoryException&)
     {
         //  This is a special case for out-of-memory conditions, because
         //  resetting the ReaderMgr can be problematic. Release the guard
         //  before rethrowing, exactly as the nested handler above and
         //  scanDocument() do; otherwise the reset would still run while
         //  the exception unwinds.
         resetReaderMgr.release();

         throw;
     }

     // If we are not at the end, release the object that will
     // reset the ReaderMgr.
     if (retVal)
         resetReaderMgr.release();

     return retVal;
 }


 // ---------------------------------------------------------------------------
 //  WFXMLScanner: Private helper methods.
 // ---------------------------------------------------------------------------

 //  This method handles the common initialization, to avoid having to do
 //  it redundantly in multiple constructors.
 void WFXMLScanner::commonInit()
 {
     fEntityTable = new (fMemoryManager) ValueHashTableOf<XMLCh>(11, fMemoryManager);
     fAttrNameHashList = new (fMemoryManager)ValueVectorOf<unsigned int>(16, fMemoryManager);
     fAttrNSList = new (fMemoryManager) ValueVectorOf<XMLAttr*>(8, fMemoryManager);
     fElements = new (fMemoryManager) RefVectorOf<XMLElementDecl>(32, true, fMemoryManager);
     fElementLookup = new (fMemoryManager) RefHashTableOf<XMLElementDecl>(109, false, fMemoryManager);

     //  Add the default entity entries for the character refs that must always
     //  be present.
     fEntityTable->put((void*) XMLUni::fgAmp, chAmpersand);
     fEntityTable->put((void*) XMLUni::fgLT, chOpenAngle);
     fEntityTable->put((void*) XMLUni::fgGT, chCloseAngle);
     fEntityTable->put((void*) XMLUni::fgQuot, chDoubleQuote);
     fEntityTable->put((void*) XMLUni::fgApos, chSingleQuote);
 }

 //  Deletes the member objects allocated by commonInit(). It is called by
 //  the destructor and, through the CleanupType janitor, when a constructor
 //  fails. Each pointer is set back to null after deletion so that the
 //  members never dangle and a repeated call is harmless.
 void WFXMLScanner::cleanUp()
 {
     delete fEntityTable;
     fEntityTable = 0;

     delete fAttrNameHashList;
     fAttrNameHashList = 0;

     delete fAttrNSList;
     fAttrNSList = 0;

     //  The lookup table is deleted before the vector of elements it
     //  refers to.
     delete fElementLookup;
     fElementLookup = 0;

     delete fElements;
     fElements = 0;
 }

 //  Maps a namespace prefix to its URI id.
 //
 //  prefix  The prefix to resolve.
 //  mode    Passed through to the element stack's prefix mapping.
 //
 //  Returns the URI id for the prefix. An UnknownPrefix error is emitted
 //  if the element stack reports the prefix as unknown.
 unsigned int
 WFXMLScanner::resolvePrefix(const   XMLCh* const          prefix
                             , const ElemStack::MapModes mode)
 {
     //  The two special prefixes always map to fixed ids: 'xmlns' to a
     //  place holder URI that we define (so that it maps to something
     //  checkable) and 'xml' to the official URI given by the NS spec.
     if (XMLString::equals(prefix, XMLUni::fgXMLNSString))
         return fXMLNSNamespaceId;

     if (XMLString::equals(prefix, XMLUni::fgXMLString))
         return fXMLNamespaceId;

     //  Anything else is looked up by the element stack, which searches up
     //  itself for a mapping.
     bool notMapped;
     const unsigned int mappedId = fElemStack.mapPrefixToURI(prefix, mode, notMapped);

     //  For an unknown prefix a URI was faked in, but it is still an error
     if (notMapped)
         emitError(XMLErrs::UnknownPrefix, prefix);

     return mappedId;
 }

 //  This method will reset the scanner data structures, and related plugged
 //  in stuff, for a new scan session. We get the input source for the primary
 //  XML entity, create the reader for it, and push it on the stack so that
 //  upon successful return from here we are ready to go.
 void WFXMLScanner::scanReset(const InputSource& src)
 {
     //  For all installed handlers, send reset events. This gives them
     //  a chance to flush any cached data.
     if (fDocHandler)
         fDocHandler->resetDocument();
     if (fEntityHandler)
         fEntityHandler->resetEntities();
     if (fErrorReporter)
         fErrorReporter->resetErrors();

     //  Reset the element stack, and give it the latest ids for the special
     //  URIs it has to know about.
     fElemStack.reset
     (
         fEmptyNamespaceId
         , fUnknownNamespaceId
         , fXMLNamespaceId
         , fXMLNSNamespaceId
     );

     // Reset some status flags
     fInException = false;
     fStandalone = false;
     fErrorCount = 0;
     fHasNoDTD = true;
     fElementIndex = 0;

     // Reset elements lookup table
     fElementLookup->removeAll();

     //  Handle the creation of the XML reader object for this input source.
     //  This will provide us with transcoding and basic lexing services.
     XMLReader* newReader = fReaderMgr.createReader
     (
         src
         , true
         , XMLReader::RefFrom_NonLiteral
         , XMLReader::Type_General
         , XMLReader::Source_External
         , fCalculateSrcOfs
     );

     if (!newReader) {
         if (src.getIssueFatalErrorIfNotFound())
             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource, src.getSystemId(), fMemoryManager);
         else
             ThrowXMLwithMemMgr1(RuntimeException, XMLExcepts::Scan_CouldNotOpenSource_Warning, src.getSystemId(), fMemoryManager);
     }

     // Push this read onto the reader manager
     fReaderMgr.pushReader(newReader, 0);

     // and reset security-related things if necessary:
     if(fSecurityManager != 0)
     {
         fEntityExpansionLimit = fSecurityManager->getEntityExpansionLimit();
         fEntityExpansionCount = 0;
     }
 }

 //  Character data found between markup in content is scanned by the
 //  character data scanning code, which watches for any markup characters
 //  that would indicate that the character data has ended and also handles
 //  expansion of general and character entities.
 //
 //  sendCharData() is the helper that delivers the scanned text: it passes
 //  the contents of the buffer to the document handler, if there is one,
 //  and then resets the buffer.
 //  Passes the contents of toSend to the document handler (if any) as
 //  plain character data and then resets the buffer. An empty buffer is
 //  left untouched.
 void WFXMLScanner::sendCharData(XMLBuffer& toSend)
 {
     if (!toSend.isEmpty())
     {
         //  Not validating, so it is always reported as just char data
         if (fDocHandler)
             fDocHandler->docCharacters(toSend.getRawBuffer(), toSend.getLen(), false);

         // The text has been delivered, so start over with an empty buffer
         toSend.reset();
     }
 }

 // ---------------------------------------------------------------------------
 //  WFXMLScanner: Private scanning methods
 // ---------------------------------------------------------------------------

 //  This method will kick off the scanning of the primary content of the
 //  document, i.e. the elements.
 bool WFXMLScanner::scanContent()
 {
     //  Go into a loop until we hit the end of the root element, or we fall
     //  out because there is no root element.
     //
     //  We have to do kind of a deeply nested double loop here in order to
     //  avoid doing the setup/teardown of the exception handler on each
     //  round. Doing it this way we only do it when an exception actually
     //  occurs.
     bool gotData = true;
     bool inMarkup = false;
     while (gotData)
     {
         try
         {
             while (gotData)
             {
                 //  Sense what the next top level token is. According to what
                 //  this tells us, we will call something to handle that kind
                 //  of thing.
                 unsigned int orgReader;
                 const XMLTokens curToken = senseNextToken(orgReader);

                 //  Handle character data and end of file specially. Char data
                 //  is not markup so we don't want to handle it in the loop
                 //  below.
                 if (curToken == Token_CharData)
                 {
                     //  Scan the character data and call appropriate events. Let
                     //  him use our local character data buffer for efficiency.
                     scanCharData(fCDataBuf);
                     continue;
                 }
                 else if (curToken == Token_EOF)
                 {
                     //  The element stack better be empty at this point or we
                     //  ended prematurely before all elements were closed.
                     if (!fElemStack.isEmpty())
                     {
                         const ElemStack::StackElem* topElem = fElemStack.popTop();
                         emitError
                         (
                             XMLErrs::EndedWithTagsOnStack
                             , topElem->fThisElement->getFullName()
                         );
                     }

                     // Its the end of file, so clear the got data flag
                     gotData = false;
                     continue;
                 }

                 // We are in some sort of markup now
                 inMarkup = true;

                 //  According to the token we got, call the appropriate
                 //  scanning method.
                 switch(curToken)
                 {
                     case Token_CData :
                         // Make sure we are within content
                         if (fElemStack.isEmpty())
                             emitError(XMLErrs::CDATAOutsideOfContent);
                         scanCDSection();
                         break;

                     case Token_Comment :
                         scanComment();
                         break;

                     case Token_EndTag :
                         scanEndTag(gotData);
                         break;

                     case Token_PI :
                         scanPI();
                         break;

                     case Token_StartTag :
                         if (fDoNamespaces)
                             scanStartTagNS(gotData);
                         else
                             scanStartTag(gotData);
                         break;

                     default :
                         fReaderMgr.skipToChar(chOpenAngle);
                         break;
                 }

                 if (orgReader != fReaderMgr.getCurrentReaderNum())
                     emitError(XMLErrs::PartialMarkupInEntity);

                 // And we are back out of markup again
                 inMarkup = false;
             }
         }
         catch(const EndOfEntityException& toCatch)
         {
             //  If we were in some markup when this happened, then its a
             //  partial markup error.
             if (inMarkup)
                 emitError(XMLErrs::PartialMarkupInEntity);

             // Send an end of entity reference event
             if (fDocHandler)
                 fDocHandler->endEntityReference(toCatch.getEntity());

             inMarkup = false;
         }
     }

     // It went ok, so return success
     return true;
 }


 void WFXMLScanner::scanEndTag(bool& gotData)
 {
     //  Assume we will still have data until proven otherwise. It will only
     //  ever be false if this is the end of the root element.
     gotData = true;

     //  Check if the element stack is empty. If so, then this is an unbalanced
     //  element (i.e. more ends than starts, perhaps because of bad text
     //  causing one to be skipped.)
     if (fElemStack.isEmpty())
     {
         emitError(XMLErrs::MoreEndThanStartTags);
         fReaderMgr.skipPastChar(chCloseAngle);
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_UnbalancedStartEnd, fMemoryManager);
     }

     //  Pop the stack of the element we are supposed to be ending. Remember
     //  that we don't own this. The stack just keeps them and reuses them.
     unsigned int uriId = (fDoNamespaces)
         ? fElemStack.getCurrentURI() : fEmptyNamespaceId;
     const ElemStack::StackElem* topElem = fElemStack.popTop();

     // See if it was the root element, to avoid multiple calls below
     const bool isRoot = fElemStack.isEmpty();

     // Make sure that its the end of the element that we expect
     if (!fReaderMgr.skippedString(topElem->fThisElement->getFullName()))
     {
         emitError
         (
             XMLErrs::ExpectedEndOfTagX
             , topElem->fThisElement->getFullName()
         );
         fReaderMgr.skipPastChar(chCloseAngle);
         return;
     }

     // Make sure we are back on the same reader as where we started
     if (topElem->fReaderNum != fReaderMgr.getCurrentReaderNum())
         emitError(XMLErrs::PartialTagMarkupError);

     // Skip optional whitespace
     fReaderMgr.skipPastSpaces();

     // Make sure we find the closing bracket
     if (!fReaderMgr.skippedChar(chCloseAngle))
     {
         emitError
         (
             XMLErrs::UnterminatedEndTag
             , topElem->fThisElement->getFullName()
         );
     }

     // If we have a doc handler, tell it about the end tag
     if (fDocHandler)
     {
         fDocHandler->endElement
         (
             *topElem->fThisElement
             , uriId
             , isRoot
             , topElem->fThisElement->getElementName()->getPrefix()
         );
     }

     // If this was the root, then done with content
     gotData = !isRoot;
 }

 void WFXMLScanner::scanDocTypeDecl()
 {
     // Just skips over it
     // REVISIT: Should we issue a warning
     static const XMLCh doctypeIE[] =
     {
         chOpenSquare, chCloseAngle, chNull
     };
     XMLCh nextCh = fReaderMgr.skipUntilIn(doctypeIE);

     if (nextCh == chOpenSquare)
         fReaderMgr.skipPastChar(chCloseSquare);

     fReaderMgr.skipPastChar(chCloseAngle);
 }

 bool WFXMLScanner::scanStartTag(bool& gotData)
 {
     //  Assume we will still have data until proven otherwise. It will only
     //  ever be false if this is the root and its empty.
     gotData = true;

     //  Get the QName. In this case, we are not doing namespaces, so we just
     //  use it as is and don't have to break it into parts.
     if (!fReaderMgr.getName(fQNameBuf))
     {
         emitError(XMLErrs::ExpectedElementName);
         fReaderMgr.skipToChar(chOpenAngle);
         return false;
     }

     // Assume it won't be an empty tag
     bool isEmpty = false;

     // See if its the root element
     const bool isRoot = fElemStack.isEmpty();

     //  Lets try to look up the element
     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
     XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf);

     if (!elemDecl) {

         if (fElementIndex < fElements->size()) {
             elemDecl = fElements->elementAt(fElementIndex);
         }
         else {
             elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
             (
                 fGrammarPoolMemoryManager
             );
             fElements->addElement(elemDecl);
         }

         elemDecl->setElementName(XMLUni::fgZeroLenString, qnameRawBuf, fEmptyNamespaceId);
         fElementLookup->put((void*)elemDecl->getFullName(), elemDecl);
         fElementIndex++;
     }

     // Expand the element stack and add the new element
     fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());

     // Skip any whitespace after the name
     fReaderMgr.skipPastSpaces();

     //  We loop until we either see a /> or >, handling attribute/value
     //  pairs until we get there.
     unsigned int    attCount = 0;
     unsigned int    curAttListSize = fAttrList->size();
     while (true)
     {
         // And get the next non-space character
         XMLCh nextCh = fReaderMgr.peekNextChar();

         //  If the next character is not a slash or closed angle bracket,
         //  then it must be whitespace, since whitespace is required
         //  between the end of the last attribute and the name of the next
         //  one.
         if (attCount)
         {
             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
             {
                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                 {
                     // Ok, skip by them and peek another char
                     fReaderMgr.skipPastSpaces();
                     nextCh = fReaderMgr.peekNextChar();
                 }
                  else
                 {
                     // Emit the error but keep on going
                     emitError(XMLErrs::ExpectedWhitespace);
                 }
             }
         }

         //  Ok, here we first check for any of the special case characters.
         //  If its not one, then we do the normal case processing, which
         //  assumes that we've hit an attribute value, Otherwise, we do all
         //  the special case checks.
         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
         {
             //  Assume its going to be an attribute, so get a name from
             //  the input.
             if (!fReaderMgr.getName(fAttNameBuf))
             {
                 emitError(XMLErrs::ExpectedAttrName);
                 fReaderMgr.skipPastChar(chCloseAngle);
                 return false;
             }

             // And next must be an equal sign
             if (!scanEq())
             {
                 static const XMLCh tmpList[] =
                 {
                     chSingleQuote, chDoubleQuote, chCloseAngle
                     , chOpenAngle, chForwardSlash, chNull
                 };

                 emitError(XMLErrs::ExpectedEqSign);

                 //  Try to sync back up by skipping forward until we either
                 //  hit something meaningful.
                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                 {
                     // Jump back to top for normal processing of these
                     continue;
                 }
                 else if ((chFound == chSingleQuote)
                       ||  (chFound == chDoubleQuote)
                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                 {
                     // Just fall through assuming that the value is to follow
                 }
                 else if (chFound == chOpenAngle)
                 {
                     // Assume a malformed tag and that new one is starting
                     emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
                     return false;
                 }
                 else
                 {
                     // Something went really wrong
                     return false;
                 }
             }

             //  See if this attribute is declared more than one for this element.
             const XMLCh* attNameRawBuf = fAttNameBuf.getRawBuffer();
             unsigned int attNameHash = XMLString::hash(attNameRawBuf, 109, fMemoryManager);

             if (attCount) {

                 for (unsigned int k=0; k < attCount; k++) {

                     if (fAttrNameHashList->elementAt(k) == attNameHash) {
                         if (
                                XMLString::equals
                                (
                                    fAttrList->elementAt(k)->getName()
                                    , attNameRawBuf
                                )
                            )
                         {
                             emitError
                             (
                                 XMLErrs::AttrAlreadyUsedInSTag
                                 , attNameRawBuf
                                 , qnameRawBuf
                             );
                             break;
                         }
                     }
                 }
             }

             //  Skip any whitespace before the value and then scan the att
             //  value. This will come back normalized with entity refs and
             //  char refs expanded.
             fReaderMgr.skipPastSpaces();
             if (!scanAttValue(attNameRawBuf, fAttValueBuf))
             {
                 static const XMLCh tmpList[] =
                 {
                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
                 };

                 emitError(XMLErrs::ExpectedAttrValue);

                 //  It failed, so lets try to get synced back up. We skip
                 //  forward until we find some whitespace or one of the
                 //  chars in our list.
                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                 if ((chFound == chCloseAngle)
                 ||  (chFound == chForwardSlash)
                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                 {
                     //  Just fall through and process this attribute, though
                     //  the value will be "".
                 }
                 else if (chFound == chOpenAngle)
                 {
                     // Assume a malformed tag and that new one is starting
                     emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
                     return false;
                 }
                 else
                 {
                     // Something went really wrong
                     return false;
                 }
             }

             //  Add this attribute to the attribute list that we use to
             //  pass them to the handler. We reuse its existing elements
             //  but expand it as required.
             XMLAttr* curAtt;
             if (attCount >= curAttListSize)
             {
                 curAtt = new (fMemoryManager) XMLAttr
                 (
                     0
                     , attNameRawBuf
                     , XMLUni::fgZeroLenString
                     , fAttValueBuf.getRawBuffer()
                     , XMLAttDef::CData
                     , true
                     , fMemoryManager
                 );
                 fAttrList->addElement(curAtt);
                 fAttrNameHashList->addElement(attNameHash);
             }
             else
             {
                 curAtt = fAttrList->elementAt(attCount);
                 curAtt->set
                 (
                     0
                     , attNameRawBuf
                     , XMLUni::fgZeroLenString
                     , fAttValueBuf.getRawBuffer()
                 );
                 curAtt->setSpecified(true);
                 fAttrNameHashList->setElementAt(attNameHash, attCount);
             }
             attCount++;

             // And jump back to the top of the loop
             continue;
         }

         //  It was some special case character so do all of the checks and
         //  deal with it.
         if (!nextCh)
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

         if (nextCh == chForwardSlash)
         {
             fReaderMgr.getNextChar();
             isEmpty = true;
             if (!fReaderMgr.skippedChar(chCloseAngle))
                 emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
             break;
         }
         else if (nextCh == chCloseAngle)
         {
             fReaderMgr.getNextChar();
             break;
         }
         else if (nextCh == chOpenAngle)
         {
             //  Check for this one specially, since its going to be common
             //  and it is kind of auto-recovering since we've already hit the
             //  next open bracket, which is what we would have seeked to (and
             //  skipped this whole tag.)
             emitError(XMLErrs::UnterminatedStartTag, elemDecl->getFullName());
             break;
         }
         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
         {
             //  Check for this one specially, which is probably a missing
             //  attribute name, e.g. ="value". Just issue expected name
             //  error and eat the quoted string, then jump back to the
             //  top again.
             emitError(XMLErrs::ExpectedAttrName);
             fReaderMgr.getNextChar();
             fReaderMgr.skipQuotedString(nextCh);
             fReaderMgr.skipPastSpaces();
             continue;
         }
     }

     //  If empty, validate content right now if we are validating and then
     //  pop the element stack top. Else, we have to update the current stack
     //  top's namespace mapping elements.
     if (isEmpty)
     {
         // Pop the element stack back off since it'll never be used now
         fElemStack.popTop();

         // If the elem stack is empty, then it was an empty root
         if (isRoot)
             gotData = false;
     }

     //  If we have a document handler, then tell it about this start tag. We
     //  don't have any URI id to send along, so send fEmptyNamespaceId. We also do not send
     //  any prefix since its just one big name if we are not doing namespaces.
     if (fDocHandler)
     {
         fDocHandler->startElement
         (
             *elemDecl
             , fEmptyNamespaceId
             , 0
             , *fAttrList
             , attCount
             , isEmpty
             , isRoot
         );
     }

     return true;
 }


 //  This method is called to scan a start tag when we are processing
 //  namespaces. There are two different versions of this method, one for
 //  namespace aware processing and one for non-namespace aware processing.
 //
 //  This method is called after we've scanned the < of a start tag. So we
 //  have to get the element name, then scan the attributes, after which
 //  we are either going to see >, />, or attributes followed by one of those
 //  sequences.
 //  Contract notes:
 //    gotData  - set to true on entry and reset to false only when the tag
 //               turns out to be an empty root element (e.g. <root/>).
 //    returns  - false when the element name or an attribute could not be
 //               scanned and resynchronization was abandoned; true otherwise
 //               (well-formedness errors may still have been reported through
 //               emitError).
 //    throws   - UnexpectedEOFException when the input ends inside the tag.
 bool WFXMLScanner::scanStartTagNS(bool& gotData)
 {
     //  Assume we will still have data until proven otherwise. It will only
     //  ever be false if this is the root and it is empty.
     gotData = true;

     //  The current position is just after the open bracket, so the element
     //  name has to be read next.
     int colonPosition;
     if (!fReaderMgr.getQName(fQNameBuf, &colonPosition))
     {
         if (fQNameBuf.isEmpty())
             emitError(XMLErrs::ExpectedElementName);
         else
             emitError(XMLErrs::InvalidElementName, fQNameBuf.getRawBuffer());
         fReaderMgr.skipToChar(chOpenAngle);
         return false;
     }

     // See if it is the root element
     const bool isRoot = fElemStack.isEmpty();

     // Assume it won't be an empty tag
     bool isEmpty = false;

     // Skip any whitespace after the name
     fReaderMgr.skipPastSpaces();

     //  Look the element up by its raw qualified name
     const XMLCh* qnameRawBuf = fQNameBuf.getRawBuffer();
     XMLElementDecl* elemDecl = fElementLookup->get(qnameRawBuf);

     if (!elemDecl) {
         //  First time this name is seen. A name whose first 6 characters
         //  match fgXMLNSColonString is reported as an illegal element prefix.
         if (!XMLString::compareNString(qnameRawBuf, XMLUni::fgXMLNSColonString, 6))
             emitError(XMLErrs::NoXMLNSAsElementPrefix, qnameRawBuf);

         //  Reuse a previously allocated declaration object when one is
         //  available at fElementIndex, otherwise allocate and pool a new one.
         if (fElementIndex < fElements->size()) {
             elemDecl = fElements->elementAt(fElementIndex);
         }
         else {
             elemDecl = new (fGrammarPoolMemoryManager) DTDElementDecl
             (
                 fGrammarPoolMemoryManager
             );
             fElements->addElement(elemDecl);
         }

         elemDecl->setElementName(qnameRawBuf, fEmptyNamespaceId);
         fElementLookup->put((void*)elemDecl->getFullName(), elemDecl);
         fElementIndex++;
     }

     // Expand the element stack and add the new element
     fElemStack.addLevel(elemDecl, fReaderMgr.getCurrentReaderNum());

     // Reset the list of attributes whose prefix still has to be resolved
     fAttrNSList->removeAllElements();

     //  We loop until we either see a /> or >, handling attribute/value
     //  pairs until we get there.
     unsigned int attCount = 0;
     unsigned int curAttListSize = fAttrList->size();
     while (true)
     {
         // And get the next non-space character
         XMLCh nextCh = fReaderMgr.peekNextChar();

         //  If the next character is not a slash or closed angle bracket,
         //  then it must be whitespace, since whitespace is required
         //  between the end of the last attribute and the name of the next
         //  one.
         if (attCount)
         {
             if ((nextCh != chForwardSlash) && (nextCh != chCloseAngle))
             {
                 if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                 {
                     // Ok, skip by them and peek another char
                     fReaderMgr.skipPastSpaces();
                     nextCh = fReaderMgr.peekNextChar();
                 }
                 else
                 {
                     // Emit the error but keep on going
                     emitError(XMLErrs::ExpectedWhitespace);
                 }
             }
         }

         //  Ok, here we first check for any of the special case characters.
         //  If it is not one, then we do the normal case processing, which
         //  assumes that an attribute follows. Otherwise, we do all the
         //  special case checks.
         if (!fReaderMgr.getCurrentReader()->isSpecialStartTagChar(nextCh))
         {
             //  Assume it is going to be an attribute, so get a name from
             //  the input.
             int attColonPosition;
             if (!fReaderMgr.getQName(fAttNameBuf, &attColonPosition))
             {
                 if (fAttNameBuf.isEmpty())
                     emitError(XMLErrs::ExpectedAttrName);
                 else
                     emitError(XMLErrs::InvalidAttrName, fAttNameBuf.getRawBuffer());
                 fReaderMgr.skipPastChar(chCloseAngle);
                 return false;
             }

             // And next must be an equal sign
             if (!scanEq())
             {
                 static const XMLCh tmpList[] =
                 {
                     chSingleQuote, chDoubleQuote, chCloseAngle
                     , chOpenAngle, chForwardSlash, chNull
                 };

                 emitError(XMLErrs::ExpectedEqSign);

                 //  Try to sync back up by skipping forward until we hit
                 //  something meaningful.
                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                 if ((chFound == chCloseAngle) || (chFound == chForwardSlash))
                 {
                     // Jump back to top for normal processing of these
                     continue;
                 }
                 else if ((chFound == chSingleQuote)
                       ||  (chFound == chDoubleQuote)
                       ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                 {
                     // Just fall through assuming that the value is to follow
                 }
                 else if (chFound == chOpenAngle)
                 {
                     // Assume a malformed tag and that new one is starting
                     emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
                     return false;
                 }
                 else
                 {
                     // Something went really wrong
                     return false;
                 }
             }

             //  See if an attribute with the same qualified name was already
             //  given for this element. The stored hashes are compared first
             //  so that the string comparison only runs on a hash match.
             const XMLCh* attNameRawBuf = fAttNameBuf.getRawBuffer();
             unsigned int attNameHash = XMLString::hash(attNameRawBuf, 109, fMemoryManager);
             if (attCount) {

                 for (unsigned int k=0; k < attCount; k++) {

                     if (fAttrNameHashList->elementAt(k) == attNameHash) {
                         if (XMLString::equals(
                                 fAttrList->elementAt(k)->getQName()
                                 , attNameRawBuf))
                         {
                             emitError
                             (
                                 XMLErrs::AttrAlreadyUsedInSTag
                                 , attNameRawBuf
                                 , qnameRawBuf
                             );
                             break;
                         }
                     }
                 }
             }

             //  Skip any whitespace before the value and then scan the att
             //  value. This will come back normalized with entity refs and
             //  char refs expanded.
             fReaderMgr.skipPastSpaces();
             if (!scanAttValue(attNameRawBuf, fAttValueBuf))
             {
                 static const XMLCh tmpList[] =
                 {
                     chCloseAngle, chOpenAngle, chForwardSlash, chNull
                 };

                 emitError(XMLErrs::ExpectedAttrValue);

                 //  It failed, so lets try to get synced back up. We skip
                 //  forward until we find some whitespace or one of the
                 //  chars in our list.
                 const XMLCh chFound = fReaderMgr.skipUntilInOrWS(tmpList);

                 if ((chFound == chCloseAngle)
                 ||  (chFound == chForwardSlash)
                 ||  fReaderMgr.getCurrentReader()->isWhitespace(chFound))
                 {
                     //  Just fall through and process this attribute with
                     //  whatever is currently in the value buffer.
                 }
                 else if (chFound == chOpenAngle)
                 {
                     // Assume a malformed tag and that new one is starting
                     emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
                     return false;
                 }
                 else
                 {
                     // Something went really wrong
                     return false;
                 }
             }

             //  Add this attribute to the attribute list that we use to
             //  pass them to the handler. We reuse its existing elements
             //  but expand it as required.
             const XMLCh* attValueRawBuf = fAttValueBuf.getRawBuffer();
             XMLAttr* curAtt = 0;
             if (attCount >= curAttListSize)
             {
                 curAtt = new (fMemoryManager) XMLAttr
                 (
                     fEmptyNamespaceId
                     , attNameRawBuf
                     , attValueRawBuf
                     , XMLAttDef::CData
                     , true
                     , fMemoryManager
                 );
                 fAttrList->addElement(curAtt);
                 fAttrNameHashList->addElement(attNameHash);
             }
             else
             {
                 curAtt = fAttrList->elementAt(attCount);
                 curAtt->set
                 (
                     fEmptyNamespaceId
                     , attNameRawBuf
                     , attValueRawBuf
                 );
                 curAtt->setSpecified(true);
                 fAttrNameHashList->setElementAt(attNameHash, attCount);
             }

             // Map prefix to namespace
             const XMLCh* attPrefix = curAtt->getPrefix();
             const XMLCh* attLocalName = curAtt->getName();
             const XMLCh* namespaceURI = fAttValueBuf.getRawBuffer();

             if (attPrefix && *attPrefix) {
                 if (XMLString::equals(attPrefix, XMLUni::fgXMLString)) {
                     // xml:* attributes always get the XML namespace id
                     curAtt->setURIId(fXMLNamespaceId);
                 }
                 else if (XMLString::equals(attPrefix, XMLUni::fgXMLNSString)) {

                     //  xmlns:<prefix>="uri" declaration. Report the illegal
                     //  forms but still record the binding below.
                     if (XMLString::equals(attLocalName, XMLUni::fgXMLNSString))
                         emitError(XMLErrs::NoUseOfxmlnsAsPrefix);
                     else if (XMLString::equals(attLocalName, XMLUni::fgXMLString)) {
                         if (!XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
                             emitError(XMLErrs::PrefixXMLNotMatchXMLURI);
                     }

                     //  An empty URI for a prefixed declaration is reported
                     //  only when the document is XML 1.0.
                     if (!namespaceURI)
                         emitError(XMLErrs::NoEmptyStrNamespace, attNameRawBuf);
                     else if(!*namespaceURI && fXMLVersion == XMLReader::XMLV1_0)
                         emitError(XMLErrs::NoEmptyStrNamespace, attNameRawBuf);

                     fElemStack.addPrefix
                     (
                         attLocalName
                         , fURIStringPool->addOrFind(namespaceURI)
                     );
                     curAtt->setURIId(fXMLNSNamespaceId);
                 }
                 else {
                     //  Any other prefix may be declared later in this same
                     //  tag, so resolve it once all attributes are scanned.
                     fAttrNSList->addElement(curAtt);
                 }
             }
             else {
                 if (XMLString::equals(XMLUni::fgXMLNSString, attLocalName)) {

                     //  xmlns="uri" default namespace declaration
                     if (XMLString::equals(namespaceURI, XMLUni::fgXMLNSURIName))
                         emitError(XMLErrs::NoUseOfxmlnsURI);
                     else if (XMLString::equals(namespaceURI, XMLUni::fgXMLURIName))
                         emitError(XMLErrs::XMLURINotMatchXMLPrefix);

                     fElemStack.addPrefix
                     (
                         XMLUni::fgZeroLenString
                         , fURIStringPool->addOrFind(namespaceURI)
                     );
                 }
             }

             // increment attribute count
             attCount++;

             // And jump back to the top of the loop
             continue;
         }

         //  It was some special case character so do all of the checks and
         //  deal with it.
         if (!nextCh)
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

         if (nextCh == chForwardSlash)
         {
             fReaderMgr.getNextChar();
             isEmpty = true;
             if (!fReaderMgr.skippedChar(chCloseAngle))
                 emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
             break;
         }
         else if (nextCh == chCloseAngle)
         {
             fReaderMgr.getNextChar();
             break;
         }
         else if (nextCh == chOpenAngle)
         {
             //  Check for this one specially, since it is going to be common
             //  and it is kind of auto-recovering since we've already hit the
             //  next open bracket, which is what we would have seeked to (and
             //  skipped this whole tag.)
             emitError(XMLErrs::UnterminatedStartTag, qnameRawBuf);
             break;
         }
         else if ((nextCh == chSingleQuote) || (nextCh == chDoubleQuote))
         {
             //  Check for this one specially, which is probably a missing
             //  attribute name, e.g. ="value". Just issue expected name
             //  error and eat the quoted string, then jump back to the
             //  top again.
             emitError(XMLErrs::ExpectedAttrName);
             fReaderMgr.getNextChar();
             fReaderMgr.skipQuotedString(nextCh);
             fReaderMgr.skipPastSpaces();
             continue;
         }
     }

     //  Resolve the prefixes of the attributes that were deferred above, now
     //  that every namespace declaration of this tag has been recorded.
     for (unsigned int i=0; i < fAttrNSList->size(); i++) {

         XMLAttr* providedAttr = fAttrNSList->elementAt(i);

         providedAttr->setURIId
         (
             resolvePrefix
             (
                 providedAttr->getPrefix(),
                 ElemStack::Mode_Attribute
             )
         );
     }

     if(attCount) {

         //
         // Decide if to use hash table to do duplicate checking
         //
         bool toUseHashTable = false;
         setAttrDupChkRegistry(attCount, toUseHashTable);

         //  Check for duplicate namespace attributes: qualified names with
         //  the same local part and with prefixes which have been bound to
         //  namespace names that are identical.
         //
         //  The loop visits every attribute, including the last one. In the
         //  pairwise branch the inner loop is simply empty for the last index.
         //  In the hash table branch each attribute is only looked up during
         //  its own iteration, so stopping at attCount-1 would leave the last
         //  attribute unchecked.
         XMLAttr* loopAttr;
         XMLAttr* curAtt;
         for (unsigned int attrIndex=0; attrIndex < attCount; attrIndex++) {
             loopAttr = fAttrList->elementAt(attrIndex);

             if (!toUseHashTable)
             {
                 for (unsigned int curAttrIndex = attrIndex+1; curAttrIndex < attCount; curAttrIndex++) {
                     curAtt = fAttrList->elementAt(curAttrIndex);
                     if (curAtt->getURIId() == loopAttr->getURIId() &&
                         XMLString::equals(curAtt->getName(), loopAttr->getName())) {
                         emitError
                             (
                             XMLErrs::AttrAlreadyUsedInSTag
                             , curAtt->getName()
                             , elemDecl->getFullName()
                             );
                     }
                 }
             }
             else
             {
                 //  Keyed on (local name, URI id): a hit means an earlier
                 //  attribute of this tag has the same expanded name.
                 if (fAttrDupChkRegistry->containsKey((void*)loopAttr->getName(), loopAttr->getURIId()))
                 {
                     emitError
                     (
                     XMLErrs::AttrAlreadyUsedInSTag
                     , loopAttr->getName()
                     , elemDecl->getFullName()
                     );
                 }

                 fAttrDupChkRegistry->put((void*)loopAttr->getName(), loopAttr->getURIId(), loopAttr);
             }
         }
     }

     // Resolve the element's own prefix to a URI.
     unsigned int uriId = resolvePrefix
     (
         elemDecl->getElementName()->getPrefix()
         , ElemStack::Mode_Element
     );

     // Now we can update the element stack
     fElemStack.setCurrentURI(uriId);

     // Tell the document handler about this start tag
     if (fDocHandler)
     {
         fDocHandler->startElement
         (
             *elemDecl
             , uriId
             , elemDecl->getElementName()->getPrefix()
             , *fAttrList
             , attCount
             , isEmpty
             , isRoot
         );
     }

     //  If the tag was empty, pop the element stack top right away since it
     //  will never be used now.
     if (isEmpty)
     {
         fElemStack.popTop();

         // An empty root element means there is no further content
         if (isRoot)
             gotData = false;
     }

     return true;
 }

 unsigned int
 WFXMLScanner::resolveQName(const   XMLCh* const qName
                            ,       XMLBuffer&   prefixBuf
                            , const short        mode
                            ,       int&         prefixColonPos)
 {
     //  Lets split out the qName into a URI and name buffer first. The URI
     //  can be empty.
     prefixColonPos = XMLString::indexOf(qName, chColon);
     if (prefixColonPos == -1)
     {
         //  Its all name with no prefix, so put the whole thing into the name
         //  buffer. Then map the empty string to a URI, since the empty string
         //  represents the default namespace. This will either return some
         //  explicit URI which the default namespace is mapped to, or the
         //  the default global namespace.
         bool unknown = false;

         prefixBuf.reset();
         return fElemStack.mapPrefixToURI(XMLUni::fgZeroLenString, (ElemStack::MapModes) mode, unknown);
     }
     else
     {
         //  Copy the chars up to but not including the colon into the prefix
         //  buffer.
         prefixBuf.set(qName, prefixColonPos);

         //  Watch for the special namespace prefixes. We always map these to
         //  special URIs. 'xml' gets mapped to the official URI that its defined
         //  to map to by the NS spec. xmlns gets mapped to a special place holder
         //  URI that we define (so that it maps to something checkable.)
         const XMLCh* prefixRawBuf = prefixBuf.getRawBuffer();
         if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLNSString)) {

             // if this is an element, it is an error to have xmlns as prefix
             if (mode == ElemStack::Mode_Element)
                 emitError(XMLErrs::NoXMLNSAsElementPrefix, qName);

             return fXMLNSNamespaceId;
         }
         else if (XMLString::equals(prefixRawBuf, XMLUni::fgXMLString)) {
             return  fXMLNamespaceId;
         }
         else
         {
             bool unknown = false;
             unsigned int uriId = fElemStack.mapPrefixToURI(prefixRawBuf, (ElemStack::MapModes) mode, unknown);

             if (unknown)
                 emitError(XMLErrs::UnknownPrefix, prefixRawBuf);

             return uriId;
         }
     }
 }

 // ---------------------------------------------------------------------------
 //  WFXMLScanner: Private parsing methods
 // ---------------------------------------------------------------------------
 bool WFXMLScanner::scanAttValue(const XMLCh* const attrName
                               ,     XMLBuffer&   toFill)
 {
     // Reset the target buffer
     toFill.reset();

     // Get the next char which must be a single or double quote
     XMLCh quoteCh;
     if (!fReaderMgr.skipIfQuote(quoteCh))
         return false;

     //  We have to get the current reader because we have to ignore closing
     //  quotes until we hit the same reader again.
     const unsigned int curReader = fReaderMgr.getCurrentReaderNum();

     //  Loop until we get the attribute value. Note that we use a double
     //  loop here to avoid the setup/teardown overhead of the exception
     //  handler on every round.
     XMLCh   nextCh;
     XMLCh   secondCh = 0;
     bool    gotLeadingSurrogate = false;
     bool    escaped;
     while (true)
     {
     try
     {
         while(true)
         {
             nextCh = fReaderMgr.getNextChar();

             if (!nextCh)
                 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

             // Check for our ending quote in the same entity
             if (nextCh == quoteCh)
             {
                 if (curReader == fReaderMgr.getCurrentReaderNum())
                     return true;

                 // Watch for spillover into a previous entity
                 if (curReader > fReaderMgr.getCurrentReaderNum())
                 {
                     emitError(XMLErrs::PartialMarkupInEntity);
                     return false;
                 }
             }

             //  Check for an entity ref now, before we let it affect our
             //  whitespace normalization logic below. We ignore the empty flag
             //  in this one.
             escaped = false;
             if (nextCh == chAmpersand)
             {
                 if (scanEntityRef(true, nextCh, secondCh, escaped) != EntityExp_Returned)
                 {
                     gotLeadingSurrogate = false;
                     continue;
                 }
             }
             else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
             {
                 // Deal with surrogate pairs
                 //  Its a leading surrogate. If we already got one, then
                 //  issue an error, else set leading flag to make sure that
                 //  we look for a trailing next time.
                 if (gotLeadingSurrogate)
                 {
                     emitError(XMLErrs::Expected2ndSurrogateChar);
                 }
                 else
                     gotLeadingSurrogate = true;
             }
             else
             {
                 //  If its a trailing surrogate, make sure that we are
                 //  prepared for that. Else, its just a regular char so make
                 //  sure that we were not expected a trailing surrogate.
                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                 {
                     // Its trailing, so make sure we were expecting it
                     if (!gotLeadingSurrogate)
                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
                 }
                 else
                 {
                     //  Its just a char, so make sure we were not expecting a
                     //  trailing surrogate.
                     if (gotLeadingSurrogate) {
                         emitError(XMLErrs::Expected2ndSurrogateChar);
                     }
                     // Its got to at least be a valid XML character
                     else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                     {
                         XMLCh tmpBuf[9];
                         XMLString::binToText
                         (
                             nextCh
                             , tmpBuf
                             , 8
                             , 16
                             , fMemoryManager
                         );
                         emitError(XMLErrs::InvalidCharacterInAttrValue, attrName, tmpBuf);
                     }
                 }
                 gotLeadingSurrogate = false;
             }

             //  If its not escaped, then make sure its not a < character, which
             //  is not allowed in attribute values.
             if (!escaped) {
                 if (nextCh == chOpenAngle)
                     emitError(XMLErrs::BracketInAttrValue, attrName);
                 else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
                     nextCh = chSpace;
             }

             // Else add it to the buffer
             toFill.append(nextCh);

             if (secondCh)
             {
                 toFill.append(secondCh);
                 secondCh=0;
             }
         }
     }
     catch(const EndOfEntityException&)
     {
         // Just eat it and continue.
         gotLeadingSurrogate = false;
         escaped = false;
     }
     }
     return true;
 }


 //  This method scans a CDATA section. It collects the characters into one
 //  of the temp buffers and calls the document handler, if any, with the
 //  characters. It assumes that the <![CDATA string has been scanned before
 //  this call.
 void WFXMLScanner::scanCDSection()
 {
     static const XMLCh CDataClose[] =
     {
             chCloseSquare, chCloseAngle, chNull
     };

     //  The next character should be the opening square bracket. If not
     //  issue an error, but then try to recover by skipping any whitespace
     //  and checking again.
     if (!fReaderMgr.skippedChar(chOpenSquare))
     {
         emitError(XMLErrs::ExpectedOpenSquareBracket);
         fReaderMgr.skipPastSpaces();

         // If we still don't find it, then give up, else keep going
         if (!fReaderMgr.skippedChar(chOpenSquare))
             return;
     }

     // Get a buffer for this
     XMLBufBid bbCData(&fBufMgr);

     //  We just scan forward until we hit the end of CDATA section sequence.
     //  CDATA is effectively a big escape mechanism so we don't treat markup
     //  characters specially here.
     bool            emittedError = false;
     bool    gotLeadingSurrogate = false;
     while (true)
     {
         const XMLCh nextCh = fReaderMgr.getNextChar();

         // Watch for unexpected end of file
         if (!nextCh)
         {
             emitError(XMLErrs::UnterminatedCDATASection);
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
         }

         //  If this is a close square bracket it could be our closing
         //  sequence.
         if (nextCh == chCloseSquare && fReaderMgr.skippedString(CDataClose))
         {
             //  make sure we were not expecting a trailing surrogate.
             if (gotLeadingSurrogate)
                 emitError(XMLErrs::Expected2ndSurrogateChar);

             // If we have a doc handler, call it
             if (fDocHandler)
             {
                 fDocHandler->docCharacters
                 (
                     bbCData.getRawBuffer()
                     , bbCData.getLen()
                     , true
                 );
             }

             // And we are done
             break;
         }

         //  Make sure its a valid character. But if we've emitted an error
         //  already, don't bother with the overhead since we've already told
         //  them about it.
         if (!emittedError)
         {
             // Deal with surrogate pairs
             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
             {
                 //  Its a leading surrogate. If we already got one, then
                 //  issue an error, else set leading flag to make sure that
                 //  we look for a trailing next time.
                 if (gotLeadingSurrogate)
                     emitError(XMLErrs::Expected2ndSurrogateChar);
                 else
                     gotLeadingSurrogate = true;
             }
             else
             {
                 //  If its a trailing surrogate, make sure that we are
                 //  prepared for that. Else, its just a regular char so make
                 //  sure that we were not expected a trailing surrogate.
                 if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                 {
                     // Its trailing, so make sure we were expecting it
                     if (!gotLeadingSurrogate)
                         emitError(XMLErrs::Unexpected2ndSurrogateChar);
                 }
                 else
                 {
                     //  Its just a char, so make sure we were not expecting a
                     //  trailing surrogate.
                     if (gotLeadingSurrogate)
                         emitError(XMLErrs::Expected2ndSurrogateChar);

                     // Its got to at least be a valid XML character
                     else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                     {
                         XMLCh tmpBuf[9];
                         XMLString::binToText
                         (
                             nextCh
                             , tmpBuf
                             , 8
                             , 16
                             , fMemoryManager
                         );
                         emitError(XMLErrs::InvalidCharacter, tmpBuf);
                         emittedError = true;
                     }
                 }
                 gotLeadingSurrogate = false;
             }
         }

         // Add it to the buffer
         bbCData.append(nextCh);
     }
 }


 //  Scans a run of character data into the passed buffer and delivers it
 //  through sendCharData(). Scanning stops when getNextCharIfNot() reports
 //  that no character was fetched (presumably because the next character is
 //  the '<' that starts markup -- confirm against ReaderMgr). Along the way
 //  it expands entity/character references, checks surrogate pairing and
 //  character validity, and reports the ]]> sequence, which is not allowed
 //  in character data.
 //
 //  toUse   The buffer used for accumulation. It is reset on entry, so any
 //          previous content is discarded.
 void WFXMLScanner::scanCharData(XMLBuffer& toUse)
 {
     //  We have to watch for the ]]> sequence, which is illegal in character
     //  data. So this is a little state machine that handles that:
     //  State_Waiting = nothing seen, State_GotOne = one ']' seen,
     //  State_GotTwo = two or more ']' seen in a row.
     enum States
     {
         State_Waiting
         , State_GotOne
         , State_GotTwo
     };

     // Reset the buffer before we start
     toUse.reset();

     // Turn on the 'throw at end' flag of the reader manager
     ThrowEOEJanitor jan(&fReaderMgr, true);

     //  In order to be more efficient we have to use kind of a deeply nested
     //  set of blocks here. The outer block puts on a try and catches end of
     //  entity exceptions. The inner loop is the per-character loop. If we
     //  put the try inside the inner loop, it would work but would require
     //  the exception handling code setup/teardown code to be invoked for
     //  each character.
     //
     //  nextCh/secondCh hold the current character; secondCh is only ever
     //  non-zero when scanEntityRef() filled it in, and is appended right
     //  after nextCh. notDone drives the outer loop and is cleared only when
     //  the inner loop ends normally.
     XMLCh   nextCh;
     XMLCh   secondCh = 0;
     States  curState = State_Waiting;
     bool    escaped = false;
     bool    gotLeadingSurrogate = false;
     bool    notDone = true;
     while (notDone)
     {
         try
         {
             while (true)
             {
                 //  Eat through as many plain content characters as possible without
                 //  needing special handling.  Moving most content characters here,
                 //  in this one call, rather than running the overall loop once
                 //  per content character, is a speed optimization. It is only
                 //  done when we are neither in the middle of a possible ]]>
                 //  nor waiting for a trailing surrogate.
                 if (curState == State_Waiting  &&  !gotLeadingSurrogate)
                 {
                      fReaderMgr.movePlainContentChars(toUse);
                 }

                 // Try to get another char from the source
                 //   The code from here on down covers all contingencies,
                 if (!fReaderMgr.getNextCharIfNot(chOpenAngle, nextCh))
                 {
                     // If we were waiting for a trailing surrogate, its an error
                     if (gotLeadingSurrogate)
                         emitError(XMLErrs::Expected2ndSurrogateChar);

                     // Leave both the inner and the outer loop
                     notDone = false;
                     break;
                 }

                 //  Watch for a reference. Note that the escapement mechanism
                 //  is ignored in this content.
                 escaped = false;
                 if (nextCh == chAmpersand)
                 {
                     // Flush what we have so far before handling the reference
                     sendCharData(toUse);

                     // Turn off the throwing at the end of entity during this
                     // NOTE(review): this local shadows the outer 'jan'; it
                     // only lives until the end of this if block.
                     ThrowEOEJanitor jan(&fReaderMgr, false);

                     //  Anything other than a directly returned value means
                     //  there is no character to process this time around.
                     if (scanEntityRef(false, nextCh, secondCh, escaped) != EntityExp_Returned)
                     {
                         gotLeadingSurrogate = false;
                         continue;
                     }
                     //  On success nextCh (and possibly secondCh) hold the
                     //  expansion; note that the surrogate and validity
                     //  checks below are not applied to it.
                 }
                 else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
                 {
                     // Deal with surrogate pairs
                     //  Its a leading surrogate. If we already got one, then
                     //  issue an error, else set leading flag to make sure that
                     //  we look for a trailing next time.
                     if (gotLeadingSurrogate)
                     {
                         emitError(XMLErrs::Expected2ndSurrogateChar);
                     }
                     else
                         gotLeadingSurrogate = true;
                 }
                 else
                 {
                     //  If its a trailing surrogate, make sure that we are
                     //  prepared for that. Else, its just a regular char so make
                     //  sure that we were not expected a trailing surrogate.
                     if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF))
                     {
                         // Its trailing, so make sure we were expecting it
                         if (!gotLeadingSurrogate)
                             emitError(XMLErrs::Unexpected2ndSurrogateChar);
                     }
                     else
                     {
                         //  Its just a char, so make sure we were not expecting a
                         //  trailing surrogate.
                         if (gotLeadingSurrogate) {
                             emitError(XMLErrs::Expected2ndSurrogateChar);
                         }
                         // Its got to at least be a valid XML character
                         else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh))
                         {
                             // Report the offending character as hexadecimal text
                             XMLCh tmpBuf[9];
                             XMLString::binToText
                             (
                                 nextCh
                                 , tmpBuf
                                 , 8
                                 , 16
                                 , fMemoryManager
                             );
                             emitError(XMLErrs::InvalidCharacter, tmpBuf);
                         }
                     }
                     gotLeadingSurrogate = false;
                 }

                 //  Keep the state machine up to date. Characters that came
                 //  from a reference (escaped) never take part in a ]]>
                 //  sequence and always put us back into the waiting state.
                 if (!escaped)
                 {
                     if (nextCh == chCloseSquare)
                     {
                         //  In State_GotTwo we stay put, so a longer run of
                         //  ']' followed by '>' is still caught.
                         if (curState == State_Waiting)
                             curState = State_GotOne;
                         else if (curState == State_GotOne)
                             curState = State_GotTwo;
                     }
                     else if (nextCh == chCloseAngle)
                     {
                         // A '>' right after two or more ']' completes ]]>
                         if (curState == State_GotTwo)
                             emitError(XMLErrs::BadSequenceInCharData);
                         curState = State_Waiting;
                     }
                     else
                     {
                         curState = State_Waiting;
                     }
                 }
                 else
                 {
                     curState = State_Waiting;
                 }

                 //  Add this char to the buffer (this happens even when an
                 //  error was reported for it above)
                 toUse.append(nextCh);

                 // A second character from a reference goes right behind it
                 if (secondCh)
                 {
                     toUse.append(secondCh);
                     secondCh=0;
                 }
             }
         }
         catch(const EndOfEntityException& toCatch)
         {
             //  Some entity ended, so we have to send any accumulated
             //  chars and send an end of entity event. notDone is still
             //  true here, so the outer loop resumes scanning afterwards.
             //  NOTE(review): curState is not reset here, only the
             //  surrogate flag -- confirm that is intended.
             sendCharData(toUse);
             gotLeadingSurrogate = false;

             if (fDocHandler)
                 fDocHandler->endEntityReference(toCatch.getEntity());
         }
     }

     // Send any char data that we accumulated into the buffer
     sendCharData(toUse);
 }

 //  System id resolution stub for this scanner: both ids are ignored and a
 //  null input source is always handed back.
 InputSource*
 WFXMLScanner::resolveSystemId( const XMLCh* const /*sysId*/
                              , const XMLCh* const /*pubId*/)
 {
     // Never resolves anything
     return 0;
 }

 //  Scans a character reference or a general entity reference; it is called
 //  once the leading '&' has been read. A character reference is expanded
 //  through scanCharRef(). A named reference is looked up in the entity
 //  table of this scanner and its character value is handed back directly.
 //
 //  firstCh    Receives the resulting character when the return value is
 //             EntityExp_Returned.
 //  secondCh   Always zeroed on entry; passed on to scanCharRef() for
 //             character references (presumably to receive the second half
 //             of a surrogate pair -- confirm against scanCharRef).
 //  escaped    Cleared on entry and set to true whenever a value is handed
 //             back, so it is only meaningful for EntityExp_Returned.
 //
 //  The unnamed leading bool parameter is not used by this implementation.
 //
 //  Returns EntityExp_Returned when firstCh holds the value, and
 //  EntityExp_Failed when the reference could not be scanned or the entity
 //  name is not in the table.
 XMLScanner::EntityExpRes
 WFXMLScanner::scanEntityRef( const bool
                            ,       XMLCh&  firstCh
                            ,       XMLCh&  secondCh
                            ,       bool&   escaped)
 {
     // Start out with "nothing returned, nothing escaped"
     escaped  = false;
     secondCh = 0;

     //  Remember the reader we started on so that a reference spilling over
     //  an entity boundary can be reported.
     const unsigned int startReader = fReaderMgr.getCurrentReaderNum();

     //  A pound sign right after the ampersand makes this a character
     //  reference, which is always expanded.
     if (fReaderMgr.skippedChar(chPound))
     {
         const bool gotValue = scanCharRef(firstCh, secondCh);
         if (!gotValue)
             return EntityExp_Failed;

         escaped = true;

         // The whole reference has to live inside a single entity
         if (fReaderMgr.getCurrentReaderNum() != startReader)
             emitError(XMLErrs::PartialMarkupInEntity);

         return EntityExp_Returned;
     }

     // Otherwise it is a named reference, so the name comes next
     XMLBufBid nameBuf(&fBufMgr);
     if (!fReaderMgr.getName(nameBuf.getBuffer()))
     {
         emitError(XMLErrs::ExpectedEntityRefName);
         return EntityExp_Failed;
     }

     // The buffer is not modified from here on, so fetch its text once
     const XMLCh* const entName = nameBuf.getRawBuffer();

     //  A semi-colon should terminate the name. A missing one is reported
     //  but does not stop the processing.
     if (!fReaderMgr.skippedChar(chSemiColon))
         emitError(XMLErrs::UnterminatedEntityRef, entName);

     // Again, the reference must not have crossed an entity boundary
     if (fReaderMgr.getCurrentReaderNum() != startReader)
         emitError(XMLErrs::PartialMarkupInEntity);

     //  The name has to be present in the entity table of this scanner.
     const bool isKnown = fEntityTable->containsKey(entName);
     if (!isKnown)
     {
         // XML 1.0 Section 4.1
         // Well-formedness Constraint for entity not found:
         //   In a document without any DTD, a document with only an
         //   internal DTD subset which contains no parameter entity
         //   references, or a document with "standalone='yes'", for an
         //   entity reference that does not occur within the external
         //   subset or a parameter entity
         //  So the error is only raised in the standalone / no DTD cases.
         if (fStandalone || fHasNoDTD)
             emitError(XMLErrs::EntityNotFound, entName);

         return EntityExp_Failed;
     }

     //  With a security manager installed, count this expansion and
     //  complain once the configured limit is exceeded.
     if (fSecurityManager != 0)
     {
         ++fEntityExpansionCount;
         if (fEntityExpansionCount > fEntityExpansionLimit)
         {
             // The limit is reported as decimal text
             XMLCh limitText[16];
             XMLString::binToText(fEntityExpansionLimit, limitText, 15, 10, fMemoryManager);
             emitError
             (
                 XMLErrs::EntityExpansionLimitExceeded
                 , limitText
             );

             //  There seems to be nothing better to do than to start
             //  counting from scratch again.
             fEntityExpansionCount = 0;
         }
     }

     // Hand back the character stored for this entity
     firstCh = fEntityTable->get(entName);
     escaped = true;
     return EntityExp_Returned;
 }

 // ---------------------------------------------------------------------------
 //  WFXMLScanner: Grammar preparsing
 // ---------------------------------------------------------------------------
 //  Grammar preparsing stub for this scanner: none of the arguments are
 //  looked at and no grammar is ever produced.
 Grammar*
 WFXMLScanner::loadGrammar( const InputSource&
                          , const short
                          , const bool)
 {
     // REVISIT: emit a warning or throw an exception
     return 0;
 }


 XERCES_CPP_NAMESPACE_END