// src/xercesc/internal/XMLScanner.cpp - platform/external/xerces-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * $Id: XMLScanner.cpp 568078 2007-08-21 11:43:25Z amassari $
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/internal/XMLScanner.hpp>
 #include <xercesc/internal/ValidationContextImpl.hpp>
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/Mutexes.hpp>
 #include <xercesc/util/RuntimeException.hpp>
 #include <xercesc/util/UnexpectedEOFException.hpp>
 #include <xercesc/util/XMLMsgLoader.hpp>
 #include <xercesc/util/XMLRegisterCleanup.hpp>
 #include <xercesc/util/XMLInitializer.hpp>
 #include <xercesc/framework/LocalFileInputSource.hpp>
 #include <xercesc/framework/URLInputSource.hpp>
 #include <xercesc/framework/XMLDocumentHandler.hpp>
 #include <xercesc/framework/XMLEntityHandler.hpp>
 #include <xercesc/framework/XMLPScanToken.hpp>
 #include <xercesc/framework/XMLValidator.hpp>
 #include <xercesc/internal/EndOfEntityException.hpp>
 #include <xercesc/validators/DTD/DocTypeHandler.hpp>
 #include <xercesc/validators/common/GrammarResolver.hpp>
 #include <xercesc/util/OutOfMemoryException.hpp>
 #include <xercesc/util/XMLResourceIdentifier.hpp>

 XERCES_CPP_NAMESPACE_BEGIN

 // ---------------------------------------------------------------------------
 //  Local static data
 // ---------------------------------------------------------------------------
 // Counter from which scanner ids are handed out; commonInit() pre-increments
 // it while holding the scanner mutex and stores the result in fScannerId.
 static XMLUInt32       gScannerId;
 // Set to true once sScannerMutex has been created and its cleanup callback
 // registered; reset to false by XMLScanner::reinitScannerMutex().
 static bool            sRegistered = false;

 // Mutex protecting gScannerId and the lazy creation of gMsgLoader, plus the
 // cleanup object its teardown callback is registered with.
 static XMLMutex*       sScannerMutex = 0;
 static XMLRegisterCleanup scannerMutexCleanup;

 // Message loader for the XMLUni::fgXMLErrDomain message set, plus the
 // cleanup object its teardown callback is registered with.
 static XMLMsgLoader*   gMsgLoader = 0;
 static XMLRegisterCleanup cleanupMsgLoader;


 // ---------------------------------------------------------------------------
 //  Local, static functions
 // ---------------------------------------------------------------------------

 //  Cleanup for the message loader
 void XMLScanner::reinitMsgLoader()
 {
 	delete gMsgLoader;
 	gMsgLoader = 0;
 }

 //  Cleanup for the scanner mutex
 void XMLScanner::reinitScannerMutex()
 {
     delete sScannerMutex;
     sScannerMutex = 0;
     sRegistered = false;
 }

 //
 //  Returns the scanner mutex, creating it on first use. Since this mutex is
 //  itself used for synchronization, its creation is serialized on
 //  XMLPlatformUtils::fgAtomicMutex: sRegistered is tested once without the
 //  lock as a fast path and tested again after the lock has been taken.
 //
 static XMLMutex& gScannerMutex()
 {
     // Fast path: the mutex already exists
     if (sRegistered)
         return *sScannerMutex;

     XMLMutexLock lockInit(XMLPlatformUtils::fgAtomicMutex);

     // Somebody else may have created it while we waited for the lock
     if (!sRegistered)
     {
         sScannerMutex = new XMLMutex(XMLPlatformUtils::fgMemoryManager);

         // Register the mutex to be cleaned up at termination
         scannerMutexCleanup.registerCleanup(XMLScanner::reinitScannerMutex);
         sRegistered = true;
     }

     return *sScannerMutex;
 }

 //  Returns the message loader for the XML error domain, loading it on first
 //  use under the scanner mutex. If the message set cannot be loaded,
 //  XMLPlatformUtils::panic() is called with Panic_CantLoadMsgDomain.
 static XMLMsgLoader& gScannerMsgLoader()
 {
     // Fast path: the messages are already loaded
     if (gMsgLoader)
         return *gMsgLoader;

     XMLMutexLock lockInit(&gScannerMutex());

     // Re-test under the lock; another caller may have loaded it meanwhile
     if (!gMsgLoader)
     {
         gMsgLoader = XMLPlatformUtils::loadMsgSet(XMLUni::fgXMLErrDomain);
         if (!gMsgLoader)
             XMLPlatformUtils::panic(PanicHandler::Panic_CantLoadMsgDomain);

         // Register the loader to be cleaned up at termination
         cleanupMsgLoader.registerCleanup(XMLScanner::reinitMsgLoader);
     }

     return *gMsgLoader;
 }

 void XMLInitializer::initializeScannerMsgLoader()
 {
     gMsgLoader = XMLPlatformUtils::loadMsgSet(XMLUni::fgXMLErrDomain);

     // Register this object to be cleaned up at termination
     if (gMsgLoader) {
         cleanupMsgLoader.registerCleanup(XMLScanner::reinitMsgLoader);
     }

     sScannerMutex = new XMLMutex(XMLPlatformUtils::fgMemoryManager);
     if (sScannerMutex) {
         scannerMutexCleanup.registerCleanup(XMLScanner::reinitScannerMutex);
         sRegistered = true;
     }
 }


 // Janitor types bound to a member function of an object. In this file
 // CleanupType is armed with XMLScanner::cleanUp by the constructors, and
 // ReaderMgrResetType with ReaderMgr::reset by scanFirst(); both are disarmed
 // with release() on the paths where the call must not happen.
 // NOTE(review): presumably the bound member function is invoked from the
 // janitor's destructor -- confirm against Janitor.hpp.
 typedef JanitorMemFunCall<XMLScanner>   CleanupType;
 typedef JanitorMemFunCall<ReaderMgr>    ReaderMgrResetType;


 // ---------------------------------------------------------------------------
 //  XMLScanner: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //  Constructs a scanner with no document, doctype, entity or error handlers
 //  installed (all handler pointers start out null).
 //
 //  valToAdopt       Validator to install, may be null. When non-null,
 //                   commonInit() marks it as user supplied and initializes it.
 //  grammarResolver  Dereferenced in the initializer list to obtain the
 //                   grammar pool memory manager, so it must not be null.
 //  manager          Memory manager handed to the reader manager, the buffer
 //                   manager, the internal buffers and the element stack, and
 //                   used for the allocations made in commonInit().
 //
 //  All members are set to their defaults here; everything that needs an
 //  allocation is done in commonInit().
 XMLScanner::XMLScanner(XMLValidator* const valToAdopt,
                        GrammarResolver* const grammarResolver,
                        MemoryManager* const manager)
     : fBufferSize(1024 * 1024)
     , fStandardUriConformant(false)
     , fCalculateSrcOfs(false)
     , fDoNamespaces(false)
     , fExitOnFirstFatal(true)
     , fValidationConstraintFatal(false)
     , fInException(false)
     , fStandalone(false)
     , fHasNoDTD(true)
     , fValidate(false)
     , fValidatorFromUser(false)
     , fDoSchema(false)
     , fSchemaFullChecking(false)
     , fIdentityConstraintChecking(true)
     , fToCacheGrammar(false)
     , fUseCachedGrammar(false)
     , fLoadExternalDTD(true)
     , fNormalizeData(true)
     , fGenerateSyntheticAnnotations(false)
     , fValidateAnnotations(false)
     , fIgnoreCachedDTD(false)
     , fIgnoreAnnotations(false)
     , fDisableDefaultEntityResolution(false)
     , fSkipDTDValidation(false)
     , fErrorCount(0)
     , fEntityExpansionLimit(0)
     , fEntityExpansionCount(0)
     , fEmptyNamespaceId(0)
     , fUnknownNamespaceId(0)
     , fXMLNamespaceId(0)
     , fXMLNSNamespaceId(0)
     , fSchemaNamespaceId(0)
     , fUIntPool(0)
     , fUIntPoolRow(0)
     , fUIntPoolCol(0)
     , fUIntPoolRowTotal(2)
     , fScannerId(0)
     , fSequenceId(0)
     , fAttrList(0)
     , fAttrDupChkRegistry(0)
     , fDocHandler(0)
     , fDocTypeHandler(0)
     , fEntityHandler(0)
     , fErrorReporter(0)
     , fErrorHandler(0)
     , fPSVIHandler(0)
     , fValidationContext(0)
     , fEntityDeclPoolRetrieved(false)
     , fReaderMgr(manager)
     , fValidator(valToAdopt)
     , fValScheme(Val_Never)
     , fGrammarResolver(grammarResolver)
     , fGrammarPoolMemoryManager(grammarResolver->getGrammarPoolMemoryManager())
     , fGrammar(0)
     , fRootGrammar(0)
     , fURIStringPool(0)
     , fRootElemName(0)
     , fExternalSchemaLocation(0)
     , fExternalNoNamespaceSchemaLocation(0)
     , fSecurityManager(0)
     , fXMLVersion(XMLReader::XMLV1_0)
     , fMemoryManager(manager)
     , fBufMgr(manager)
     , fAttNameBuf(1023, manager)
     , fAttValueBuf(1023, manager)
     , fCDataBuf(1023, manager)
     , fQNameBuf(1023, manager)
     , fPrefixBuf(1023, manager)
     , fURIBuf(1023, manager)
     , fWSNormalizeBuf(1023, manager)
     , fElemStack(manager)
 {
     // Armed with cleanUp() so that whatever commonInit() managed to allocate
     // is handled if it exits with an exception other than out-of-memory.
     CleanupType cleanup(this, &XMLScanner::cleanUp);

     try
     {
         commonInit();
     }
     catch(const OutOfMemoryException&)
     {
         // Don't cleanup when out of memory, since executing the
         // code can cause problems.
         cleanup.release();

         throw;
     }

     // Construction succeeded: disarm the janitor, the destructor owns
     // the cleanup from now on.
     cleanup.release();
 }

 //  Constructs a scanner with the given handlers already installed. Apart
 //  from storing the four handler pointers, it is identical to the
 //  three-argument constructor.
 //
 //  docHandler       Stored in fDocHandler.
 //  docTypeHandler   Stored in fDocTypeHandler.
 //  entityHandler    Stored in fEntityHandler.
 //  errHandler       Stored in fErrorReporter (fErrorHandler stays null).
 //  valToAdopt       Validator to install, may be null. When non-null,
 //                   commonInit() marks it as user supplied and initializes it.
 //  grammarResolver  Dereferenced in the initializer list to obtain the
 //                   grammar pool memory manager, so it must not be null.
 //  manager          Memory manager handed to the reader manager, the buffer
 //                   manager, the internal buffers and the element stack, and
 //                   used for the allocations made in commonInit().
 XMLScanner::XMLScanner( XMLDocumentHandler* const  docHandler
                           , DocTypeHandler* const    docTypeHandler
                           , XMLEntityHandler* const  entityHandler
                           , XMLErrorReporter* const  errHandler
                           , XMLValidator* const      valToAdopt
                           , GrammarResolver* const   grammarResolver
                           , MemoryManager* const     manager)

     : fBufferSize(1024 * 1024)
     , fStandardUriConformant(false)
     , fCalculateSrcOfs(false)
     , fDoNamespaces(false)
     , fExitOnFirstFatal(true)
     , fValidationConstraintFatal(false)
     , fInException(false)
     , fStandalone(false)
     , fHasNoDTD(true)
     , fValidate(false)
     , fValidatorFromUser(false)
     , fDoSchema(false)
     , fSchemaFullChecking(false)
     , fIdentityConstraintChecking(true)
     , fToCacheGrammar(false)
     , fUseCachedGrammar(false)
 	, fLoadExternalDTD(true)
     , fNormalizeData(true)
     , fGenerateSyntheticAnnotations(false)
     , fValidateAnnotations(false)
     , fIgnoreCachedDTD(false)
     , fIgnoreAnnotations(false)
     , fDisableDefaultEntityResolution(false)
     , fSkipDTDValidation(false)
     , fErrorCount(0)
     , fEntityExpansionLimit(0)
     , fEntityExpansionCount(0)
     , fEmptyNamespaceId(0)
     , fUnknownNamespaceId(0)
     , fXMLNamespaceId(0)
     , fXMLNSNamespaceId(0)
     , fSchemaNamespaceId(0)
     , fUIntPool(0)
     , fUIntPoolRow(0)
     , fUIntPoolCol(0)
     , fUIntPoolRowTotal(2)
     , fScannerId(0)
     , fSequenceId(0)
     , fAttrList(0)
     , fAttrDupChkRegistry(0)
     , fDocHandler(docHandler)
     , fDocTypeHandler(docTypeHandler)
     , fEntityHandler(entityHandler)
     , fErrorReporter(errHandler)
     , fErrorHandler(0)
     , fPSVIHandler(0)
     , fValidationContext(0)
     , fEntityDeclPoolRetrieved(false)
     , fReaderMgr(manager)
     , fValidator(valToAdopt)
     , fValScheme(Val_Never)
     , fGrammarResolver(grammarResolver)
     , fGrammarPoolMemoryManager(grammarResolver->getGrammarPoolMemoryManager())
     , fGrammar(0)
     , fRootGrammar(0)
     , fURIStringPool(0)
     , fRootElemName(0)
     , fExternalSchemaLocation(0)
     , fExternalNoNamespaceSchemaLocation(0)
     , fSecurityManager(0)
     , fXMLVersion(XMLReader::XMLV1_0)
     , fMemoryManager(manager)
     , fBufMgr(manager)
     , fAttNameBuf(1023, manager)
     , fAttValueBuf(1023, manager)
     , fCDataBuf(1023, manager)
     , fQNameBuf(1023, manager)
     , fPrefixBuf(1023, manager)
     , fURIBuf(1023, manager)
     , fWSNormalizeBuf(1023, manager)
     , fElemStack(manager)
 {
     // Armed with cleanUp() so that whatever commonInit() managed to allocate
     // is handled if it exits with an exception other than out-of-memory.
     CleanupType cleanup(this, &XMLScanner::cleanUp);

     try
     {
         commonInit();
     }
     catch(const OutOfMemoryException&)
     {
         // Don't cleanup when out of memory, since executing the
         // code can cause problems.
         cleanup.release();

         throw;
     }

     // Construction succeeded: disarm the janitor, the destructor owns
     // the cleanup from now on.
     cleanup.release();
 }

 //  Releases everything owned directly by the scanner by delegating to
 //  cleanUp(), the same routine the constructors arm for failed construction.
 XMLScanner::~XMLScanner()
 {
     cleanUp();
 }


 //  Installs a user supplied validator and initializes it for this scanner.
 //
 //  valToAdopt  The validator to install. It is passed straight to
 //              initValidator(), which dereferences it, so it must not be
 //              null.
 //
 //  A validator that was previously supplied by the user is deleted when it
 //  is replaced; a validator that did not come from the user is left alone.
 void XMLScanner::setValidator(XMLValidator* const valToAdopt)
 {
     //  Only destroy the old user supplied validator if it really is being
     //  replaced. Being handed the validator we already hold must not delete
     //  it, or we would go on to install and initialize a dangling pointer.
     if (fValidatorFromUser && fValidator != valToAdopt)
         delete fValidator;

     fValidator = valToAdopt;
     fValidatorFromUser = true;
     initValidator(fValidator);
 }


 // ---------------------------------------------------------------------------
 //  XMLScanner: Main entry point to scan a document
 // ---------------------------------------------------------------------------
 //  Scans the document named by a system id.
 //
 //  The id is first tried as a URL. An absolute URL is read through a
 //  URLInputSource. Anything else (a relative URL, or text that does not
 //  parse as a URL at all) is treated as a local file name, unless
 //  fStandardUriConformant is set, in which case it is reported as a fatal
 //  error instead. Any XMLException raised while setting up the source is
 //  reported through emitError() and the scan is abandoned.
 void XMLScanner::scanDocument(  const   XMLCh* const    systemId)
 {
     InputSource* srcToUse = 0;
     try
     {
         //  Since this is the primary document, the URL has to be fully
         //  qualified. If it is not, assume we are just mistaking a file
         //  for a URL.
         XMLURL tmpURL(fMemoryManager);
         const bool parsedAsURL = XMLURL::parse(systemId, tmpURL);

         if (parsedAsURL && !tmpURL.isRelative())
         {
             // Fully qualified URL
             if (fStandardUriConformant && tmpURL.hasInvalidChar())
             {
                 MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                 fInException = true;
                 emitError(XMLErrs::XMLException_Fatal, e.getCode(), e.getType(), e.getMessage());
                 return;
             }
             srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
         }
         else if (!fStandardUriConformant)
         {
             // Relative or unparsable: fall back to a local file
             srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
         }
         else
         {
             //  Since this is the top of the try/catch, we cannot call
             //  ThrowXMLwithMemMgr, so emit the error directly. All
             //  MalformedURLExceptions are fatal, so there is no need to
             //  check the type. A URL that parsed but is relative lacks a
             //  protocol; one that did not parse is malformed.
             MalformedURLException e
             (
                 __FILE__
                 , __LINE__
                 , parsedAsURL ? XMLExcepts::URL_NoProtocolPresent
                               : XMLExcepts::URL_MalformedURL
                 , fMemoryManager
             );
             fInException = true;
             emitError(XMLErrs::XMLException_Fatal, e.getCode(), e.getType(), e.getMessage());
             return;
         }
     }
     catch(const XMLException& excToCatch)
     {
         //  Map the severity of the exception onto the matching scanner
         //  error code and emit it. Anything that is neither a warning nor
         //  fatal (or worse) is reported as a plain error.
         fInException = true;

         XMLErrs::Codes toEmit = XMLErrs::XMLException_Error;
         if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
             toEmit = XMLErrs::XMLException_Warning;
         else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
             toEmit = XMLErrs::XMLException_Fatal;

         emitError
         (
             toEmit
             , excToCatch.getCode()
             , excToCatch.getType()
             , excToCatch.getMessage()
         );
         return;
     }

     // The janitor owns the source for the duration of the scan
     Janitor<InputSource> janSrc(srcToUse);
     scanDocument(*srcToUse);
 }

 void XMLScanner::scanDocument(  const   char* const systemId)
 {
     // We just delegate this to the XMLCh version after transcoding
     XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);
     ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
     scanDocument(tmpBuf);
 }


 //  This method begins a progressive parse. It scans through the prolog and
 //  returns a token to be used on subsequent scanNext() calls. If the return
 //  value is true, then the token is legal and ready for further use. If it
 //  returns false, then the scan of the prolog failed and the token is not
 //  going to work on subsequent scanNext() calls.
 bool XMLScanner::scanFirst( const   XMLCh* const    systemId
                             ,       XMLPScanToken&  toFill)
 {
     //  Resolve the system id to an input source, then delegate to the
     //  InputSource overload, which does the actual prolog scan.
     //
     //  The id is first tried as a URL. An absolute URL is read through a
     //  URLInputSource. Anything else (a relative URL, or text that does not
     //  parse as a URL at all) is treated as a local file name, unless
     //  fStandardUriConformant is set, in which case it is reported as a
     //  fatal error and false is returned. Any XMLException raised while
     //  setting up the source is reported through emitError() and false is
     //  returned.
     InputSource* srcToUse = 0;
     try
     {
         //  Since this is the primary document, the URL has to be fully
         //  qualified. If it is not, assume we are just mistaking a file
         //  for a URL.
         XMLURL tmpURL(fMemoryManager);
         const bool parsedAsURL = XMLURL::parse(systemId, tmpURL);

         if (parsedAsURL && !tmpURL.isRelative())
         {
             // Fully qualified URL
             if (fStandardUriConformant && tmpURL.hasInvalidChar())
             {
                 MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                 fInException = true;
                 emitError(XMLErrs::XMLException_Fatal, e.getCode(), e.getType(), e.getMessage());
                 return false;
             }
             srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
         }
         else if (!fStandardUriConformant)
         {
             // Relative or unparsable: fall back to a local file
             srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
         }
         else
         {
             //  Since this is the top of the try/catch, we cannot call
             //  ThrowXMLwithMemMgr, so emit the error directly. All
             //  MalformedURLExceptions are fatal, so there is no need to
             //  check the type. A URL that parsed but is relative lacks a
             //  protocol; one that did not parse is malformed.
             //
             //  The exception is always built with the scanner's own memory
             //  manager, the same as in scanDocument().
             MalformedURLException e
             (
                 __FILE__
                 , __LINE__
                 , parsedAsURL ? XMLExcepts::URL_NoProtocolPresent
                               : XMLExcepts::URL_MalformedURL
                 , fMemoryManager
             );
             fInException = true;
             emitError(XMLErrs::XMLException_Fatal, e.getCode(), e.getType(), e.getMessage());
             return false;
         }
     }
     catch(const XMLException& excToCatch)
     {
         //  Map the severity of the exception onto the matching scanner
         //  error code and emit it. Anything that is neither a warning nor
         //  fatal (or worse) is reported as a plain error.
         fInException = true;

         XMLErrs::Codes toEmit = XMLErrs::XMLException_Error;
         if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
             toEmit = XMLErrs::XMLException_Warning;
         else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
             toEmit = XMLErrs::XMLException_Fatal;

         emitError
         (
             toEmit
             , excToCatch.getCode()
             , excToCatch.getType()
             , excToCatch.getMessage()
         );
         return false;
     }

     // The janitor owns the source for the duration of the call
     Janitor<InputSource> janSrc(srcToUse);
     return scanFirst(*srcToUse, toFill);
 }

 bool XMLScanner::scanFirst( const   char* const     systemId
                             ,       XMLPScanToken&  toFill)
 {
     // We just delegate this to the XMLCh version after transcoding
     XMLCh* tmpBuf = XMLString::transcode(systemId, fMemoryManager);
     ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
     return scanFirst(tmpBuf, toFill);
 }

 bool XMLScanner::scanFirst( const   InputSource&    src
                            ,       XMLPScanToken&  toFill)
 {
     //  Starts a progressive parse of 'src': resets the scanner, scans the
     //  prolog and, on success, fills 'toFill' with this scanner's id and
     //  the current sequence id and returns true. Returns false if the
     //  prolog scan failed; out-of-memory exceptions are passed on to the
     //  caller.

     //  A new scan cycle begins, so move on to the next sequence id. Tokens
     //  handed out for earlier cycles no longer match it.
     ++fSequenceId;

     //  Armed to reset the reader manager; it is released below on the
     //  paths where the reader manager must be left untouched.
     ReaderMgrResetType  resetReaderMgr(&fReaderMgr, &ReaderMgr::reset);

     //  Reset the scanner and its plugged in stuff for a new run. This
     //  resets all the data structures, creates the initial reader and
     //  pushes it on the stack, and sets up the base document path.
     scanReset(src);

     // Let the document handler, if any, know that a document is starting
     if (fDocHandler)
         fDocHandler->startDocument();

     try
     {
         //  Only the prolog is scanned here, i.e. everything before the
         //  root element, including the DTD subsets.
         scanProlog();

         //  Hitting the end of input already means there is no root
         //  element, so this is not a valid XML file.
         if (fReaderMgr.atEOF())
             emitError(XMLErrs::EmptyMainEntity);
     }
     //  NOTE:
     //
     //  In all of the error processing below, the emitError() call MUST come
     //  before the flush of the reader mgr, or it will fail because it tries
     //  to find out the position in the XML source of the error.
     catch(const XMLErrs::Codes)
     {
         // A 'first failure' exit; just report failure
         return false;
     }
     catch(const XMLValid::Codes)
     {
         // A 'first fatal error' exit; just report failure
         return false;
     }
     catch(const XMLException& excToCatch)
     {
         //  Report the exception through emitError(), mapping its severity
         //  onto the matching scanner error code. Anything that is neither
         //  a warning nor fatal (or worse) is reported as a plain error.
         fInException = true;
         try
         {
             XMLErrs::Codes toEmit = XMLErrs::XMLException_Error;
             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                 toEmit = XMLErrs::XMLException_Warning;
             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                 toEmit = XMLErrs::XMLException_Fatal;

             emitError
             (
                 toEmit
                 , excToCatch.getCode()
                 , excToCatch.getType()
                 , excToCatch.getMessage()
             );
         }
         catch(const OutOfMemoryException&)
         {
             //  Out-of-memory is special: resetting the ReaderMgr can be
             //  problematic, so disarm the reset before passing it on.
             resetReaderMgr.release();

             throw;
         }

         return false;
     }
     catch(const OutOfMemoryException&)
     {
         //  Out-of-memory is special: resetting the ReaderMgr can be
         //  problematic, so disarm the reset before passing it on.
         resetReaderMgr.release();

         throw;
     }

     // Make the caller's token legal for the scanNext() calls to come
     toFill.set(fScannerId, fSequenceId);

     //  There is more to scan, so the reader manager must stay as it is:
     //  disarm the reset.
     resetReaderMgr.release();

     return true;
 }


 //  Abandons the progressive parse that 'token' belongs to. Throws a
 //  RuntimeException (Scan_BadPScanToken) if the token is not legal for this
 //  scanner any more; otherwise resets the reader manager, moves on to the
 //  next sequence id and clears the error count.
 void XMLScanner::scanReset(XMLPScanToken& token)
 {
     // Refuse tokens that are no longer legal
     if (!isLegalToken(token))
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Scan_BadPScanToken, fMemoryManager);

     // Throw away the reader state of the abandoned parse
     fReaderMgr.reset();

     // Bumping the sequence number invalidates any tokens handed out so far
     ++fSequenceId;

     // Start counting errors from scratch
     fErrorCount = 0;
 }

 //  Copies the parse settings of another scanner into this one by reading
 //  each setting through the other scanner's getter and applying it through
 //  our own setter. 'refScanner' is dereferenced and must not be null.
 void XMLScanner::setParseSettings(XMLScanner* const refScanner)
 {
     XMLScanner& other = *refScanner;

     // Handlers
     setDocHandler(other.getDocHandler());
     setDocTypeHandler(other.getDocTypeHandler());
     setErrorHandler(other.getErrorHandler());
     setErrorReporter(other.getErrorReporter());
     setEntityHandler(other.getEntityHandler());

     // Feature switches
     setDoNamespaces(other.getDoNamespaces());
     setDoSchema(other.getDoSchema());
     setCalculateSrcOfs(other.getCalculateSrcOfs());
     setStandardUriConformant(other.getStandardUriConformant());
     setExitOnFirstFatal(other.getExitOnFirstFatal());
     setValidationConstraintFatal(other.getValidationConstraintFatal());
     setIdentityConstraintChecking(other.getIdentityConstraintChecking());
     setValidationSchemaFullChecking(other.getValidationSchemaFullChecking());
     cacheGrammarFromParse(other.isCachingGrammarFromParse());
     useCachedGrammarInParse(other.isUsingCachedGrammarInParse());
     setLoadExternalDTD(other.getLoadExternalDTD());
     setNormalizeData(other.getNormalizeData());

     // Schema locations, validation scheme and the remaining collaborators
     setExternalSchemaLocation(other.getExternalSchemaLocation());
     setExternalNoNamespaceSchemaLocation(other.getExternalNoNamespaceSchemaLocation());
     setValidationScheme(other.getValidationScheme());
     setSecurityManager(other.getSecurityManager());
     setPSVIHandler(other.getPSVIHandler());
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Private helper methods.
 // ---------------------------------------------------------------------------

 //  This method handles the common initialization, to avoid having to do
 //  it redundantly in multiple constructors.
 void XMLScanner::commonInit()
 {
     //  Initialization shared by all constructors: assigns the scanner id
     //  and creates the attribute list, the validation context and the
     //  first row of the unsigned int pool, hooks up the CDATA buffer and
     //  initializes a validator passed to the constructor.

     //  The scanner id comes from a static counter, so take the scanner
     //  mutex while we grab the next available id.
     {
         XMLMutexLock lockInit(&gScannerMutex());
         fScannerId = ++gScannerId;
     }

     //  The attribute list stores attribute values during start tag
     //  processing. 32 entries is a reasonable initial size that will serve
     //  for most folks, though it will grow as required.
     fAttrList = new (fMemoryManager) RefVectorOf<XMLAttr>(32, true, fMemoryManager);

     //  The validation context holds the id ref list, which is used to
     //  enforce XML 1.0 ID ref semantics, i.e. all id refs must refer to
     //  elements that exist.
     fValidationContext = new (fMemoryManager) ValidationContextImpl(fMemoryManager);
     fValidationContext->setElemStack(&fElemStack);

     //  Set up the unsigned int pool: a zeroed table of fUIntPoolRowTotal
     //  row pointers, of which only the first row is allocated for now.
     const size_t rowTableBytes = sizeof(unsigned int *) * fUIntPoolRowTotal;
     fUIntPool = static_cast<unsigned int **>(fMemoryManager->allocate(rowTableBytes));
     memset(fUIntPool, 0, rowTableBytes);

     // The initial row holds 64 zeroed elements
     const size_t firstRowBytes = 64 * sizeof(unsigned int);
     fUIntPool[0] = static_cast<unsigned int *>(fMemoryManager->allocate(firstRowBytes));
     memset(fUIntPool[0], 0, firstRowBytes);

     // Register self as handler for XMLBufferFull events on the CDATA buffer
     fCDataBuf.setFullHandler(this, fBufferSize);

     // A validator handed to the constructor counts as user supplied
     if (fValidator)
     {
         fValidatorFromUser = true;
         initValidator(fValidator);
     }
 }

 //  Releases what the scanner itself allocated: the attribute list, the
 //  attribute duplicate check registry, the validation context, the root
 //  element name, the two external schema location strings and the unsigned
 //  int pool.
 void XMLScanner::cleanUp()
 {
     // Objects
     delete fAttrList;
     delete fAttrDupChkRegistry;
     delete fValidationContext;

     // Strings obtained from the memory manager
     fMemoryManager->deallocate(fRootElemName);
     fMemoryManager->deallocate(fExternalSchemaLocation);
     fMemoryManager->deallocate(fExternalNoNamespaceSchemaLocation);

     // Nothing more to do if the unsigned int pool was never created
     if (!fUIntPool)
         return;

     // Free rows 0 through fUIntPoolRow, then the row table itself
     for (unsigned int row = 0; row <= fUIntPoolRow; ++row)
         fMemoryManager->deallocate(fUIntPool[row]);

     fMemoryManager->deallocate(fUIntPool);
 }

 //  Connects a validator to this scanner: hands it the scanner itself, the
 //  reader manager and the buffer manager, and then the scanner's current
 //  error reporter. 'theValidator' is dereferenced and must not be null.
 void XMLScanner::initValidator(XMLValidator* theValidator)
 {
     // What the validator needs to know in order to do its work
     theValidator->setScannerInfo(this, &fReaderMgr, &fBufMgr);

     // Validation errors go to the same reporter as the scanner's own
     theValidator->setErrorReporter(fErrorReporter);
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Error emitting methods
 // ---------------------------------------------------------------------------

 //  These methods are called whenever the scanner wants to emit an error.
 //  It handles getting the message loaded, doing token replacement, etc...
 //  and then calling the error handler, if its installed.
 bool XMLScanner::emitErrorWillThrowException(const XMLErrs::Codes toEmit)
 {
     //  emitError() bails out by throwing only for a fatal error, only when
     //  we are to exit on the first fatal error, and only when we are not
     //  already reporting an exception.
     return XMLErrs::isFatal(toEmit) && fExitOnFirstFatal && !fInException;
 }

 //  Emits an error that has no replacement text.
 //
 //  toEmit  The error code to report.
 //
 //  Counts the error unless it is a warning, passes the loaded message and
 //  the position of the last external entity to the error reporter (if one
 //  is installed) and finally throws 'toEmit' itself when
 //  emitErrorWillThrowException() says so.
 void XMLScanner::emitError(const XMLErrs::Codes toEmit)
 {
     // Bump the error count if it is not a warning
     if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
         incrementErrorCount();

     if (fErrorReporter)
     {
         // Load the message into a local for display
         const unsigned int msgSize = 1023;
         XMLCh errText[msgSize + 1];

         if (!gScannerMsgLoader().loadMsg(toEmit, errText, msgSize))
         {
             //  The loader reported failure, so the buffer cannot be relied
             //  on to hold a terminated string. Report an empty message
             //  rather than passing uninitialized stack contents on.
             //  <TBD> Probably should load a default msg here
             errText[0] = 0;
         }

         //  Create a LastExtEntityInfo structure and get the reader manager
         //  to fill it in for us. This will give us the information about
         //  the last reader on the stack that was an external entity of some
         //  sort (i.e. it will ignore internal entities).
         ReaderMgr::LastExtEntityInfo lastInfo;
         fReaderMgr.getLastExtEntityInfo(lastInfo);

         fErrorReporter->error
         (
             toEmit
             , XMLUni::fgXMLErrDomain
             , XMLErrs::errorType(toEmit)
             , errText
             , lastInfo.systemId
             , lastInfo.publicId
             , lastInfo.lineNumber
             , lastInfo.colNumber
         );
     }

     // Bail out if it is fatal and we are to give up on the first fatal error
     if (emitErrorWillThrowException(toEmit))
         throw toEmit;
 }

 //  Emits an error whose message has replacement tokens.
 //
 //  toEmit        The error code to report.
 //  text1..text4  Replacement texts handed to the message loader together
 //                with the scanner's memory manager.
 //
 //  Counts the error unless it is a warning, passes the loaded message and
 //  the position of the last external entity to the error reporter (if one
 //  is installed) and finally throws 'toEmit' itself when
 //  emitErrorWillThrowException() says so.
 void XMLScanner::emitError( const   XMLErrs::Codes    toEmit
                             , const XMLCh* const        text1
                             , const XMLCh* const        text2
                             , const XMLCh* const        text3
                             , const XMLCh* const        text4)
 {
     // Bump the error count if it is not a warning
     if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
         incrementErrorCount();

     if (fErrorReporter)
     {
         //  Load the message into a local and replace any tokens found in
         //  the text.
         const unsigned int maxChars = 2047;
         XMLCh errText[maxChars + 1];

         if (!gScannerMsgLoader().loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
         {
             //  The loader reported failure, so the buffer cannot be relied
             //  on to hold a terminated string. Report an empty message
             //  rather than passing uninitialized stack contents on.
             //  <TBD> Should probably load a default message here
             errText[0] = 0;
         }

         //  Create a LastExtEntityInfo structure and get the reader manager
         //  to fill it in for us. This will give us the information about
         //  the last reader on the stack that was an external entity of some
         //  sort (i.e. it will ignore internal entities).
         ReaderMgr::LastExtEntityInfo lastInfo;
         fReaderMgr.getLastExtEntityInfo(lastInfo);

         fErrorReporter->error
         (
             toEmit
             , XMLUni::fgXMLErrDomain
             , XMLErrs::errorType(toEmit)
             , errText
             , lastInfo.systemId
             , lastInfo.publicId
             , lastInfo.lineNumber
             , lastInfo.colNumber
         );
     }

     // Bail out if it is fatal and we are to give up on the first fatal error
     if (emitErrorWillThrowException(toEmit))
         throw toEmit;
 }

 //  Reports 'toEmit' through the installed error reporter, expanding the
 //  message's replacement tokens with up to four char (local code page)
 //  texts.
 //
 //  Every code that is not a warning bumps the error count, even when no
 //  reporter is installed. After reporting, the error code itself is thrown
 //  if emitErrorWillThrowException() asks for that.
 void XMLScanner::emitError( const   XMLErrs::Codes    toEmit
                             , const char* const         text1
                             , const char* const         text2
                             , const char* const         text3
                             , const char* const         text4)
 {
     // Bump the error count if it is not a warning
     if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
         incrementErrorCount();

     if (fErrorReporter)
     {
         //  Load the message into a local and replace any tokens found in
         //  the text. The buffer starts out as an empty string so that a
         //  load which fails without writing anything still hands the
         //  reporter a terminated string instead of raw stack contents.
         const unsigned int maxChars = 2047;
         XMLCh errText[maxChars + 1];
         errText[0] = 0;

         if (!gScannerMsgLoader().loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
         {
                 // <TBD> Should probably load a default message here
         }

         //  Create a LastExtEntityInfo structure and get the reader manager
         //  to fill it in for us. This will give us the information about
         //  the last reader on the stack that was an external entity of some
         //  sort (i.e. it will ignore internal entities).
         ReaderMgr::LastExtEntityInfo lastInfo;
         fReaderMgr.getLastExtEntityInfo(lastInfo);

         fErrorReporter->error
         (
             toEmit
             , XMLUni::fgXMLErrDomain
             , XMLErrs::errorType(toEmit)
             , errText
             , lastInfo.systemId
             , lastInfo.publicId
             , lastInfo.lineNumber
             , lastInfo.colNumber
         );
     }

     // Bail out if it's fatal and we are to give up on the first fatal error
     if (emitErrorWillThrowException(toEmit))
         throw toEmit;
 }

 //  Reports an error that originated from an XMLException. The message text
 //  is loaded for 'toEmit' (with up to four XMLCh replacement texts), but
 //  the reporter is handed 'originalExceptCode' together with the exception
 //  domain. Severity, error counting and the value thrown at the end are all
 //  still driven by 'toEmit'.
 void XMLScanner::emitError( const   XMLErrs::Codes      toEmit
                             , const XMLExcepts::Codes   originalExceptCode
                             , const XMLCh* const        text1
                             , const XMLCh* const        text2
                             , const XMLCh* const        text3
                             , const XMLCh* const        text4)
 {
     // Bump the error count if it is not a warning
     if (XMLErrs::errorType(toEmit) != XMLErrorReporter::ErrType_Warning)
         incrementErrorCount();

     if (fErrorReporter)
     {
         //  Load the message into a local and replace any tokens found in
         //  the text. The buffer starts out as an empty string so that a
         //  load which fails without writing anything still hands the
         //  reporter a terminated string instead of raw stack contents.
         const unsigned int maxChars = 2047;
         XMLCh errText[maxChars + 1];
         errText[0] = 0;

         if (!gScannerMsgLoader().loadMsg(toEmit, errText, maxChars, text1, text2, text3, text4, fMemoryManager))
         {
                 // <TBD> Should probably load a default message here
         }

         //  Create a LastExtEntityInfo structure and get the reader manager
         //  to fill it in for us. This will give us the information about
         //  the last reader on the stack that was an external entity of some
         //  sort (i.e. it will ignore internal entities).
         ReaderMgr::LastExtEntityInfo lastInfo;
         fReaderMgr.getLastExtEntityInfo(lastInfo);

         fErrorReporter->error
         (
             originalExceptCode
             , XMLUni::fgExceptDomain    //fgXMLErrDomain
             , XMLErrs::errorType(toEmit)
             , errText
             , lastInfo.systemId
             , lastInfo.publicId
             , lastInfo.lineNumber
             , lastInfo.colNumber
         );
     }

     // Bail out if it's fatal and we are to give up on the first fatal error
     if (emitErrorWillThrowException(toEmit))
         throw toEmit;
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Getter methods
 // ---------------------------------------------------------------------------

 //  This method allows the caller to query the current location of the scanner.
 //  It will return the sys/public ids of the current entity, and the line/col
 //  position within it.
 //
 //  NOTE: This API returns the location with the last external file. So if its
 //  currently scanning an entity, the position returned will be the end of
 //  the entity reference in the file that had the reference.
 //
 /*bool
 XMLScanner::getLastExtLocation(         XMLCh* const    sysIdToFill
                                 , const unsigned int    maxSysIdChars
                                 ,       XMLCh* const    pubIdToFill
                                 , const unsigned int    maxPubIdChars
                                 ,       XMLSSize_t&     lineToFill
                                 ,       XMLSSize_t&     colToFill) const
 {
     // Create a local info object and get it filled in by the reader manager
     ReaderMgr::LastExtEntityInfo lastInfo;
     fReaderMgr.getLastExtEntityInfo(lastInfo);

     // Fill in the line and column number
     lineToFill = lastInfo.lineNumber;
     colToFill = lastInfo.colNumber;

     // And copy over as much of the ids as will fit
     sysIdToFill[0] = 0;
     if (lastInfo.systemId)
     {
         if (XMLString::stringLen(lastInfo.systemId) > maxSysIdChars)
             return false;
         XMLString::copyString(sysIdToFill, lastInfo.systemId);
     }

     pubIdToFill[0] = 0;
     if (lastInfo.publicId)
     {
         if (XMLString::stringLen(lastInfo.publicId) > maxPubIdChars)
             return false;
         XMLString::copyString(pubIdToFill, lastInfo.publicId);
     }
     return true;
 }*/


 // ---------------------------------------------------------------------------
 //  XMLScanner: Private scanning methods
 // ---------------------------------------------------------------------------

 //  This method is called after the end of the root element, to handle
 //  any miscellaneous stuff hanging around.
 void XMLScanner::scanMiscellaneous()
 {
     // Get a buffer for this work
     XMLBufBid bbCData(&fBufMgr);

     while (true)
     {
         try
         {
             const XMLCh nextCh = fReaderMgr.peekNextChar();

             // Watch for end of file and break out
             if (!nextCh)
                 break;

             if (nextCh == chOpenAngle)
             {
                 if (checkXMLDecl(true))
                 {
                     // Can't have an XML decl here
                     emitError(XMLErrs::NotValidAfterContent);
                     fReaderMgr.skipPastChar(chCloseAngle);
                 }
                 else if (fReaderMgr.skippedString(XMLUni::fgPIString))
                 {
                     scanPI();
                 }
                  else if (fReaderMgr.skippedString(XMLUni::fgCommentString))
                 {
                     scanComment();
                 }
                 else
                 {
                     // This can't be possible, so just give up
                     emitError(XMLErrs::ExpectedCommentOrPI);
                     fReaderMgr.skipPastChar(chCloseAngle);
                 }
             }
             else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
             {
                 //  If we have a doc handler, then gather up the spaces and
                 //  call back. Otherwise, just skip over whitespace.
                 if (fDocHandler)
                 {
                     fReaderMgr.getSpaces(bbCData.getBuffer());
                     fDocHandler->ignorableWhitespace
                     (
                         bbCData.getRawBuffer()
                         , bbCData.getLen()
                         , false
                     );
                 }
                 else
                 {
                     fReaderMgr.skipPastSpaces();
                 }
             }
             else
             {
                 emitError(XMLErrs::ExpectedCommentOrPI);
                 fReaderMgr.skipPastChar(chCloseAngle);
             }
         }
         catch(const EndOfEntityException&)
         {
             //  Some entity leaked out of the content part of the document. Issue
             //  a warning and keep going.
             emitError(XMLErrs::EntityPropogated);
         }
     }
 }


 //  Scans a PI and calls the appropriate callbacks. At entry we have just
 //  scanned the <? part, and need to now start on the PI target name.
 void XMLScanner::scanPI()
 {
     const XMLCh* namePtr = 0;
     const XMLCh* targetPtr = 0;

     //  If there are any spaces here, then warn about it. If we aren't in
     //  'first error' mode, then we'll come back and can easily pick up
     //  again by just skipping them.
     if (fReaderMgr.lookingAtSpace())
     {
         emitError(XMLErrs::PINameExpected);
         fReaderMgr.skipPastSpaces();
     }

     // Get a buffer for the PI name and scan it in
     XMLBufBid bbName(&fBufMgr);
     if (!fReaderMgr.getName(bbName.getBuffer()))
     {
         emitError(XMLErrs::PINameExpected);
         fReaderMgr.skipPastChar(chCloseAngle);
         return;
     }

     // Point the name pointer at the raw data
     namePtr = bbName.getRawBuffer();

     // See if it is some form of 'xml' and emit a warning
     //if (!XMLString::compareIString(namePtr, XMLUni::fgXMLString))
     if (bbName.getLen() == 3 &&
         (((namePtr[0] == chLatin_x) || (namePtr[0] == chLatin_X)) &&
          ((namePtr[1] == chLatin_m) || (namePtr[1] == chLatin_M)) &&
          ((namePtr[2] == chLatin_l) || (namePtr[2] == chLatin_L))))
         emitError(XMLErrs::NoPIStartsWithXML);

     // If namespaces are enabled, then no colons allowed
     if (fDoNamespaces)
     {
         if (XMLString::indexOf(namePtr, chColon) != -1)
             emitError(XMLErrs::ColonNotLegalWithNS);
     }

     //  If we don't hit a space next, then the PI has no target. If we do
     //  then get out the target. Get a buffer for it as well
     XMLBufBid bbTarget(&fBufMgr);
     if (fReaderMgr.skippedSpace())
     {
         // Skip any leading spaces
         fReaderMgr.skipPastSpaces();

         bool gotLeadingSurrogate = false;

         // It does have a target, so lets move on to deal with that.
         while (1)
         {
             const XMLCh nextCh = fReaderMgr.getNextChar();

             // Watch for an end of file, which is always bad here
             if (!nextCh)
             {
                 emitError(XMLErrs::UnterminatedPI);
                 ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
             }

             // Watch for potential terminating character
             if (nextCh == chQuestion)
             {
                 // It must be followed by '>' to be a termination of the target
                 if (fReaderMgr.skippedChar(chCloseAngle))
                     break;
             }

             // Check for correct surrogate pairs
             if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
             {
                 if (gotLeadingSurrogate)
                     emitError(XMLErrs::Expected2ndSurrogateChar);
                 else
                     gotLeadingSurrogate = true;
             }
              else
             {
                 if (gotLeadingSurrogate)
                 {
                     if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                         emitError(XMLErrs::Expected2ndSurrogateChar);
                 }
                 // Its got to at least be a valid XML character
                 else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) {

                     XMLCh tmpBuf[9];
                     XMLString::binToText
                     (
                         nextCh
                         , tmpBuf
                         , 8
                         , 16
                         , fMemoryManager
                     );
                     emitError(XMLErrs::InvalidCharacter, tmpBuf);
                 }

                 gotLeadingSurrogate = false;
             }

             bbTarget.append(nextCh);
         }
     }
     else
     {
         // No target, but make sure its terminated ok
         if (!fReaderMgr.skippedChar(chQuestion))
         {
             emitError(XMLErrs::UnterminatedPI);
             fReaderMgr.skipPastChar(chCloseAngle);
             return;
         }

         if (!fReaderMgr.skippedChar(chCloseAngle))
         {
             emitError(XMLErrs::UnterminatedPI);
             fReaderMgr.skipPastChar(chCloseAngle);
             return;
         }
     }

     // Point the target pointer at the raw data
     targetPtr = bbTarget.getRawBuffer();

     // If we have a handler, then call it
     if (fDocHandler)
     {
         fDocHandler->docPI
         (
             namePtr
             , targetPtr
        );
     }

     //mark PI is seen within the current element
     if (! fElemStack.isEmpty())
         fElemStack.setCommentOrPISeen();

 }

 //  Scans all the input from the start of the file to the root element.
 //  There does not have to be anything in the prolog necessarily, but usually
 //  there is at least an XMLDecl.
 //
 //  On exit from here we are either at the end of the file or about to read
 //  the opening < of the root element.
 void XMLScanner::scanProlog()
 {
     bool sawDocTypeDecl = false;
     // Get a buffer for whitespace processing
     XMLBufBid bbCData(&fBufMgr);

     //  Loop through the prolog. If there is no content, this could go all
     //  the way to the end of the file.
     try
     {
         while (true)
         {
             const XMLCh nextCh = fReaderMgr.peekNextChar();

             if (nextCh == chOpenAngle)
             {
                 //  Ok, it could be the xml decl, a comment, the doc type line,
                 //  or the start of the root element.
                 if (checkXMLDecl(true))
                 {
                     // There shall be at lease --ONE-- space in between
                     // the tag '<?xml' and the VersionInfo.
                     //
                     //  If we are not at line 1, col 6, then the decl was not
                     //  the first text, so its invalid.
                     const XMLReader* curReader = fReaderMgr.getCurrentReader();
                     if ((curReader->getLineNumber() != 1)
                     ||  (curReader->getColumnNumber() != 7))
                     {
                         emitError(XMLErrs::XMLDeclMustBeFirst);
                     }

                     scanXMLDecl(Decl_XML);
                 }
                 else if (fReaderMgr.skippedString(XMLUni::fgPIString))
                 {
                     scanPI();
                 }
                  else if (fReaderMgr.skippedString(XMLUni::fgCommentString))
                 {
                     scanComment();
                 }
                  else if (fReaderMgr.skippedString(XMLUni::fgDocTypeString))
                 {
                     if (sawDocTypeDecl) {
                         emitError(XMLErrs::DuplicateDocTypeDecl);
                     }
                     scanDocTypeDecl();
                     sawDocTypeDecl = true;

                     // if reusing grammar, this has been validated already in first scan
                     // skip for performance
                     if (fValidate && !fGrammar->getValidated()) {
                         //  validate the DTD scan so far
                         fValidator->preContentValidation(fUseCachedGrammar, true);
                     }
                 }
                 else
                 {
                     // Assume its the start of the root element
                     return;
                 }
             }
             else if (fReaderMgr.getCurrentReader()->isWhitespace(nextCh))
             {
                 //  If we have a document handler then gather up the
                 //  whitespace and call back. Otherwise just skip over spaces.
                 if (fDocHandler)
                 {
                     fReaderMgr.getSpaces(bbCData.getBuffer());
                     fDocHandler->ignorableWhitespace
                     (
                         bbCData.getRawBuffer()
                         , bbCData.getLen()
                         , false
                     );
                 }
                  else
                 {
                     fReaderMgr.skipPastSpaces();
                 }
             }
              else
             {
                 emitError(XMLErrs::InvalidDocumentStructure);

                 // Watch for end of file and break out
                 if (!nextCh)
                     break;
                 else
                     fReaderMgr.skipPastChar(chCloseAngle);
             }

         }
     }
     catch(const EndOfEntityException&)
     {
         //  We should never get an end of entity here. They should only
         //  occur within the doc type scanning method, and not leak out to
         //  here.
         emitError
         (
             XMLErrs::UnexpectedEOE
             , "in prolog"
         );
     }
 }


 //  Scans the <?xml .... ?> line. This stuff is all sequential so we don't
 //  do any state machine loop here. We just bull straight through it. It ends
 //  past the closing bracket. If there is a document handler, then its called
 //  on the XMLDecl callback.
 //
 //  On entry, the <?xml has been scanned, and we pick it up from there.
 //
 //  NOTE: In order to provide good recovery from bad XML here, we try to be
 //  very flexible. No matter what order the stuff is in, we'll keep going
 //  though we'll issue errors.
 //
 //  The parameter tells us which type of decl we should expect, Text or XML.
 //    [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
 //    [77] TextDecl::= '<?xml' VersionInfo? EncodingDecl S? '?>'
 void XMLScanner::scanXMLDecl(const DeclTypes type)
 {
     // Get us some buffers to use
     XMLBufBid bbVersion(&fBufMgr);
     XMLBufBid bbEncoding(&fBufMgr);
     XMLBufBid bbStand(&fBufMgr);
     XMLBufBid bbDummy(&fBufMgr);
     XMLBufBid bbName(&fBufMgr);

     //  We use this little enum and array to keep up with what we found
     //  and what order we found them in. This lets us get them free form
     //  without too much overhead, but still know that they were in the
     //  wrong order.
     enum Strings
     {
         VersionString
         , EncodingString
         , StandaloneString
         , UnknownString

         , StringCount
     };
     int flags[StringCount] = { -1, -1, -1, -1 };

     //  Also set up a list of buffers in the right order so that we know
     //  where to put stuff.
     XMLBuffer* buffers[StringCount] ;
     buffers[0] = &bbVersion.getBuffer();
     buffers[1] = &bbEncoding.getBuffer();
     buffers[2] = &bbStand.getBuffer();
     buffers[3] = &bbDummy.getBuffer();

     int curCount = 0;
     Strings curString;
     XMLBuffer& nameBuf = bbName.getBuffer();
     while (true)
     {
         // Skip any spaces
         const unsigned int spaceCount = fReaderMgr.skipPastSpaces(true);

         // If we are looking at a question mark, then break out
         if (fReaderMgr.lookingAtChar(chQuestion))
             break;

         // If this is not the first string, then we require the spaces
         if (!spaceCount && curCount)
             emitError(XMLErrs::ExpectedWhitespace);

         //  Get characters up to the next whitespace or equal's sign.
         if (!scanUpToWSOr(nameBuf, chEqual))
             emitError(XMLErrs::ExpectedDeclString);

         // See if it matches any of our expected strings
         if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgVersionString))
             curString = VersionString;
         else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgEncodingString))
             curString = EncodingString;
         else if (XMLString::equals(nameBuf.getRawBuffer(), XMLUni::fgStandaloneString))
             curString = StandaloneString;
         else
             curString = UnknownString;

         //  If its an unknown string, then give that error. Else check to
         //  see if this one has been done already and give that error.
         if (curString == UnknownString)
             emitError(XMLErrs::ExpectedDeclString, nameBuf.getRawBuffer());
         else if (flags[curString] != -1)
             emitError(XMLErrs::DeclStringRep, nameBuf.getRawBuffer());
         else if (flags[curString] == -1)
             flags[curString] = ++curCount;

         //  Scan for an equal's sign. If we don't find it, issue an error
         //  but keep trying to go on.
         if (!scanEq(true))
             emitError(XMLErrs::ExpectedEqSign);

         //  Get a quote string into the buffer for the string that we are
         //  currently working on.
         if (!getQuotedString(*buffers[curString]))
         {
             emitError(XMLErrs::ExpectedQuotedString);
             fReaderMgr.skipPastChar(chCloseAngle);
             return;
         }

         // And validate the value according which one it was
         const XMLCh* rawValue = buffers[curString]->getRawBuffer();
         if (curString == VersionString)
         {
             if (XMLString::equals(rawValue, XMLUni::fgVersion1_1)) {
                 if (type == Decl_XML) {
                 	fXMLVersion = XMLReader::XMLV1_1;
                     fReaderMgr.setXMLVersion(XMLReader::XMLV1_1);
                 }
                 else {
             	    if (fXMLVersion != XMLReader::XMLV1_1)
             	        emitError(XMLErrs::UnsupportedXMLVersion, rawValue);
             	}
             }
             else if (XMLString::equals(rawValue, XMLUni::fgVersion1_0)) {
                 if (type == Decl_XML) {
                 	fXMLVersion = XMLReader::XMLV1_0;
                     fReaderMgr.setXMLVersion(XMLReader::XMLV1_0);
                 }
             }
             else
                 emitError(XMLErrs::UnsupportedXMLVersion, rawValue);
         }
          else if (curString == EncodingString)
         {
             if (!XMLString::isValidEncName(rawValue))
                 emitError(XMLErrs::BadXMLEncoding, rawValue);
         }
          else if (curString == StandaloneString)
         {
             if (XMLString::equals(rawValue, XMLUni::fgYesString))
                 fStandalone = true;
             else if (XMLString::equals(rawValue, XMLUni::fgNoString))
                 fStandalone = false;
             else
             {
                 emitError(XMLErrs::BadStandalone);
                 //if (!XMLString::compareIString(rawValue, XMLUni::fgYesString))
                 //else if (!XMLString::compareIString(rawValue, XMLUni::fgNoString))
                 if (buffers[curString]->getLen() == 3 &&
                     (((rawValue[0] == chLatin_y) || (rawValue[0] == chLatin_Y)) &&
                      ((rawValue[1] == chLatin_e) || (rawValue[1] == chLatin_E)) &&
                      ((rawValue[2] == chLatin_s) || (rawValue[2] == chLatin_S))))
                     fStandalone = true;
                 else if (buffers[curString]->getLen() == 2 &&
                     (((rawValue[0] == chLatin_n) || (rawValue[0] == chLatin_N)) &&
                      ((rawValue[1] == chLatin_o) || (rawValue[1] == chLatin_O))))
                     fStandalone = false;
             }
         }
     }

     //  Make sure that the strings present are in order. We don't care about
     //  which ones are present at this point, just that any there are in the
     //  right order.
     int curTop = 0;
     for (int index = VersionString; index < StandaloneString; index++)
     {
         if (flags[index] != -1)
         {
             if (flags[index] !=  curTop + 1)
             {
                 emitError(XMLErrs::DeclStringsInWrongOrder);
                 break;
             }
             curTop = flags[index];
         }
     }

     //  If its an XML decl, the version must be present.
     //  If its a Text decl, then encoding must be present AND standalone must not be present.
     if ((type == Decl_XML) && (flags[VersionString] == -1))
         emitError(XMLErrs::XMLVersionRequired);
     else if (type == Decl_Text) {
         if (flags[StandaloneString] != -1)
             emitError(XMLErrs::StandaloneNotLegal);
         if (flags[EncodingString] == -1)
             emitError(XMLErrs::EncodingRequired);
     }

     if (!fReaderMgr.skippedChar(chQuestion))
     {
         emitError(XMLErrs::UnterminatedXMLDecl);
         fReaderMgr.skipPastChar(chCloseAngle);
     }
      else if (!fReaderMgr.skippedChar(chCloseAngle))
     {
         emitError(XMLErrs::UnterminatedXMLDecl);
         fReaderMgr.skipPastChar(chCloseAngle);
     }

     //  Do this before we possibly update the reader with the
     //  actual encoding string. Otherwise, we will pass the wrong thing
     //  for the last parameter!
     const XMLCh* actualEnc = fReaderMgr.getCurrentEncodingStr();

     //  Ok, we've now seen the real encoding string, if there was one, so
     //  lets call back on the current reader and tell it what the real
     //  encoding string was. If it fails, that's because it represents some
     //  sort of contradiction with the autosensed format, and it keeps the
     //  original encoding.
     //
     //  NOTE: This can fail for a number of reasons, such as a bogus encoding
     //  name or because its in flagrant contradiction of the auto-sensed
     //  format.
     if (flags[EncodingString] != -1)
     {
         if (!fReaderMgr.getCurrentReader()->setEncoding(bbEncoding.getRawBuffer()))
             emitError(XMLErrs::ContradictoryEncoding, bbEncoding.getRawBuffer());
         else
             actualEnc = bbEncoding.getRawBuffer();
     }

     //  If we have a document handler then call the XML Decl callback.
     if (type == Decl_XML)
     {
         if (fDocHandler)
             fDocHandler->XMLDecl
             (
                 bbVersion.getRawBuffer()
                 , bbEncoding.getRawBuffer()
                 , bbStand.getRawBuffer()
                 , actualEnc
             );
     }
     else if (type == Decl_Text)
     {
         if (fDocTypeHandler)
             fDocTypeHandler->TextDecl
             (
                 bbVersion.getRawBuffer()
                 , bbEncoding.getRawBuffer()
             );
     }
 }

 //  Returns the text stored in the URI string pool for 'uriId'. An id the
 //  pool does not know, or one with no value, yields the zero length string,
 //  so the result is never a null pointer.
 const XMLCh* XMLScanner::getURIText(const   unsigned int    uriId) const
 {
     if (!fURIStringPool->exists(uriId))
         return XMLUni::fgZeroLenString;

     // Look up the URI text in the string pool
     const XMLCh* const uriText = fURIStringPool->getValueForId(uriId);
     return uriText ? uriText : XMLUni::fgZeroLenString;
 }

 bool XMLScanner::getURIText(  const   unsigned int    uriId
                       ,       XMLBuffer&      uriBufToFill) const
 {
     if (fURIStringPool->exists(uriId)) {
         // Look up the URI in the string pool and return its id
         const XMLCh* value = fURIStringPool->getValueForId(uriId);
         if (!value)
             return false;

         uriBufToFill.set(value);
         return true;
     }
     else
         return false;
 }

 bool XMLScanner::checkXMLDecl(bool startWithAngle) {

     // [23] XMLDecl     ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
     // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
     //
     // [3]  S           ::= (#x20 | #x9 | #xD | #xA)+
     if (startWithAngle) {
         if (fReaderMgr.peekString(XMLUni::fgXMLDeclString)) {
             if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpace)
                || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTab)
                || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLF)
                || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCR))
             {
                 return true;
             }
         }
         else if (fReaderMgr.skippedString(XMLUni::fgXMLDeclStringSpaceU)
            || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringHTabU)
            || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringLFU)
            || fReaderMgr.skippedString(XMLUni::fgXMLDeclStringCRU))
         {
             //  Just in case, check for upper case. If found, issue
             //  an error, but keep going.
             emitError(XMLErrs::XMLDeclMustBeLowerCase);
             return true;
         }
     }
     else {
         if (fReaderMgr.peekString(XMLUni::fgXMLString)) {
             if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpace)
                || fReaderMgr.skippedString(XMLUni::fgXMLStringHTab)
                || fReaderMgr.skippedString(XMLUni::fgXMLStringLF)
                || fReaderMgr.skippedString(XMLUni::fgXMLStringCR))
             {
                 return true;
             }
         }
         else if (fReaderMgr.skippedString(XMLUni::fgXMLStringSpaceU)
            || fReaderMgr.skippedString(XMLUni::fgXMLStringHTabU)
            || fReaderMgr.skippedString(XMLUni::fgXMLStringLFU)
            || fReaderMgr.skippedString(XMLUni::fgXMLStringCRU))
         {
             //  Just in case, check for upper case. If found, issue
             //  an error, but keep going.
             emitError(XMLErrs::XMLDeclMustBeLowerCase);
             return true;
         }
     }

     return false;
 }


 // ---------------------------------------------------------------------------
 //  XMLScanner: Grammar preparsing
 // ---------------------------------------------------------------------------
 //  Loads the grammar identified by systemId.
 //
 //  The entity handler, when one is installed, gets the first chance to map
 //  the id to an input source. If that yields nothing, 0 is returned right
 //  away when default entity resolution is disabled; otherwise the id is
 //  parsed as a URL, falling back to a local file unless standard URI
 //  conformance is requested. Failures on this path are reported through
 //  emitError() and the function returns 0. On success the source is handed
 //  to the InputSource overload of loadGrammar() and its result is returned.
 Grammar* XMLScanner::loadGrammar(const   XMLCh* const systemId
                                  , const short        grammarType
                                  , const bool         toCache)
 {
     InputSource* srcToUse = 0;

     // Let the application's entity handler try to resolve the id first
     if (fEntityHandler){
         ReaderMgr::LastExtEntityInfo lastInfo;
         fReaderMgr.getLastExtEntityInfo(lastInfo);
         XMLResourceIdentifier resourceIdentifier(XMLResourceIdentifier::ExternalEntity,
                             systemId, 0, XMLUni::fgZeroLenString, lastInfo.systemId,
                             &fReaderMgr);
         srcToUse = fEntityHandler->resolveEntity(&resourceIdentifier);
     }

     //  First we try to parse it as a URL. If that fails, we assume its
     //  a file and try it that way.
     if (!srcToUse) {
         if (fDisableDefaultEntityResolution)
             return 0;

         try
         {
             //  Create a temporary URL. Since this is the primary document,
             //  it has to be fully qualified. If not, then assume we are just
             //  mistaking a file for a URL.
             XMLURL tmpURL(fMemoryManager);

             if (XMLURL::parse(systemId, tmpURL)) {

                 if (tmpURL.isRelative())
                 {
                     if (!fStandardUriConformant)
                         srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                     else {
                         // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                         // emit the error directly
                         MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_NoProtocolPresent, fMemoryManager);
                         fInException = true;
                         emitError
                         (
                             XMLErrs::XMLException_Fatal
                             , e.getCode()
                             , e.getType()
                             , e.getMessage()
                         );
                         return 0;
                     }
                 }
                 else
                 {
                     if (fStandardUriConformant && tmpURL.hasInvalidChar()) {
                         MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                         fInException = true;
                         emitError
                         (
                             XMLErrs::XMLException_Fatal
                             , e.getCode()
                             , e.getType()
                             , e.getMessage()
                         );
                         return 0;
                     }
                     srcToUse = new (fMemoryManager) URLInputSource(tmpURL, fMemoryManager);
                 }
             }
             else
             {
                 if (!fStandardUriConformant)
                     srcToUse = new (fMemoryManager) LocalFileInputSource(systemId, fMemoryManager);
                 else {
                     // since this is the top of the try/catch, cannot call ThrowXMLwithMemMgr
                     // emit the error directly
                     // lazy bypass ... since all MalformedURLException are fatal, no need to check the type
                     // Built with the scanner's memory manager, like the two
                     // sibling error paths above.
                     MalformedURLException e(__FILE__, __LINE__, XMLExcepts::URL_MalformedURL, fMemoryManager);
                     fInException = true;
                     emitError
                     (
                         XMLErrs::XMLException_Fatal
                         , e.getCode()
                         , e.getType()
                         , e.getMessage()
                     );
                     return 0;
                 }
             }
         }
         catch(const XMLException& excToCatch)
         {
             //  For any other XMLException,
             //  emit the error and catch any user exception thrown from here.
             fInException = true;
             if (excToCatch.getErrorType() == XMLErrorReporter::ErrType_Warning)
                 emitError
                 (
                     XMLErrs::XMLException_Warning
                     , excToCatch.getCode()
                     , excToCatch.getType()
                     , excToCatch.getMessage()
                 );
             else if (excToCatch.getErrorType() >= XMLErrorReporter::ErrType_Fatal)
                 emitError
                 (
                     XMLErrs::XMLException_Fatal
                     , excToCatch.getCode()
                     , excToCatch.getType()
                     , excToCatch.getMessage()
                 );
             else
                 emitError
                 (
                     XMLErrs::XMLException_Error
                     , excToCatch.getCode()
                     , excToCatch.getType()
                     , excToCatch.getMessage()
                 );

             // Whatever the severity, no source could be set up
             return 0;
         }
     }

     // The janitor deletes the source once the grammar has been loaded
     Janitor<InputSource> janSrc(srcToUse);
     return loadGrammar(*srcToUse, grammarType, toCache);
 }

 //  Overload taking the system id as a native char string. The id is
 //  transcoded to XMLCh and the XMLCh overload does the actual work.
 Grammar* XMLScanner::loadGrammar(const   char* const systemId
                                  , const short       grammarType
                                  , const bool        toCache)
 {
     XMLCh* const wideSysId = XMLString::transcode(systemId, fMemoryManager);

     // Hands the transcoded buffer back to fMemoryManager on scope exit
     ArrayJanitor<XMLCh> wideSysIdGuard(wideSysId, fMemoryManager);

     Grammar* const loaded = loadGrammar(wideSysId, grammarType, toCache);
     return loaded;
 }


 // ---------------------------------------------------------------------------
 //  XMLScanner: Setter methods
 // ---------------------------------------------------------------------------
 //  Installs the URI string pool and records the pool ids of the empty,
 //  unknown, XML and XMLNS namespace names.
 void XMLScanner::setURIStringPool(XMLStringPool* const stringPool)
 {
     fURIStringPool = stringPool;

     // The four lookups are issued in a fixed order: empty, unknown, xml, xmlns
     const unsigned int emptyId   = stringPool->addOrFind(XMLUni::fgZeroLenString);
     fEmptyNamespaceId = emptyId;

     const unsigned int unknownId = stringPool->addOrFind(XMLUni::fgUnknownURIName);
     fUnknownNamespaceId = unknownId;

     const unsigned int xmlId     = stringPool->addOrFind(XMLUni::fgXMLURIName);
     fXMLNamespaceId = xmlId;

     const unsigned int xmlnsId   = stringPool->addOrFind(XMLUni::fgXMLNSURIName);
     fXMLNSNamespaceId = xmlnsId;
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Private helper methods
 // ---------------------------------------------------------------------------

 /***
  * When grammars are reused (caching the grammar from a parse, or using a
  * cached grammar), an internal DTD subset is only conditionally allowed.
  *
  * When caching the grammar from a parse, it is NOT allowed.
  *
  * When using a cached grammar:
  *   if an external DTD is present and it has been parsed before, then the
  *   internal subset is not allowed; otherwise it is allowed.
  *
  ***/
 void XMLScanner::checkInternalDTD(bool hasExtSubset
                                  ,const XMLCh* const sysId
                                  ,const XMLCh* const pubId)
 {
     if (fToCacheGrammar)
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);

     if (fUseCachedGrammar && hasExtSubset && !fIgnoreCachedDTD)
     {
         InputSource* sysIdSrc = resolveSystemId(sysId, pubId);
         if (sysIdSrc) {
             Janitor<InputSource> janSysIdSrc(sysIdSrc);
             Grammar* grammar = fGrammarResolver->getGrammar(sysIdSrc->getSystemId());

             if (grammar && grammar->getGrammarType() == Grammar::DTDGrammarType)
             {
                 ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Val_CantHaveIntSS, fMemoryManager);
             }
         }
     }

 }

 //  This method is called after the content scan to ensure that all the
 //  ID/IDREF attributes match up (i.e. that all IDREFs refer to IDs.) This is
 //  an XML 1.0 rule, so we can do it here in the core.

 void XMLScanner::checkIDRefs()
 {
     //  Iterate the id ref list. If we find any entries here which are used
     //  but not declared, then that's an error.
     RefHashTableOfEnumerator<XMLRefInfo> refEnum(fValidationContext->getIdRefList(), false, fMemoryManager);
     while (refEnum.hasMoreElements())
     {
         // Get a ref to the current element
         const XMLRefInfo& curRef = refEnum.nextElement();

         // If its used but not declared, then its an error
         if (!curRef.getDeclared() && curRef.getUsed() && fValidate)
             fValidator->emitError(XMLValid::IDNotDeclared, curRef.getRefName());
     }
 }


 //  This just does a simple check that the passed progressive scan token is
 //  legal for this scanner.
 bool XMLScanner::isLegalToken(const XMLPScanToken& toCheck)
 {
     // The token must carry this scanner's id ...
     if (fScannerId != toCheck.fScannerId)
         return false;

     // ... and this scanner's current sequence id
     return fSequenceId == toCheck.fSequenceId;
 }


 //  This method will handle figuring out what the next top level token is
 //  in the input stream. It will return an enumerated value that indicates
 //  what it believes the next XML level token must be. It will eat as many
 //  chars as are required to figure out what is next.
 XMLScanner::XMLTokens XMLScanner::senseNextToken(unsigned int& orgReader)
 {
     // What may follow a '<' when the next char is '!': "![CDATA" or "!--"
     static const XMLCh cdataIntro[] =
     {
             chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A
         ,   chLatin_T, chLatin_A, chNull
     };

     static const XMLCh commentIntro[] =
     {
         chBang, chDash, chDash, chNull
     };

     //  Peek at the next character to guess the token. End of entity
     //  exceptions are only switched on (through the janitor) when the
     //  current reader has nothing buffered, which covers an entity that
     //  ended right at the > of some markup.
     const bool haveBufferedData =
         fReaderMgr.getCurrentReader() && fReaderMgr.getCurrentReader()->charsLeftInBuffer()>0;

     XMLCh peeked;
     if (haveBufferedData)
     {
         peeked = fReaderMgr.peekNextChar();
     }
     else
     {
         ThrowEOEJanitor eoeGuard(&fReaderMgr, true);
         peeked = fReaderMgr.peekNextChar();
     }

     // A null char means end of file, which is legal at this level
     if (!peeked)
         return Token_EOF;

     //  Anything but '<' is content; that includes the '&' of a reference,
     //  since content is the only place a reference can occur here.
     if (peeked != chOpenAngle)
         return Token_CharData;

     //  Consume the '<' and tell the caller which reader it was seen in
     fReaderMgr.getNextChar();
     orgReader = fReaderMgr.getCurrentReaderNum();

     //  Classify the markup by the character after the '<'
     const XMLCh afterAngle = fReaderMgr.peekNextChar();

     if (afterAngle == chForwardSlash)
     {
         fReaderMgr.getNextChar();
         return Token_EndTag;
     }

     if (afterAngle == chQuestion)
     {
         // It must be a PI
         fReaderMgr.getNextChar();
         return Token_PI;
     }

     //  Not a '!' either: assume an element name. If that is wrong, the
     //  start tag scan fails when it cannot get a valid tag.
     if (afterAngle != chBang)
         return Token_StartTag;

     // "<!" has to introduce a CDATA section or a comment
     if (fReaderMgr.skippedString(cdataIntro))
         return Token_CData;

     if (fReaderMgr.skippedString(commentIntro))
         return Token_Comment;

     emitError(XMLErrs::ExpectedCommentOrCDATA);
     return Token_Unknown;
 }

 // ---------------------------------------------------------------------------
 //  XMLScanner: Private parsing methods
 // ---------------------------------------------------------------------------

 //  This guy just scans out a single or double quoted string of characters.
 //  It does not pass any judgement on the contents and assumes that it is
 //  illegal to have another quote of the same kind inside the string's
 //  contents.
 //
 //  NOTE: This is for simple stuff like the strings in the XMLDecl which
 //  cannot have any entities inside them. So this guy does not handle any
 //  end of entity stuff.
 bool XMLScanner::getQuotedString(XMLBuffer& toFill)
 {
     // Start from an empty target buffer
     toFill.reset();

     // The string has to open with a single or double quote
     XMLCh openingQuote;
     if (!fReaderMgr.skipIfQuote(openingQuote))
         return false;

     //  Collect characters until the matching quote shows up
     for (XMLCh cur = fReaderMgr.getNextChar();
          cur != openingQuote;
          cur = fReaderMgr.getNextChar())
     {
         //  A null char (end of file) should never turn up here. Just fail;
         //  the higher level code that called us deals with it gracefully.
         if (!cur)
             return false;

         toFill.append(cur);
     }
     return true;
 }


 //  This method scans a character reference and returns the character that
 //  was referred to. It assumes that we've already scanned the &# characters
 //  that prefix the numeric code.
 bool XMLScanner::scanCharRef(XMLCh& toFill, XMLCh& second)
 {
     //  Work out the radix. Hex is supposed to be flagged by a lower case x;
     //  an upper case X is reported but still accepted so we recover well.
     unsigned int base = 10;
     if (fReaderMgr.skippedChar(chLatin_x))
     {
         base = 16;
     }
     else if (fReaderMgr.skippedChar(chLatin_X))
     {
         emitError(XMLErrs::HexRadixMustBeLowerCase);
         base = 16;
     }

     unsigned int codePoint = 0;
     bool sawDigit = false;

     for (;;)
     {
         const XMLCh cur = fReaderMgr.peekNextChar();

         // Running out of input in the middle of the reference
         if (!cur)
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);

         // The semicolon closes the reference; consume it and stop
         if (cur == chSemiColon)
         {
             fReaderMgr.getNextChar();
             break;
         }

         //  Map the character to its digit value; anything that is not a
         //  decimal or hex digit ends the scan with an error.
         unsigned int digit;
         if ((cur >= chDigit_0) && (cur <= chDigit_9))
         {
             digit = (unsigned int)(cur - chDigit_0);
         }
         else if ((cur >= chLatin_A) && (cur <= chLatin_F))
         {
             digit = (unsigned int)(10 + (cur - chLatin_A));
         }
         else if ((cur >= chLatin_a) && (cur <= chLatin_f))
         {
             digit = (unsigned int)(10 + (cur - chLatin_a));
         }
         else
         {
             // Hand back a zero
             toFill = 0;

             //  With at least one digit seen the reference is unterminated;
             //  without any, a numerical reference was expected.
             if (sawDigit)
                 emitError(XMLErrs::UnterminatedCharRef);
             else
                 emitError(XMLErrs::ExpectedNumericalCharRef);

             return false;
         }

         if (digit < base)
         {
             //  Fold the digit in. Checking after every digit keeps the
             //  accumulator from overflowing.
             codePoint = (codePoint * base) + digit;
             if (codePoint > 0x10FFFF) {
                 // Character reference was not in the valid range
                 emitError(XMLErrs::InvalidCharacterRef);
                 return false;
             }
         }
         else
         {
             //  Digit does not fit the radix: report it, then skip it below
             XMLCh badDigit[2];
             badDigit[0] = cur;
             badDigit[1] = chNull;
             emitError(XMLErrs::BadDigitForRadix, badDigit);
         }

         // A digit was seen; consume it
         sawDigit = true;
         fReaderMgr.getNextChar();
     }

     //  Hand back the character, or the surrogate pair, after checking that
     //  the expanded value is acceptable.
     if (codePoint <= 0xFFFD)
     {
         toFill = XMLCh(codePoint);
         second = 0;

         const bool isXmlChar = fReaderMgr.getCurrentReader()->isXMLChar(toFill);
         if (!isXmlChar && !fReaderMgr.getCurrentReader()->isControlChar(toFill)) {
             // Character reference was not in the valid range
             emitError(XMLErrs::InvalidCharacterRef);
             return false;
         }
     }
     else if (codePoint >= 0x10000 && codePoint <= 0x10FFFF)
     {
         // Split into a leading and a trailing surrogate
         const unsigned int offset = codePoint - 0x10000;
         toFill = XMLCh((offset >> 10) + 0xD800);
         second = XMLCh((offset & 0x3FF) + 0xDC00);
     }
     else {
         // Character reference was not in the valid range
         emitError(XMLErrs::InvalidCharacterRef);
         return false;
     }

     return true;
 }


 //  We get here after the '<!--' part of the comment. We scan past the
 //  terminating '-->'. It calls the appropriate handler with the comment
 //  text, if one is provided. A comment can be in either the document or
 //  the DTD, so the fInDocument flag is used to know which handler to send
 //  it to.
 void XMLScanner::scanComment()
 {

     enum States
     {
         InText
         , OneDash
         , TwoDashes
     };

     // Get a buffer for this
     XMLBufBid bbComment(&fBufMgr);

     //  Get the comment text into a temp buffer. Be sure to use temp buffer
     //  two here, since its to be used for stuff that is potentially longer
     //  than just a name.
     States curState = InText;
     bool gotLeadingSurrogate = false;
     while (true)
     {
         // Get the next character
         const XMLCh nextCh = fReaderMgr.getNextChar();

         //  Watch for an end of file
         if (!nextCh)
         {
             emitError(XMLErrs::UnterminatedComment);
             ThrowXMLwithMemMgr(UnexpectedEOFException, XMLExcepts::Gen_UnexpectedEOF, fMemoryManager);
         }

         // Check for correct surrogate pairs
         if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF))
         {
             if (gotLeadingSurrogate)
                 emitError(XMLErrs::Expected2ndSurrogateChar);
             else
                 gotLeadingSurrogate = true;
         }
         else
         {
             if (gotLeadingSurrogate)
             {
                 if ((nextCh < 0xDC00) || (nextCh > 0xDFFF))
                     emitError(XMLErrs::Expected2ndSurrogateChar);
             }
             // Its got to at least be a valid XML character
             else if (!fReaderMgr.getCurrentReader()->isXMLChar(nextCh)) {

                 XMLCh tmpBuf[9];
                 XMLString::binToText
                 (
                     nextCh
                     , tmpBuf
                     , 8
                     , 16
                     , fMemoryManager
                 );
                 emitError(XMLErrs::InvalidCharacter, tmpBuf);
             }

             gotLeadingSurrogate = false;
         }

         if (curState == InText)
         {
             // If its a dash, go to OneDash state. Otherwise take as text
             if (nextCh == chDash)
                 curState = OneDash;
             else
                 bbComment.append(nextCh);
         }
         else if (curState == OneDash)
         {
             //  If its another dash, then we change to the two dashes states.
             //  Otherwise, we have to put in the deficit dash and the new
             //  character and go back to InText.
             if (nextCh == chDash)
             {
                 curState = TwoDashes;
             }
             else
             {
                 bbComment.append(chDash);
                 bbComment.append(nextCh);
                 curState = InText;
             }
         }
         else if (curState == TwoDashes)
         {
             // The next character must be the closing bracket
             if (nextCh != chCloseAngle)
             {
                 emitError(XMLErrs::IllegalSequenceInComment);
                 fReaderMgr.skipPastChar(chCloseAngle);
                 return;
             }
             break;
         }
     }

     // If we have an available handler, call back with the comment.
     if (fDocHandler)
     {
         fDocHandler->docComment
         (
             bbComment.getRawBuffer()
         );
     }

     //mark comment is seen within the current element
     if (! fElemStack.isEmpty())
         fElemStack.setCommentOrPISeen();

 }


 //  Most equal signs can have white space around them, so this little guy
 //  just makes the calling code cleaner by eating whitespace.
 bool XMLScanner::scanEq(bool inDecl)
 {
     // Skip whatever white space comes before the equal sign
     fReaderMgr.skipPastSpaces(inDecl);

     // No equal sign: report failure without touching anything else
     if (!fReaderMgr.skippedChar(chEqual))
         return false;

     // Got it; skip the white space after it as well
     fReaderMgr.skipPastSpaces(inDecl);
     return true;
 }


 //  Has the reader manager fill toFill via getUpToCharOrWS() and returns
 //  the resulting length of toFill.
 unsigned int
 XMLScanner::scanUpToWSOr(XMLBuffer& toFill, const XMLCh chEndChar)
 {
     fReaderMgr.getUpToCharOrWS(toFill, chEndChar);

     const unsigned int collected = toFill.getLen();
     return collected;
 }

 //  Hands back a pointer to the next unused slot of the unsigned int pool.
 //
 //  The pool is a table (fUIntPool) of rows holding 64 unsigned ints each.
 //  fUIntPoolRow is the row currently being handed out, fUIntPoolCol the
 //  next free slot in it and fUIntPoolRowTotal the capacity of the row
 //  table. Rows are zero-filled when allocated here.
 //
 //  All allocations are made before the pool members are updated, so that
 //  the pool is left unchanged (or merely with a larger row table) should
 //  an allocation fail.
 //  NOTE(review): this assumes fMemoryManager->allocate() signals failure
 //  by throwing - confirm against the MemoryManager contract.
 unsigned int *XMLScanner::getNewUIntPtr()
 {
     // Common case: the current row still has a free slot
     if(fUIntPoolCol < 64)
     {
         unsigned int* const slot = fUIntPool[fUIntPoolRow]+fUIntPoolCol;
         fUIntPoolCol++;
         return slot;
     }

     // The current row is used up. Double the row table first if it is full.
     if(fUIntPoolRow+1 == fUIntPoolRowTotal)
     {
         const unsigned int grownTotal = fUIntPoolRowTotal << 1;
         unsigned int **grownTable = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * grownTotal);

         // Carry over the rows in use and null every remaining entry, so
         // that no entry of the table is ever left uninitialized.
         memcpy(grownTable, fUIntPool, (fUIntPoolRow+1) * sizeof(unsigned int *));
         for (unsigned int i=fUIntPoolRow+1; i<grownTotal; i++)
             grownTable[i] = 0;

         fMemoryManager->deallocate(fUIntPool);
         fUIntPool = grownTable;
         fUIntPoolRowTotal = grownTotal;
     }

     // Get and clear the new row before making it the current one
     unsigned int* const freshRow = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
     memset(freshRow, 0, sizeof(unsigned int) << 6);

     fUIntPoolRow++;
     fUIntPool[fUIntPoolRow] = freshRow;

     // Slot 0 of the new row is handed out now; the next request gets slot 1
     fUIntPoolCol = 1;
     return freshRow;
 }

 //  Zero-fills every row of the unsigned int pool up to and including the
 //  current one, so the pool - and the hashtables that use it - can be reused.
 void XMLScanner::resetUIntPool()
 {
     unsigned int** const lastRow = fUIntPool + fUIntPoolRow;
     for (unsigned int** rowPtr = fUIntPool; rowPtr <= lastRow; ++rowPtr)
     {
         // Each row holds 64 unsigned ints
         memset(*rowPtr, 0, 64 * sizeof(unsigned int));
     }
 }

 //  Throws away the current (possibly bloated) unsigned int pool and sets up
 //  a minimal one again: a two entry row table whose first row is allocated
 //  and zero-filled and whose second entry is null.
 void XMLScanner::recreateUIntPool()
 {
     // Give back every row in use, then the row table itself
     unsigned int row = 0;
     while (row <= fUIntPoolRow)
     {
         fMemoryManager->deallocate(fUIntPool[row]);
         ++row;
     }
     fMemoryManager->deallocate(fUIntPool);

     // Back to the initial bookkeeping
     fUIntPoolRow = 0;
     fUIntPoolCol = 0;
     fUIntPoolRowTotal = 2;

     // Fresh row table with a single cleared row of 64 unsigned ints
     fUIntPool = (unsigned int **)fMemoryManager->allocate(sizeof(unsigned int *) * fUIntPoolRowTotal);
     fUIntPool[0] = (unsigned int *)fMemoryManager->allocate(sizeof(unsigned int) << 6);
     fUIntPool[1] = 0;
     memset(fUIntPool[0], 0, sizeof(unsigned int) << 6);
 }

 XERCES_CPP_NAMESPACE_END