// src/xercesc/internal/XMLReader.cpp - platform/external/xerces-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * $Id: XMLReader.cpp 568078 2007-08-21 11:43:25Z amassari $
  */

 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/internal/XMLReader.hpp>
 #include <xercesc/util/BitOps.hpp>
 #include <xercesc/util/BinInputStream.hpp>
 #include <xercesc/util/PlatformUtils.hpp>
 #include <xercesc/util/RuntimeException.hpp>
 #include <xercesc/util/TransService.hpp>
 #include <xercesc/util/XMLEBCDICTranscoder.hpp>
 #include <xercesc/util/XMLString.hpp>
 #include <xercesc/util/Janitor.hpp>

 XERCES_CPP_NAMESPACE_BEGIN

 // ---------------------------------------------------------------------------
 //  XMLReader: Query Methods
 // ---------------------------------------------------------------------------
 //
 //  Reports whether each of the first 'count' chars of 'toCheck' carries the
 //  whitespace flag in the reader's character table. Scanning stops at the
 //  first char that does not; a zero-length range yields true.
 //
 bool XMLReader::isAllSpaces(const   XMLCh* const    toCheck
                             , const unsigned int    count) const
 {
     for (unsigned int pos = 0; pos < count; pos++)
     {
         const bool isSpace = (fgCharCharsTable[toCheck[pos]] & gWhitespaceCharMask) != 0;
         if (!isSpace)
             return false;
     }

     // Never saw a non-whitespace char
     return true;
 }


 //
 //  Reports whether at least one of the first 'count' chars of 'toCheck'
 //  carries the whitespace flag in the reader's character table. Scanning
 //  stops at the first such char; a zero-length range yields false.
 //
 bool XMLReader::containsWhiteSpace(const   XMLCh* const    toCheck
                             , const unsigned int    count) const
 {
     for (unsigned int pos = 0; pos < count; pos++)
     {
         const bool isSpace = (fgCharCharsTable[toCheck[pos]] & gWhitespaceCharMask) != 0;
         if (isSpace)
             return true;
     }

     // Every char examined was non-whitespace
     return false;
 }

 //
 //  Public id chars are checked rarely, so there is no fast table lookup
 //  here; the question is simply forwarded to the XMLChar utility class that
 //  matches the XML version this reader is set to.
 //
 bool XMLReader::isPublicIdChar(const XMLCh toCheck) const
 {
     return (fXMLVersion == XMLV1_1) ? XMLChar1_1::isPublicIdChar(toCheck)
                                     : XMLChar1_0::isPublicIdChar(toCheck);
 }

 // ---------------------------------------------------------------------------
 //  XMLReader: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Constructs a reader whose encoding is not forced by the caller. The
 //  encoding family is probed from the first raw bytes of the stream and no
 //  transcoder is created here.
 //
 //  pubId, sysId       Public and system ids; each is replicated using
 //                     'manager' (the copies are released by the destructor).
 //  streamToAdopt      Source stream; stored in fStream, which the
 //                     destructor deletes.
 //  from, type, source Stored as-is in fRefFrom, fType and fSource.
 //  throwAtEnd         Stored in fThrowAtEnd.
 //  calculateSrcOfs    Stored in fCalculateSrcOfs.
 //  version            Handed to setXMLVersion().
 //  manager            Memory manager used for the allocations made here.
 //
 //  When XERCES_DEBUG is defined, throws RuntimeException
 //  (Reader_BadAutoEncoding) if the probed encoding is outside the
 //  [Encodings_Min, Encodings_Max] range.
 //
 XMLReader::XMLReader(const  XMLCh* const          pubId
                     , const XMLCh* const          sysId
                     ,       BinInputStream* const streamToAdopt
                     , const RefFrom               from
                     , const Types                 type
                     , const Sources               source
                     , const bool                  throwAtEnd
                     , const bool                  calculateSrcOfs
                     , const XMLVersion            version
                     ,       MemoryManager* const  manager) :
     // Note: fEncoding is not initialized in this list; it is assigned from
     // the encoding probe in the body below.
     fCharIndex(0)
     , fCharsAvail(0)
     , fCurCol(1)
     , fCurLine(1)
     , fEncodingStr(0)
     , fForcedEncoding(false)
     , fNoMore(false)
     , fPublicId(XMLString::replicate(pubId, manager))
     , fRawBufIndex(0)
     , fRawBytesAvail(0)
     , fReaderNum(0xFFFFFFFF)
     , fRefFrom(from)
     , fSentTrailingSpace(false)
     , fSource(source)
     , fSrcOfsBase(0)
     , fSrcOfsSupported(false)
     , fCalculateSrcOfs(calculateSrcOfs)
     , fSystemId(XMLString::replicate(sysId, manager))
     , fStream(streamToAdopt)
     , fSwapped(false)
     , fThrowAtEnd(throwAtEnd)
     , fTranscoder(0)
     , fType(type)
     , fMemoryManager(manager)
 {
     setXMLVersion(version);

     // Do an initial load of raw bytes
     refreshRawBuffer();

     // Ask the transcoding service if it supports src offset info
     fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

     //
     //  Use the recognizer class to get a basic sense of what family of
     //  encodings this file is in. We'll start off with a reader of that
     //  type, and update it later if needed when we read the XMLDecl line.
     //
     fEncoding = XMLRecognizer::basicEncodingProbe(fRawByteBuf, fRawBytesAvail);

     // Debug-only sanity check that the probe returned a known encoding
     #if defined(XERCES_DEBUG)
     if ((fEncoding < XMLRecognizer::Encodings_Min)
     ||  (fEncoding > XMLRecognizer::Encodings_Max))
     {
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_BadAutoEncoding, fMemoryManager);
     }
     #endif

     // Keep our own copy of the name of the probed encoding
     fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding, fMemoryManager), fMemoryManager);

     // Check whether the fSwapped flag should be set or not
     checkForSwapped();

     //
     //  This will check to see if the first line is an XMLDecl and, if
     //  so, decode that first line manually one character at a time. This
     //  leaves enough characters in the buffer that the high level code
     //  can get through the Decl and call us back with the real encoding.
     //
     doInitDecode();

     //
     //  NOTE: We won't create a transcoder until we either get a call to
     //  setEncoding() or we get a call to refreshCharBuffer() and no
     //  transcoder has been set yet.
     //
 }


 //
 //  Constructs a reader whose encoding is forced by name. Any BOM at the
 //  start of the raw data is skipped for the intrinsic UCS-4, UTF-8 and
 //  UTF-16 encodings, and the transcoder is created right away.
 //
 //  pubId, sysId       Public and system ids; each is replicated using
 //                     'manager' (the copies are released by the destructor).
 //  streamToAdopt      Source stream; stored in fStream, which the
 //                     destructor deletes.
 //  encodingStr        Name of the encoding to use. It is replicated and the
 //                     copy is upper-cased (ASCII) before being mapped to
 //                     one of the XMLRecognizer encodings.
 //  from, type, source Stored as-is in fRefFrom, fType and fSource.
 //  throwAtEnd         Stored in fThrowAtEnd.
 //  calculateSrcOfs    Stored in fCalculateSrcOfs.
 //  version            Handed to setXMLVersion().
 //  manager            Memory manager used for the allocations made here.
 //
 //  Throws TranscodingException (Trans_CantCreateCvtrFor) if the transcoding
 //  service cannot supply a transcoder for the encoding.
 //
 XMLReader::XMLReader(const  XMLCh* const          pubId
                     , const XMLCh* const          sysId
                     ,       BinInputStream* const streamToAdopt
                     , const XMLCh* const          encodingStr
                     , const RefFrom               from
                     , const Types                 type
                     , const Sources               source
                     , const bool                  throwAtEnd
                     , const bool                  calculateSrcOfs
                     , const XMLVersion            version
                     ,       MemoryManager* const  manager) :
     fCharIndex(0)
     , fCharsAvail(0)
     , fCurCol(1)
     , fCurLine(1)
     , fEncoding(XMLRecognizer::UTF_8)
     , fEncodingStr(0)
     , fForcedEncoding(true)
     , fNoMore(false)
     , fPublicId(XMLString::replicate(pubId, manager))
     , fRawBufIndex(0)
     , fRawBytesAvail(0)
     , fReaderNum(0xFFFFFFFF)
     , fRefFrom(from)
     , fSentTrailingSpace(false)
     , fSource(source)
     , fSrcOfsBase(0)
     , fSrcOfsSupported(false)
     , fCalculateSrcOfs(calculateSrcOfs)
     , fSystemId(XMLString::replicate(sysId, manager))
     , fStream(streamToAdopt)
     , fSwapped(false)
     , fThrowAtEnd(throwAtEnd)
     , fTranscoder(0)
     , fType(type)
     , fMemoryManager(manager)
 {
     setXMLVersion(version);

     // Do an initial load of raw bytes
     refreshRawBuffer();

     // Copy the encoding string to our member
     fEncodingStr = XMLString::replicate(encodingStr, fMemoryManager);
     XMLString::upperCaseASCII(fEncodingStr);

     // Ask the transcoding service if it supports src offset info
     fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

     //
     //  Map the passed encoding name to one of our enums. If it does not
     //  match one of the intrinsic encodings, it will come back 'other',
     //  which tells us to create a transcoder based reader.
     //
     fEncoding = XMLRecognizer::encodingForName(fEncodingStr);

     //  test the presence of the BOM and remove it from the source
     switch(fEncoding)
     {
         case XMLRecognizer::UCS_4B :
         case XMLRecognizer::UCS_4L :
         {
             //
             //  The size test must guard BOTH byte patterns. '&&' binds
             //  tighter than '||', so the two alternatives are grouped
             //  explicitly; otherwise the second pattern would be compared
             //  against bytes that may never have been loaded.
             //
             if (fRawBytesAvail > 4 &&
                 (((fRawByteBuf[0] == 0x00) && (fRawByteBuf[1] == 0x00) && (fRawByteBuf[2] == 0xFE) && (fRawByteBuf[3] == 0xFF)) ||
                  ((fRawByteBuf[0] == 0xFF) && (fRawByteBuf[1] == 0xFE) && (fRawByteBuf[2] == 0x00) && (fRawByteBuf[3] == 0x00)))  )
             {
                 fRawBufIndex += 4;
             }
             break;
         }
         case XMLRecognizer::UTF_8 :
         {
             // Look at the raw buffer as short chars
             const char* asChars = (const char*)fRawByteBuf;

             if (fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen &&
                 XMLString::compareNString(  asChars
                                             , XMLRecognizer::fgUTF8BOM
                                             , XMLRecognizer::fgUTF8BOMLen) == 0)
             {
                 fRawBufIndex += XMLRecognizer::fgUTF8BOMLen;
             }
             break;
         }
         case XMLRecognizer::UTF_16B :
         case XMLRecognizer::UTF_16L :
         {
             // Not enough data to hold a UTF-16 BOM
             if (fRawBytesAvail < 2)
                 break;

             const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex];
             if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker))
             {
                 fRawBufIndex += sizeof(UTF16Ch);
             }
             break;
         }
         default :
         {
             // No BOM handling is done for the remaining encodings
             break;
         }
     }

     // Check whether the fSwapped flag should be set or not
     checkForSwapped();

     //
     //  Create a transcoder for the encoding. Since the encoding has been
     //  forced, this will be the one we will use, period.
     //
     XMLTransService::Codes failReason;
     if (fEncoding == XMLRecognizer::OtherEncoding)
     {
         //
         //  fEncodingStr not  pre-recognized, use it
         //  directly for transcoder
         //
         fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
         (
             fEncodingStr
             , failReason
             , kCharBufSize
             , fMemoryManager
         );
     }
      else
     {
         //
         //  Use the recognized fEncoding to create the transcoder
         //
         fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
         (
             fEncoding
             , failReason
             , kCharBufSize
             , fMemoryManager
         );

     }

     //
     //  NOTE(review): throwing from a constructor means ~XMLReader() will
     //  not run, so the strings replicated above are not released on this
     //  path - confirm whether that is acceptable for callers.
     //
     if (!fTranscoder)
     {
         ThrowXMLwithMemMgr1
         (
             TranscodingException
             , XMLExcepts::Trans_CantCreateCvtrFor
             , fEncodingStr
             , fMemoryManager
         );
     }

     //
     //  Note that, unlike above, we do not do an initial decode of the
     //  first line. We take the caller's word that the encoding is correct
     //  and just assume that the first bulk decode (kicked off by the first
     //  get of a character) will work.
     //
     //  So we do here the slipping in of the leading space if required.
     //
     if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
     {
         // This represents no data from the source
         fCharSizeBuf[fCharsAvail] = 0;
         fCharOfsBuf[fCharsAvail] = 0;
         fCharBuf[fCharsAvail++] = chSpace;
     }
 }


 //
 //  Constructs a reader whose encoding is forced via one of the
 //  XMLRecognizer encoding enum values. The transcoder is created right
 //  away from that enum value.
 //
 //  pubId, sysId       Public and system ids; each is replicated using
 //                     'manager' (the copies are released by the destructor).
 //  streamToAdopt      Source stream; stored in fStream, which the
 //                     destructor deletes.
 //  encodingEnum       Encoding to use; stored in fEncoding, and its name
 //                     (from XMLRecognizer::nameForEncoding) is replicated
 //                     into fEncodingStr.
 //  from, type, source Stored as-is in fRefFrom, fType and fSource.
 //  throwAtEnd         Stored in fThrowAtEnd.
 //  calculateSrcOfs    Stored in fCalculateSrcOfs.
 //  version            Handed to setXMLVersion().
 //  manager            Memory manager used for the allocations made here.
 //
 //  Throws TranscodingException (Trans_CantCreateCvtrFor) if the transcoding
 //  service cannot supply a transcoder for the encoding.
 //
 XMLReader::XMLReader(const  XMLCh* const          pubId
                     , const XMLCh* const          sysId
                     ,       BinInputStream* const streamToAdopt
                     , XMLRecognizer::Encodings    encodingEnum
                     , const RefFrom               from
                     , const Types                 type
                     , const Sources               source
                     , const bool                  throwAtEnd
                     , const bool                  calculateSrcOfs
                     , const XMLVersion            version
                     ,       MemoryManager* const  manager) :
     fCharIndex(0)
     , fCharsAvail(0)
     , fCurCol(1)
     , fCurLine(1)
     // Placeholder value; overwritten with encodingEnum in the body
     , fEncoding(XMLRecognizer::UTF_8)
     , fEncodingStr(0)
     , fForcedEncoding(true)
     , fNoMore(false)
     , fPublicId(XMLString::replicate(pubId, manager))
     , fRawBufIndex(0)
     , fRawBytesAvail(0)
     , fReaderNum(0xFFFFFFFF)
     , fRefFrom(from)
     , fSentTrailingSpace(false)
     , fSource(source)
     , fSrcOfsBase(0)
     , fSrcOfsSupported(false)
     , fCalculateSrcOfs(calculateSrcOfs)
     , fSystemId(XMLString::replicate(sysId, manager))
     , fStream(streamToAdopt)
     , fSwapped(false)
     , fThrowAtEnd(throwAtEnd)
     , fTranscoder(0)
     , fType(type)
     , fMemoryManager(manager)
 {
     setXMLVersion(version);

     // Do an initial load of raw bytes
     refreshRawBuffer();

     // Ask the transcoding service if it supports src offset info
     fSrcOfsSupported = XMLPlatformUtils::fgTransService->supportsSrcOfs();

     //
     //  Use the passed encoding code
     //
     fEncoding = encodingEnum;
     fEncodingStr = XMLString::replicate(XMLRecognizer::nameForEncoding(fEncoding, fMemoryManager), fMemoryManager);

     // Check whether the fSwapped flag should be set or not
     checkForSwapped();

     //
     //  Create a transcoder for the encoding. Since the encoding has been
     //  forced, this will be the one we will use, period.
     //
     XMLTransService::Codes failReason;
     fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
     (
         fEncoding
         , failReason
         , kCharBufSize
         , fMemoryManager
     );

     // No transcoder could be created for this encoding, so give up
     if (!fTranscoder)
     {
         ThrowXMLwithMemMgr1
         (
             TranscodingException
             , XMLExcepts::Trans_CantCreateCvtrFor
             , fEncodingStr
             , fMemoryManager
         );
     }

     //
     //  Note that, unlike above, we do not do an initial decode of the
     //  first line. We take the caller's word that the encoding is correct
     //  and just assume that the first bulk decode (kicked off by the first
     //  get of a character) will work.
     //
     //  So we do here the slipping in of the leading space if required.
     //
     if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
     {
         // This represents no data from the source
         fCharSizeBuf[fCharsAvail] = 0;
         fCharOfsBuf[fCharsAvail] = 0;
         fCharBuf[fCharsAvail++] = chSpace;
     }
 }


 //
 //  Releases what the reader holds: the encoding name and the public and
 //  system id strings go back to the memory manager, and the stream and the
 //  transcoder (if any) are deleted.
 //
 XMLReader::~XMLReader()
 {
     fMemoryManager->deallocate(fEncodingStr);
     fMemoryManager->deallocate(fPublicId);
     fMemoryManager->deallocate(fSystemId);
     delete fStream;
     delete fTranscoder;
 }


 // ---------------------------------------------------------------------------
 //  XMLReader: Character buffer management methods
 // ---------------------------------------------------------------------------
 //
 //  Returns the source offset that corresponds to the current position in
 //  the char buffer. Throws RuntimeException (Reader_SrcOfsNotSupported)
 //  unless source offsets are both supported and being calculated.
 //
 unsigned int XMLReader::getSrcOffset() const
 {
     if (!(fSrcOfsSupported && fCalculateSrcOfs))
     {
         ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_SrcOfsNotSupported, fMemoryManager);
     }

     unsigned int srcOffset;
     if (fCharIndex == 0)
     {
         // Nothing eaten from this buffer load yet, so the base is the answer
         srcOffset = fSrcOfsBase;
     }
     else if (fCharIndex < fCharsAvail)
     {
         // Somewhere inside the buffer: base plus the offset of the current char
         srcOffset = (fSrcOfsBase + fCharOfsBuf[fCharIndex]);
     }
     else
     {
         // Buffer fully eaten: base plus the offset and size of the last char
         srcOffset = (fSrcOfsBase + fCharOfsBuf[fCharIndex-1] + fCharSizeBuf[fCharIndex-1]);
     }
     return srcOffset;
 }


 //
 //  Tops up the char buffer: any chars not yet eaten are moved to the bottom
 //  of the buffer and more chars are transcoded in behind them. If no
 //  transcoder exists yet, one is created from fEncodingStr first.
 //
 //  Returns true if at least one char is available afterwards, false if the
 //  reader is exhausted (in which case fNoMore is set).
 //
 //  Throws RuntimeException (Reader_EncodingStrRequired) if no transcoder
 //  exists and the encoding is EBCDIC, and TranscodingException
 //  (Trans_CantCreateCvtrFor) if a transcoder cannot be created.
 //
 bool XMLReader::refreshCharBuffer()
 {
     // If the no more flag is set, then don't bother doing anything
     if (fNoMore)
         return false;

     unsigned int startInd;

     // See if we have any existing chars.
     const unsigned int spareChars = fCharsAvail - fCharIndex;

     // If we are full, then don't do anything.
     if (spareChars == kCharBufSize)
         return true;

     //
     //  If no transcoder has been created yet, then we never saw any
     //  encoding="" string and the encoding was not forced, so lets
     //  create one now. We know that it won't change now.
     //
     //  However, note that if we autosensed EBCDIC, then we have to
     //  consider it an error if we never got an encoding since we don't
     //  know what variant of EBCDIC it is.
     //
     if (!fTranscoder)
     {
         if (fEncoding == XMLRecognizer::EBCDIC)
             ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Reader_EncodingStrRequired, fMemoryManager);

         // Ask the transcoding service to make us a transcoder
         XMLTransService::Codes failReason;
         fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
         (
             fEncodingStr
             , failReason
             , kCharBufSize
             , fMemoryManager
         );

         if (!fTranscoder)
         {
             ThrowXMLwithMemMgr1
             (
                 TranscodingException
                 , XMLExcepts::Trans_CantCreateCvtrFor
                 , fEncodingStr
                 , fMemoryManager
             );
         }
     }

     //
     //  Add the number of source bytes eaten so far to the base src
     //  offset member.
     //
     if (fCalculateSrcOfs) {
         for (startInd = 0; startInd < fCharIndex; startInd++)
             fSrcOfsBase += fCharSizeBuf[startInd];
     }

     //
     //  If there are spare chars, then move them down to the bottom. We
     //  have to move the char sizes down also.
     //
     startInd = 0;
     if (spareChars)
     {
         for (unsigned int index = fCharIndex; index < fCharsAvail; index++)
         {
             fCharBuf[startInd] = fCharBuf[index];
             fCharSizeBuf[startInd] = fCharSizeBuf[index];
             startInd++;
         }
     }

     //
     //  And then get more chars, starting after any spare chars that were
     //  left over from the last time.
     //
     fCharsAvail = xcodeMoreChars
     (
         &fCharBuf[startInd]
         , &fCharSizeBuf[startInd]
         , kCharBufSize - spareChars
     );

     // Add back in the spare chars
     fCharsAvail += spareChars;

     // Reset the buffer index to zero, so we start from the 0th char again
     fCharIndex = 0;

     //
     //  If no chars available, then we have to check for one last thing. If
     //  this is reader for a PE and its not being expanded inside a literal,
     //  then unget a trailing space. We use a boolean to avoid triggering
     //  this more than once.
     //
     if (!fCharsAvail
     &&  (fType == Type_PE)
     &&  (fRefFrom == RefFrom_NonLiteral)
     &&  !fSentTrailingSpace)
     {
         //
         //  The space is synthetic and represents no data from the source,
         //  so its size must be zero. Leaving the previous value in place
         //  would add stale bytes to the source offset.
         //
         fCharSizeBuf[0] = 0;
         fCharBuf[0] = chSpace;
         fCharsAvail = 1;
         fSentTrailingSpace = true;
     }

     //
     //  If we get here with no more chars, then set the fNoMore flag which
     //  lets us optimize and know without checking that no more chars are
     //  available.
     //
     if (!fCharsAvail)
         fNoMore = true;

     //  Calculate fCharOfsBuf using the elements from fCharSizeBuf
     if (fCalculateSrcOfs)
     {
         fCharOfsBuf[0] = 0;
         for (unsigned int index = 1; index < fCharsAvail; ++index) {
             fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
         }
     }

     return (fCharsAvail != 0);
 }


 // ---------------------------------------------------------------------------
 //  XMLReader: Scanning methods
 // ---------------------------------------------------------------------------
 //
 //  Scans a name (or, when 'token' is true, a name token) from the current
 //  position and appends the accepted chars to 'toFill', keeping the column
 //  count up to date. For a name the first char must be a legal first name
 //  char; a name token skips that check.
 //
 //  Returns false if no input is available or the first char is rejected;
 //  otherwise returns whether 'toFill' is non-empty.
 //
 bool XMLReader::getName(XMLBuffer& toFill, const bool token)
 {
     // An empty buffer that cannot be reloaded means there is nothing to scan
     if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
         return false;

     unsigned int runStart = fCharIndex;

     // Only a real name (not a name token) has a restricted first char
     if (!token)
     {
         const XMLCh leadCh = fCharBuf[fCharIndex];
         const bool leadIsHighSurrogate = (fXMLVersion == XMLV1_1)
                                          && (leadCh >= 0xD800)
                                          && (leadCh <= 0xDB7F);
         if (leadIsHighSurrogate)
         {
             // make sure one more char is in the buffer, the transcoder
             // should put only a complete surrogate pair into the buffer
             assert(fCharIndex+1 < fCharsAvail);
             const XMLCh trailCh = fCharBuf[fCharIndex+1];
             if ((trailCh < 0xDC00) || (trailCh > 0xDFFF))
                 return false;

             // Accept both halves of the pair
             fCharIndex += 2;
         }
         else
         {
             if (!isFirstNameChar(leadCh))
                 return false;

             // Accept the first char
             fCharIndex++;
         }
     }

     //  Keep taking name chars, reloading the buffer as needed, until a
     //  non-name char turns up or the reader runs dry.
     for (;;)
     {
         const bool isVersion1_1 = (fXMLVersion == XMLV1_1);

         while (fCharIndex < fCharsAvail)
         {
             const XMLCh curCh = fCharBuf[fCharIndex];

             // Surrogate pairs are only looked at for XML 1.1
             if (isVersion1_1 && (curCh >= 0xD800) && (curCh <= 0xDB7F))
             {
                 // make sure one more char is in the buffer, the transcoder
                 // should put only a complete surrogate pair into the buffer
                 assert(fCharIndex+1 < fCharsAvail);
                 const XMLCh trailCh = fCharBuf[fCharIndex+1];
                 if ((trailCh < 0xDC00) || (trailCh > 0xDFFF))
                     break;
                 fCharIndex += 2;
             }
             else
             {
                 if (!isNameChar(curCh))
                     break;
                 fCharIndex++;
             }
         }

         // Hand over whatever was accepted in this pass and bump the column
         if (fCharIndex != runStart)
         {
             fCurCol += fCharIndex - runStart;
             toFill.append(&fCharBuf[runStart], fCharIndex - runStart);
         }

         // Chars left in the buffer mean we stopped on a non-name char; an
         // empty buffer that cannot be reloaded means the reader is done.
         if ((fCharIndex < fCharsAvail) || !refreshCharBuffer())
             break;

         runStart = fCharIndex;
     }

     return !toFill.isEmpty();
 }

 //
 //  Scans a QName (NCName, optionally followed by ':' and another NCName)
 //  from the current position, appending the accepted chars to 'toFill' and
 //  keeping the column count up to date.
 //
 //  '*colonPosition' is set to -1 on entry and, if a colon is accepted, to
 //  the length 'toFill' had just before the colon was appended.
 //
 //  Returns false if an NCName start char is rejected, if a second colon is
 //  met, or if the input ends where an NCName must start; otherwise returns
 //  whether 'toFill' is non-empty.
 //
 bool XMLReader::getQName(XMLBuffer& toFill, int* colonPosition)
 {
     // Set at the very start and again after a colon: the next char has to
     // be a legal first NCName char.
     bool needNCNameStart = true;

     *colonPosition = -1;
     while (true)
     {
         // Reload an empty buffer; stop scanning if nothing more comes
         if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
             break;

         const unsigned int runStart = fCharIndex;

         if (needNCNameStart)
         {
             needNCNameStart = false;

             const XMLCh leadCh = fCharBuf[fCharIndex];
             if ((fXMLVersion == XMLV1_1) && (leadCh >= 0xD800) && (leadCh <= 0xDB7F))
             {
                 // make sure one more char is in the buffer, the transcoder
                 // should put only a complete surrogate pair into the buffer
                 assert(fCharIndex+1 < fCharsAvail);
                 const XMLCh trailCh = fCharBuf[fCharIndex+1];
                 if ((trailCh < 0xDC00) || (trailCh > 0xDFFF))
                     return false;

                 // Accept both halves of the pair
                 fCharIndex += 2;
             }
             else
             {
                 if (!isFirstNCNameChar(leadCh))
                     return false;

                 // Accept the first char
                 fCharIndex++;
             }
         }

         // Take NCName chars for as long as the buffer supplies them
         while (fCharIndex < fCharsAvail)
         {
             const XMLCh curCh = fCharBuf[fCharIndex];
             if ((curCh >= 0xD800) && (curCh <= 0xDB7F))
             {
                 // make sure one more char is in the buffer, the transcoder
                 // should put only a complete surrogate pair into the buffer
                 assert(fCharIndex+1 < fCharsAvail);

                 // A high surrogate ends the scan under XML 1.0
                 if (fXMLVersion == XMLV1_0)
                     break;

                 const XMLCh trailCh = fCharBuf[fCharIndex+1];
                 if ((trailCh < 0xDC00) || (trailCh > 0xDFFF))
                     break;

                 fCharIndex += 2;
             }
             else
             {
                 if (!isNCNameChar(curCh))
                     break;

                 fCharIndex++;
             }
         }

         // Hand over whatever was accepted in this pass and bump the column
         const unsigned int runLen = fCharIndex - runStart;
         if (runLen != 0)
         {
             fCurCol += runLen;
             toFill.append(&fCharBuf[runStart], runLen);
         }

         //  Chars left in the buffer mean we stopped on something that is
         //  not an NCName char. Only a first colon lets the scan go on.
         if (fCharIndex < fCharsAvail)
         {
             if (fCharBuf[fCharIndex] != chColon)
                 break;

             // A second colon is not allowed in a QName
             if (*colonPosition != -1)
                 return false;

             *colonPosition = toFill.getLen();
             toFill.append(chColon);
             fCharIndex++;
             fCurCol++;
             needNCNameStart = true;
         }
     }

     // Ending while still waiting for an NCName start char is a failure
     return !needNCNameStart && !toFill.isEmpty();
 }

 //
 //  Collects consecutive whitespace chars into 'toFill', keeping line and
 //  column info up to date as it goes.
 //
 //  Returns true if the scan stopped on a non-whitespace char, false if the
 //  reader ran out of chars first.
 //
 bool XMLReader::getSpaces(XMLBuffer& toFill)
 {
     do
     {
         // Work through what is currently in the buffer
         while (fCharIndex < fCharsAvail)
         {
             // Not const: handleEOL() receives this char
             XMLCh curCh = fCharBuf[fCharIndex];

             // A non-whitespace char ends the scan; it is left in the buffer
             if (!isWhitespace(curCh))
                 return true;

             // Eat this char
             fCharIndex++;

             //
             //  'curCh' is one of x20, x9, xD or xA here. Of those, only
             //  xA and xD have any of the bits below set:
             //
             //  100000 x20
             //  001001 x9
             //  001010 chLF
             //  001101 chCR
             //  -----------
             //  000110 == (chCR|chLF) & ~(0x9|0x20)
             //
             //  so a non-zero result means an end-of-line char.
             //
             if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) != 0 )
             {
                 handleEOL(curCh, false);
             }
             else
             {
                 fCurCol++;
             }

             // Keep the char
             toFill.append(curCh);
         }

         //  The buffer is used up; go round again only if a reload
         //  brings in more chars.
     } while (refreshCharBuffer());

     // Ran out of chars without meeting a non-whitespace char
     return false;
 }


 //
 //  Collects chars into 'toFill' until a whitespace char or 'toCheck' is
 //  met, keeping line and column info up to date as it goes. The char that
 //  ends the scan is left in the buffer.
 //
 //  Returns true if the scan stopped on whitespace or 'toCheck', false if
 //  the reader ran out of chars first.
 //
 bool XMLReader::getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck)
 {
     do
     {
         // Work through what is currently in the buffer
         while (fCharIndex < fCharsAvail)
         {
             // Not const: handleEOL() receives this char
             XMLCh curCh = fCharBuf[fCharIndex];

             // Whitespace or the target char ends the scan
             if (isWhitespace(curCh) || (curCh == toCheck))
                 return true;

             // Eat this char
             fCharIndex++;

             //
             //  'curCh' is not x20, x9, xD or xA here, so the only
             //  end-of-line chars it could be are chNEL(x85) and
             //  chLineSeparator(x2028):
             //
             //  0010000000101000 chLineSeparator
             //  0000000010000101 chNEL
             //  ---------------------
             //  1101111101010010 == ~(chNEL|chLineSeparator)
             //
             //  Any bit outside those two patterns rules both of them out;
             //  otherwise the char goes through end-of-line handling.
             //
             if ( curCh & (XMLCh) ~(chNEL|chLineSeparator) )
             {
                 fCurCol++;
             }
             else
             {
                 handleEOL(curCh, false);
             }

             // Keep the char
             toFill.append(curCh);
         }

         //  The buffer is used up; go round again only if a reload
         //  brings in more chars.
     } while (refreshCharBuffer());

     // Ran out of chars without meeting whitespace or the target char
     return false;

 }

 //
 //  If the next char is a single or double quote, eats it, stores it in
 //  'chGotten' and returns true. Otherwise nothing is eaten, 'chGotten' is
 //  left alone and false is returned (also when no chars are available).
 //
 bool XMLReader::skipIfQuote(XMLCh& chGotten)
 {
     // An empty buffer that cannot be reloaded means there is no next char
     if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
         return false;

     const XMLCh nextCh = fCharBuf[fCharIndex];
     if ((nextCh != chDoubleQuote) && (nextCh != chSingleQuote))
         return false;

     chGotten = nextCh;
     fCharIndex++;
     fCurCol++;
     return true;
 }


 bool XMLReader::skipSpaces(bool& skippedSomething, bool inDecl)
 {
     // Remember the current line and column
     XMLSSize_t    orgLine = fCurLine;
     XMLSSize_t    orgCol  = fCurCol;

     //  We enter a loop where we skip over spaces until we hit the end of
     //  this reader or a non-space value. The return indicates whether we
     //  hit the non-space (true) or the end (false).
     while (true)
     {
         // Loop through the current chars in the buffer
         while (fCharIndex < fCharsAvail)
         {
             //  See if its a white space char. If so, then process it. Else
             //  we've hit a non-space and need to return.
             if (isWhitespace(fCharBuf[fCharIndex]))
             {
                 // Get the current char out of the buffer and eat it
                 XMLCh curCh = fCharBuf[fCharIndex++];

                 //
                 //  'curCh' is a whitespace(x20|x9|xD|xA), so we only can have
                 //  end-of-line combinations with a leading chCR(xD) or chLF(xA)
                 //
                 //  100000 x20
                 //  001001 x9
                 //  001010 chLF
                 //  001101 chCR
                 //  -----------
                 //  000110 == (chCR|chLF) & ~(0x9|0x20)
                 //
                 //  if the result of the logical-& operation is
                 //  true  : 'curCh' must be xA  or xD
                 //  false : 'curCh' must be x20 or x9
                 //
                 if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) == 0 )
                 {
                     fCurCol++;
                 } else
                 {
                     handleEOL(curCh, inDecl);
                 }

             }
             else
             {
                 skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
                 return true;
             }
         }

         //  We've eaten up the current buffer, so lets try to reload it. If
         //  we don't get anything new, then break out. If we do, then we go
         //  back to the top to keep getting spaces.
         if (!refreshCharBuffer())
             break;
     }

     // We never hit any non-space and ate up the whole reader
     skippedSomething = (orgLine != fCurLine) || (orgCol != fCurCol);
     return false;
 }

 //
 //  Consumes the next character if, and only if, it equals toSkip.
 //
 //  Returns true when the character was consumed (the column is advanced by
 //  one), false when the next character is something else or when no more
 //  characters can be loaded.
 //
 bool XMLReader::skippedChar(const XMLCh toSkip)
 {
     // An empty buffer that cannot be reloaded means there is nothing to skip
     if ((fCharIndex == fCharsAvail) && !refreshCharBuffer())
         return false;

     // Leave the character in place unless it is the requested one
     if (fCharBuf[fCharIndex] != toSkip)
         return false;

     fCurCol++;
     fCharIndex++;
     return true;
 }


 //
 //  Consumes the next character if it is whitespace, updating the line and
 //  column position (end-of-line characters go through handleEOL()).
 //
 //  Returns true if a whitespace character was consumed, false if the next
 //  character is not whitespace or no more characters can be loaded.
 //
 bool XMLReader::skippedSpace()
 {
     //
     //  If the buffer is empty, then try to reload it. If we still get
     //  nothing, then return false.
     //
     if (fCharIndex == fCharsAvail)
     {
         if (!refreshCharBuffer())
             return false;
     }

     //
     //  See if the current char is a whitespace. If so, then we need to eat
     //  it and return true.
     //
     //  The local copy is deliberately non-const: handleEOL() takes it by
     //  reference and may overwrite it with chLF. Declaring it const and
     //  casting the constness away at the call would make that write
     //  undefined behaviour.
     //
     XMLCh curCh = fCharBuf[fCharIndex];
     if (isWhitespace(curCh))
     {
         // Eat the character
         fCharIndex++;

         //
         //  'curCh' is a whitespace(x20|x9|xD|xA), so we only can have
         //  end-of-line combinations with a leading chCR(xD) or chLF(xA)
         //
         //  100000 x20
         //  001001 x9
         //  001010 chLF
         //  001101 chCR
         //  -----------
         //  000110 == (chCR|chLF) & ~(0x9|0x20)
         //
         //  if the result of the logical-& operation is
         //  non-zero : 'curCh' must be xA  or xD
         //  zero     : 'curCh' must be x20 or x9
         //
         if ( ( curCh & (chCR|chLF) & ~(0x9|0x20) ) == 0 )
         {
             fCurCol++;
         } else
         {
             handleEOL(curCh, false);
         }

         return true;
     }
     return false;
 }


 //
 //  Consumes the next characters of this reader if they spell out toSkip.
 //
 //  toSkip - null terminated string to match. It is expected not to contain
 //           a new line, since only the column is adjusted.
 //
 //  Returns true if the whole string matched; the characters are consumed
 //  and the column advanced by the string length. Returns false otherwise.
 //
 //  When the string is longer than the number of characters currently held
 //  in the buffer, it is matched piecewise across buffer refreshes. In that
 //  mode the pieces that already matched stay consumed if a later piece
 //  fails to match or the input runs out.
 //
 bool XMLReader::skippedString(const XMLCh* const toSkip)
 {
     // Get the length of the string to skip
     const unsigned int srcLen = XMLString::stringLen(toSkip);
     unsigned int charsLeft = charsLeftInBuffer();

     if (srcLen <= fCharsAvail) {
         //
         //  See if the current reader has enough chars to test against this
         //  string. If not, then ask it to reload its buffer. If that does not
         //  get us enough, then it cannot match.
         //
         //  NOTE: This works because strings never have to cross a reader! And
         //  a string to skip will never have a new line in it, so we will never
         //  miss adjusting the current line.
         //
         while (charsLeft < srcLen)
         {
             refreshCharBuffer();
             unsigned int t = charsLeftInBuffer();
             if (t == charsLeft)   // if the refreshCharBuf() did not add anything new
                 return false;     //   give up and return.
             charsLeft = t;
         }

         //
         //  Ok, now we know that the buffer holds at least srcLen unread
         //  chars, so we can do a quick and dirty comparison straight
         //  against it with no requirement to unget if it fails.
         //
         if (memcmp(&fCharBuf[fCharIndex], toSkip, srcLen*sizeof(XMLCh)))
             return false;

         //
         //  And get the character buffer index back right by just adding the
         //  source len to it.
         //
         fCharIndex += srcLen;
     }
     else {
         if (charsLeft == 0) {
             refreshCharBuffer();
             charsLeft = charsLeftInBuffer();
             if (charsLeft == 0)
                 return false; // error situation
         }

         //
         //  The refresh above may have loaded more chars than the string
         //  is long. Never compare more than srcLen chars: going further
         //  would read past the end of toSkip and make the subtraction
         //  below wrap around.
         //
         if (charsLeft > srcLen)
             charsLeft = srcLen;

         // Match the leading piece against what the buffer holds right now
         if (memcmp(&fCharBuf[fCharIndex], toSkip, charsLeft*sizeof(XMLCh)))
             return false;

         fCharIndex += charsLeft;

         unsigned int offset = charsLeft;
         unsigned int remainingLen = srcLen - charsLeft;

         // Match the rest one buffer load at a time
         while (remainingLen > 0) {
             refreshCharBuffer();
             charsLeft = charsLeftInBuffer();
             if (charsLeft == 0)
                 return false; // error situation
             if (charsLeft > remainingLen)
                 charsLeft = remainingLen;
             if (memcmp(&fCharBuf[fCharIndex], toSkip+offset, charsLeft*sizeof(XMLCh)))
                 return false;
             offset += charsLeft;
             remainingLen -= charsLeft;
             fCharIndex += charsLeft;

         }

     }

     // Add the source length to the current column to get it back right
     fCurCol += srcLen;

     return true;
 }

 //
 //  Checks whether the upcoming characters of this reader spell out toPeek,
 //  without consuming them: unlike skippedString(), neither fCharIndex nor
 //  fCurCol is advanced here.
 //
 //  Returns true on a match. Returns false on a mismatch, or when refreshing
 //  the buffer cannot make enough characters available for the comparison.
 //
 bool XMLReader::peekString(const XMLCh* const toPeek)
 {
     // Number of characters that have to be available for the comparison
     const unsigned int peekLen = XMLString::stringLen(toPeek);

     //
     //  Keep asking for more characters until enough are buffered. A
     //  refresh that leaves the count unchanged means the input cannot
     //  supply any more, so the string cannot match.
     //
     //  NOTE: This works because strings never have to cross a reader!
     //
     for (unsigned int avail = charsLeftInBuffer(); avail < peekLen; )
     {
         refreshCharBuffer();
         const unsigned int reloaded = charsLeftInBuffer();
         if (reloaded == avail)
             return false;
         avail = reloaded;
     }

     // Enough characters are buffered: compare straight against the buffer
     return memcmp(&fCharBuf[fCharIndex], toPeek, peekLen*sizeof(XMLCh)) == 0;
 }


 // ---------------------------------------------------------------------------
 //  XMLReader: Setter methods (most are inlined)
 // ---------------------------------------------------------------------------
 //
 //  Switches this reader to the encoding named by newEncoding.
 //
 //  newEncoding - encoding name; it is copied and upper-cased (ASCII only)
 //                before being compared, the caller's string is untouched.
 //
 //  Returns true if the encoding was accepted, or if the encoding of this
 //  reader was forced, in which case the request is ignored. Returns false
 //  if a generic (non-endian) UTF-16 or UCS-4 name is requested while the
 //  current base encoding is not one of the matching endian-specific ones.
 //
 //  Throws a TranscodingException (Trans_CantCreateCvtrFor) if no transcoder
 //  can be created for the encoding.
 //
 //  On success fEncodingStr and fEncoding are updated, and a transcoder is
 //  created if none exists yet.
 //
 bool XMLReader::setEncoding(const XMLCh* const newEncoding)
 {
     //
     //  If the encoding was forced, then we ignore the new value and just
     //  return with success. If it was forced, then we are to use that
     //  encoding without question. Note that, if we are forced, we created
     //  a transcoder up front so there is no need to do one here in that
     //  case.
     //
     if (fForcedEncoding)
         return true;

     //
     // upperCase the newEncoding first for better performance
     //
     //  inputEncoding is a private copy. Every path below either frees it
     //  or hands ownership of it over to fEncodingStr.
     //
     XMLCh* inputEncoding = XMLString::replicate(newEncoding, fMemoryManager);
     XMLString::upperCaseASCII(inputEncoding);

     XMLRecognizer::Encodings newBaseEncoding;
     //
     //  Check for non-endian specific UTF-16 or UCS-4. If so, and if we
     //  are already in one of the endian versions of those encodings,
     //  then just keep it and go on. Otherwise, its not valid.
     //
     if (!XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString)
     ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString2)
     ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString3)
     ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString4)
     ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString5)
     ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString6)
     ||  !XMLString::compareString(inputEncoding, XMLUni::fgUTF16EncodingString7))
     {
         // The copy is not needed on this path; free it before any return
         fMemoryManager->deallocate(inputEncoding);

         if ((fEncoding != XMLRecognizer::UTF_16L)
         &&  (fEncoding != XMLRecognizer::UTF_16B))
         {
             return false;
         }

         // Override with the original endian specific encoding
         newBaseEncoding = fEncoding;

         //
         //  Replace the stored name with the endian specific one. The
         //  pointer is zeroed before replicate() is called so that it does
         //  not keep pointing at freed memory should replicate() fail.
         //
         if (fEncoding == XMLRecognizer::UTF_16L) {
             fMemoryManager->deallocate(fEncodingStr);
             fEncodingStr = 0;
             fEncodingStr = XMLString::replicate(XMLUni::fgUTF16LEncodingString, fMemoryManager);
         }
         else {
             fMemoryManager->deallocate(fEncodingStr);
             fEncodingStr = 0;
             fEncodingStr = XMLString::replicate(XMLUni::fgUTF16BEncodingString, fMemoryManager);
         }
     }
     else if (!XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString)
          ||  !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString2)
          ||  !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString3)
          ||  !XMLString::compareString(inputEncoding, XMLUni::fgUCS4EncodingString4))
     {
         // The copy is not needed on this path; free it before any return
         fMemoryManager->deallocate(inputEncoding);

         if ((fEncoding != XMLRecognizer::UCS_4L)
         &&  (fEncoding != XMLRecognizer::UCS_4B))
         {
             return false;
         }

         // Override with the original endian specific encoding
         newBaseEncoding = fEncoding;

         // Same name replacement as in the UTF-16 case above
         if (fEncoding == XMLRecognizer::UCS_4L) {

             fMemoryManager->deallocate(fEncodingStr);
             fEncodingStr = 0;
             fEncodingStr = XMLString::replicate(XMLUni::fgUCS4LEncodingString, fMemoryManager);
         }
         else {

             fMemoryManager->deallocate(fEncodingStr);
             fEncodingStr = 0;
             fEncodingStr = XMLString::replicate(XMLUni::fgUCS4BEncodingString, fMemoryManager);
         }
     }
     else
     {
         //
         //  Try to map the string to one of our standard encodings. If its not
         //  one of them, then it has to be one of the non-intrinsic encodings,
         //  in which case we have to delete our intrinsic encoder and create a
         //  new one.
         //
         newBaseEncoding = XMLRecognizer::encodingForName(inputEncoding);

         //
         //  If it does not come back as one of the auto-sensed encodings, then we
         //  have to possibly replace it and at least check a few things.
         //
         if (newBaseEncoding == XMLRecognizer::OtherEncoding)
         {
             //
             // We already know it's none of those non-endian special cases,
             // so take over the upper-cased copy as the new encoding name
             // and use it directly to create the transcoder
             //
             fMemoryManager->deallocate(fEncodingStr);
             fEncodingStr = inputEncoding;

             //
             //  NOTE(review): fTranscoder is overwritten here without the
             //  previous value being released; presumably it is always
             //  null when this point is reached - confirm.
             //
             //  failReason is filled in by the call but not looked at. A
             //  null result is picked up by the check further down.
             //
             XMLTransService::Codes failReason;
             fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
             (
                 fEncodingStr
                 , failReason
                 , kCharBufSize
                 , fMemoryManager
             );
         }
         else
         {
             // Store the new encoding string since it is just an intrinsic
             fMemoryManager->deallocate(fEncodingStr);
             fEncodingStr = inputEncoding;
         }
     }

     if (!fTranscoder) {
         //
         //  Now we can create a transcoder using the recognized fEncoding.  We
         //  might get back a transcoder for an intrinsically supported encoding,
         //  or we might get one from the underlying transcoding service.
         //
         //  This is also reached when the by-name creation above came back
         //  empty, in which case newBaseEncoding is OtherEncoding.
         //
         XMLTransService::Codes failReason;
         fTranscoder = XMLPlatformUtils::fgTransService->makeNewTranscoderFor
         (
             newBaseEncoding
             , failReason
             , kCharBufSize
             , fMemoryManager
         );

         if (!fTranscoder)
             ThrowXMLwithMemMgr1(TranscodingException, XMLExcepts::Trans_CantCreateCvtrFor, fEncodingStr, fMemoryManager);
     }

     // Update the base encoding member with the new base encoding found
     fEncoding = newBaseEncoding;

     // Looks ok to us
     return true;
 }


 // ---------------------------------------------------------------------------
 //  XMLReader: Private helper methods
 // ---------------------------------------------------------------------------

 //
 //  Works out, from the current encoding and the byte order this code was
 //  built for, whether multi-byte code units have to be byte swapped, and
 //  records the answer in fSwapped. Called once the encoding has been set.
 //
 //  If neither ENDIANMODE_LITTLE nor ENDIANMODE_BIG is defined, fSwapped is
 //  simply cleared.
 //
 void XMLReader::checkForSwapped()
 {
     #if defined(ENDIANMODE_LITTLE)

         // Little endian build: big endian input has to be swapped
         fSwapped = (fEncoding == XMLRecognizer::UTF_16B)
                 || (fEncoding == XMLRecognizer::UCS_4B);

     #elif defined(ENDIANMODE_BIG)

         // Big endian build: little endian input has to be swapped
         fSwapped = (fEncoding == XMLRecognizer::UTF_16L)
                 || (fEncoding == XMLRecognizer::UCS_4L);

     #else

         // Unknown byte order: assume not swapped
         fSwapped = false;

     #endif
 }


 //
 //  This is called from the constructor when the encoding is not forced.
 //  We assume that the encoding has been auto-sensed at this point and that
 //  fSwapped is set correctly.
 //
 //  In the case of UCS-4 and EBCDIC, we don't have to check for a decl.
 //  The fact that we got here, means that there is one, because that's the
 //  only way we can autosense those.
 //
 void XMLReader::doInitDecode()
 {
     switch(fEncoding)
     {
         case XMLRecognizer::UCS_4B :
         case XMLRecognizer::UCS_4L :
         {
             // Remove bom if any
             if (((fRawByteBuf[0] == 0x00) && (fRawByteBuf[1] == 0x00) && (fRawByteBuf[2] == 0xFE) && (fRawByteBuf[3] == 0xFF)) ||
                 ((fRawByteBuf[0] == 0xFF) && (fRawByteBuf[1] == 0xFE) && (fRawByteBuf[2] == 0x00) && (fRawByteBuf[3] == 0x00))  )
             {
                 for (unsigned int i = 0; i < fRawBytesAvail; i++)
                     fRawByteBuf[i] = fRawByteBuf[i+4];

                 fRawBytesAvail -=4;
             }

             // Look at the raw buffer as UCS4 chars
             const UCS4Ch* asUCS = (const UCS4Ch*)fRawByteBuf;

             while (fRawBufIndex < fRawBytesAvail)
             {
                 // Get out the current 4 byte value and inc our raw buf index
                 UCS4Ch curVal = *asUCS++;
                 fRawBufIndex += sizeof(UCS4Ch);

                 // Swap if that is required for this machine
                 if (fSwapped)
                     curVal = BitOps::swapBytes(curVal);

                 // Make sure its at least semi legal. If not, undo and throw
                 if (curVal > 0xFFFF)
                 {
                     fCharsAvail = 0;
                     fRawBufIndex = 0;
                     fMemoryManager->deallocate(fPublicId);
                     fMemoryManager->deallocate(fEncodingStr);
                     ArrayJanitor<XMLCh> janValue(fSystemId, fMemoryManager);
                     ThrowXMLwithMemMgr1
                     (
                         TranscodingException
                         , XMLExcepts::Reader_CouldNotDecodeFirstLine
                         , fSystemId
                         , fMemoryManager
                     );
                 }

                 // Convert the value to an XML char and store it
                 fCharSizeBuf[fCharsAvail] = 4;
                 fCharBuf[fCharsAvail++] = XMLCh(curVal);

                 // Break out on the > character
                 if (curVal == chCloseAngle)
                     break;
             }
             break;
         }

         case XMLRecognizer::UTF_8 :
         {
             // If there's a utf-8 BOM  (0xEF 0xBB 0xBF), skip past it.
             //   Don't move to char buf - no one wants to see it.
             //   Note: this causes any encoding= declaration to override
             //         the BOM's attempt to say that the encoding is utf-8.

             // Look at the raw buffer as short chars
             const char* asChars = (const char*)fRawByteBuf;

             if (fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen &&
                 XMLString::compareNString(  asChars
                                             , XMLRecognizer::fgUTF8BOM
                                             , XMLRecognizer::fgUTF8BOMLen) == 0)
             {
                 fRawBufIndex += XMLRecognizer::fgUTF8BOMLen;
                 asChars      += XMLRecognizer::fgUTF8BOMLen;
             }

             //
             //  First check that there are enough bytes to even see the
             //  decl indentifier. If not, get out now with no action since
             //  there is no decl.
             //
             if (fRawBytesAvail < XMLRecognizer::fgASCIIPreLen)
                 break;

             // Check for the opening sequence. If not, then no decl
             if (XMLString::compareNString(  asChars
                                             , XMLRecognizer::fgASCIIPre
                                             , XMLRecognizer::fgASCIIPreLen))
             {
                 break;
             }

             while (fRawBufIndex < fRawBytesAvail)
             {
                 const char curCh = *asChars++;
                 fRawBufIndex++;

                 // Looks ok, so store it
                 fCharSizeBuf[fCharsAvail] = 1;
                 fCharBuf[fCharsAvail++] = XMLCh(curCh);

                 // Break out on a > character
                 if (curCh == chCloseAngle)
                     break;

                 //
                 //  A char greater than 0x7F is not allowed in this case. If
                 //  so, undo and throw.
                 //
                 if (curCh & 0x80)
                 {
                     fCharsAvail = 0;
                     fRawBufIndex = 0;
                     fMemoryManager->deallocate(fPublicId);
                     fMemoryManager->deallocate(fEncodingStr);
                     ArrayJanitor<XMLCh> janValue(fSystemId, fMemoryManager);
                     ThrowXMLwithMemMgr1
                     (
                         TranscodingException
                         , XMLExcepts::Reader_CouldNotDecodeFirstLine
                         , fSystemId
                         , fMemoryManager
                     );
                 }
             }
             break;
         }

         case XMLRecognizer::UTF_16B :
         case XMLRecognizer::UTF_16L :
         {
             //
             //  If there is a decl here, we just truncate back the characters
             //  as we go. No surrogate creation would be allowed here in legal
             //  XML, so we consider it a transoding error if we find one.
             //
             if (fRawBytesAvail < 2)
                 break;

             unsigned int postBOMIndex = 0;
             const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex];
             if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker))
             {
                 fRawBufIndex += sizeof(UTF16Ch);
                 asUTF16++;
                 postBOMIndex = fRawBufIndex;
             }

             //  First check that there are enough raw bytes for there to even
             //  be a decl indentifier. If not, then nothing to do.
             //
             if (fRawBytesAvail - fRawBufIndex < XMLRecognizer::fgUTF16PreLen)
             {
                 fRawBufIndex = postBOMIndex;
                 break;
             }

             //
             //  See we get a match on the prefix. If not, then reset and
             //  break out.
             //
             if (fEncoding == XMLRecognizer::UTF_16B)
             {
                 if (memcmp(asUTF16, XMLRecognizer::fgUTF16BPre, XMLRecognizer::fgUTF16PreLen))
                 {
                     fRawBufIndex = postBOMIndex;
                     break;
                 }
             }
              else
             {
                 if (memcmp(asUTF16, XMLRecognizer::fgUTF16LPre, XMLRecognizer::fgUTF16PreLen))
                 {
                     fRawBufIndex = postBOMIndex;
                     break;
                 }
             }

             while (fRawBufIndex < fRawBytesAvail)
             {
                 // Get out the current 2 byte value
                 UTF16Ch curVal = *asUTF16++;
                 fRawBufIndex += sizeof(UTF16Ch);

                 // Swap if that is required for this machine
                 if (fSwapped)
                     curVal = BitOps::swapBytes(curVal);

                 //
                 //  Store it and bump the target index, implicitly converting
                 //  if UTF16Ch and XMLCh are not the same size.
                 //
                 fCharSizeBuf[fCharsAvail] = 2;
                 fCharBuf[fCharsAvail++] = curVal;

                 // Break out on a > char
                 if (curVal == chCloseAngle)
                     break;
             }
             break;
         }

         case XMLRecognizer::EBCDIC :
         {
             //
             //  We use special support in the intrinsic EBCDIC-US transcoder
             //  to go through one char at a time.
             //
             const XMLByte* srcPtr = fRawByteBuf;
             while (1)
             {
                 // Transcode one char from the source
                 const XMLCh chCur = XMLEBCDICTranscoder::xlatThisOne(*srcPtr++);
                 fRawBufIndex++;

                 //
                 //  And put it into the character buffer. This stuff has to
                 //  look like it was normally transcoded.
                 //
                 fCharSizeBuf[fCharsAvail] = 1;
                 fCharBuf[fCharsAvail++] = chCur;

                 // If its a > char, then break out
                 if (chCur == chCloseAngle)
                     break;

                 // Watch for using up all input and get out
                 if (fRawBufIndex == fRawBytesAvail)
                     break;
             }
             break;
         }

         default :
             // It should never be anything else here
             fMemoryManager->deallocate(fPublicId);
             fMemoryManager->deallocate(fEncodingStr);
             fMemoryManager->deallocate(fSystemId);
             ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Reader_BadAutoEncoding, fMemoryManager);
             break;
     }

     //
     //  Ok, by the time we get here, if its a legal XML file we have eaten
     //  the XML/TextDecl. So, if we are a PE and are being referenced from
     //  outside a literal, then we need to throw in an arbitrary space that
     //  is required by XML.
     //
     if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral))
         fCharBuf[fCharsAvail++] = chSpace;

     //  Calculate fCharOfsBuf buffer using the elements from fCharBufSize
     if (fCalculateSrcOfs)
     {
         fCharOfsBuf[0] = 0;
         for (unsigned int index = 1; index < fCharsAvail; ++index) {
             fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1];
         }
     }
 }


 //
 //  Called internally when the raw byte buffer has run low. Any bytes that
 //  have not been consumed yet are kept, moved to the front of the buffer,
 //  and the space behind them is filled with as many bytes as the stream
 //  will give us. fRawBytesAvail ends up as kept bytes plus bytes read.
 //
 void XMLReader::refreshRawBuffer()
 {
     //
     //  Number of unconsumed bytes. There should only ever be
     //  (max bytes per char - 1) at the most.
     //
     const unsigned int carried = fRawBytesAvail - fRawBufIndex;

     // Slide the unconsumed bytes down to the start of the buffer
     unsigned int pos = 0;
     while (pos < carried)
     {
         fRawByteBuf[pos] = fRawByteBuf[fRawBufIndex + pos];
         ++pos;
     }

     //
     //  Read into the space past the carried bytes. The request is reduced
     //  by the number of carried bytes, and that number is added back to
     //  the count of bytes read.
     //
     fRawBytesAvail = fStream->readBytes
     (
         &fRawByteBuf[carried], kRawBufSize - carried
     ) + carried;

     //
     //  The unconsumed data now sits at the start, so the index goes back
     //  to zero in all cases.
     //
     fRawBufIndex = 0;
 }


 //
 //  Called internally when the transcoded character buffer has run out.
 //  Transcodes up to maxChars further characters from the raw byte buffer,
 //  reloading the raw buffer first when it is running low.
 //
 //  bufToFill - receives the transcoded characters
 //  charSizes - passed to the transcoder together with bufToFill
 //  maxChars  - the most characters to produce
 //
 //  Returns the number of characters produced; zero when there are no raw
 //  bytes left to work from.
 //
 unsigned int
 XMLReader::xcodeMoreChars(          XMLCh* const            bufToFill
                             ,       unsigned char* const    charSizes
                             , const unsigned int            maxChars)
 {
     // Nothing at all in the raw buffer: we are done
     if (fRawBytesAvail == 0)
         return 0;

     //
     //  Top up the raw buffer when fewer than 100 bytes remain. We can't
     //  wait for exactly zero, because a multi-byte encoding may have left
     //  part of a character behind that cannot be used until the rest of
     //  it has been read.
     //
     const unsigned int rawRemaining = fRawBytesAvail - fRawBufIndex;
     if (rawRemaining < 100)
     {
         refreshRawBuffer();

         // The reload produced nothing and nothing was carried over
         if (fRawBytesAvail == 0)
             return 0;
     }

     // Let the transcoder internalize the next batch of chars
     unsigned int rawConsumed;
     const unsigned int produced = fTranscoder->transcodeFrom
     (
         &fRawByteBuf[fRawBufIndex]
         , fRawBytesAvail - fRawBufIndex
         , bufToFill
         , maxChars
         , rawConsumed
         , charSizes
     );

     // Step past the raw bytes the transcoder used up
     fRawBufIndex += rawConsumed;

     return produced;
 }

 /***
  *
  * XML1.1
  *
  * 2.11 End-of-Line Handling
  *
  *    XML parsed entities are often stored in computer files which, for editing
  *    convenience, are organized into lines. These lines are typically separated
  *    by some combination of the characters CARRIAGE RETURN (#xD) and LINE FEED (#xA).
  *
  *    To simplify the tasks of applications, the XML processor MUST behave as if
  *    it normalized all line breaks in external parsed entities (including the document
  *    entity) on input, before parsing, by translating all of the following to a single
  *    #xA character:
  *
  *  1. the two-character sequence #xD #xA
  *  2. the two-character sequence #xD #x85
  *  3. the single character #x85
  *  4. the single character #x2028
  *  5. any #xD character that is not immediately followed by #xA or #x85.
  *
  *
  ***/
 //
 //  Updates the line/column position for the character just consumed and
 //  applies the normalization above to it.
 //
 //  curCh  - the character just consumed; rewritten to chLF when it (with
 //           a possible following character, which is then consumed too)
 //           is normalized to a line feed.
 //  inDecl - true while inside an XML/Text decl, where #x85 and #x2028
 //           are a fatal error for XML 1.1.
 //
 //  Any character that is not one of #xA, #xD, #x85, #x2028 just advances
 //  the column.
 //
 void XMLReader::handleEOL(XMLCh& curCh, bool inDecl)
 {
     // A plain line feed only moves the position to the next line
     if (curCh == chLF)
     {
         fCurLine++;
         fCurCol = 1;
         return;
     }

     // 1. the two-character sequence #xD #xA
     // 2. the two-character sequence #xD #x85
     // 5. any #xD character that is not immediately followed by #xA or #x85.
     if (curCh == chCR)
     {
         fCurLine++;
         fCurCol = 1;

         // Already internalized content is left as it is
         if (fSource != Source_External)
             return;

         //
         //  Eat a directly following LF (or NEL, when NEL handling is on),
         //  reloading the buffer if that is what it takes to see the next
         //  character, and turn the CR itself into an LF.
         //
         const bool haveNext = (fCharIndex < fCharsAvail) || refreshCharBuffer();
         if (haveNext)
         {
             const XMLCh nextCh = fCharBuf[fCharIndex];
             if ((nextCh == chLF) || ((nextCh == chNEL) && fNEL))
                 fCharIndex++;
         }
         curCh = chLF;
         return;
     }

     // Not an end-of-line character at all
     if ((curCh != chNEL) && (curCh != chLineSeparator))
     {
         fCurCol++;
         return;
     }

     // 3. the single character #x85
     // 4. the single character #x2028
     if (inDecl && fXMLVersion == XMLV1_1)
     {
         /***
          * XML1.1
          *
          * 2.11 End-of-Line Handling
          *  ...
          *   The characters #x85 and #x2028 cannot be reliably recognized and translated
          *   until an entity's encoding declaration (if present) has been read.
          *   Therefore, it is a fatal error to use them within the XML declaration or
          *   text declaration.
          *
          ***/
         ThrowXMLwithMemMgr1
             (
             TranscodingException
             , XMLExcepts::Reader_NelLsepinDecl
             , fSystemId
             , fMemoryManager
             );
     }

     // Only normalized for external content and when NEL handling is on
     if (fNEL && fSource == Source_External)
     {
         fCurLine++;
         fCurCol = 1;
         curCh = chLF;
     }
 }

 XERCES_CPP_NAMESPACE_END