src/xercesc/util/Transcoders/ICU/ICUTransService.cpp - platform/external/xerces-cpp - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * $Id: ICUTransService.cpp 568078 2007-08-21 11:43:25Z amassari $
  */


 // ---------------------------------------------------------------------------
 //  Includes
 // ---------------------------------------------------------------------------
 #include <xercesc/util/PlatformUtils.hpp>
 #include <xercesc/util/Janitor.hpp>
 #include <xercesc/util/TranscodingException.hpp>
 #include <xercesc/util/XMLString.hpp>
 #include <xercesc/util/XMLUniDefs.hpp>
 #include "ICUTransService.hpp"
 #include <string.h>
 #include <unicode/uloc.h>
 #include <unicode/uchar.h>
 #include <unicode/ucnv.h>
 #include <unicode/ucnv_err.h>
 #include <unicode/ustring.h>
 #include <unicode/udata.h>
 #if (U_ICU_VERSION_MAJOR_NUM >= 2)
     #include <unicode/uclean.h>
 #endif

 #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
 // Forward reference the symbol which points to the ICU converter data.
 #if (U_ICU_VERSION_MAJOR_NUM < 2)
 extern "C" const uint8_t U_IMPORT icudata_dat[];
 #endif
 #endif

 XERCES_CPP_NAMESPACE_BEGIN

 // ---------------------------------------------------------------------------
 //  Local, const data
 // ---------------------------------------------------------------------------
 // Identifier string "ICU"; this is the value handed back by getId().
 static const XMLCh gMyServiceId[] =
 {
     chLatin_I, chLatin_C, chLatin_U, chNull
 };

 // Upper-case "S390" encoding-name suffix tested in makeNewXMLTranscoder().
 static const XMLCh gS390Id[] =
 {
     chLatin_S, chDigit_3, chDigit_9, chDigit_0, chNull
 };

 // Lower-case "s390" encoding-name suffix tested in makeNewXMLTranscoder().
 static const XMLCh gs390Id[] =
 {
     chLatin_s, chDigit_3, chDigit_9, chDigit_0, chNull
 };

 // The ",swaplfnl" option text that makeNewXMLTranscoder() substitutes for
 // a trailing "s390"/"S390" before the name is passed to ICU.
 static const XMLCh gswaplfnlId[] =
 {
     chComma, chLatin_s, chLatin_w, chLatin_a, chLatin_p,
     chLatin_l, chLatin_f, chLatin_n, chLatin_l, chNull
 };
 // ---------------------------------------------------------------------------
 //  Local functions
 // ---------------------------------------------------------------------------

 //
 //  When XMLCh and ICU's UChar are not the same size, we have to do a temp
 //  conversion of all strings. These local helper methods make that easier.
 //
 //
 //  Creates a null-terminated UChar copy of an XMLCh string.
 //
 //  toConvert  The source characters.
 //  srcLen     Number of characters to copy. When zero, the length is taken
 //             from XMLString::stringLen(toConvert). When non-zero, exactly
 //             srcLen characters are copied, so the caller must guarantee
 //             that many characters are readable; the source does not have
 //             to be null-terminated.
 //  manager    If non-null the buffer comes from manager->allocate(),
 //             otherwise from new[]. The caller owns the returned buffer and
 //             must release it the matching way.
 //
 //  Returns a buffer of (length + 1) UChars ending in a 0 terminator.
 //
 static UChar* convertToUChar( const   XMLCh* const    toConvert
                             , const unsigned int    srcLen = 0
                             , MemoryManager* const manager = 0)
 {
     const unsigned int actualLen = srcLen
                                    ? srcLen : XMLString::stringLen(toConvert);

     UChar* tmpBuf = (manager)
         ? (UChar*) manager->allocate((actualLen + 1) * sizeof(UChar))
         : new UChar[actualLen + 1];

     //
     //  Copy by count rather than "until null": the buffer holds exactly
     //  actualLen characters plus the terminator, so a counted source that
     //  is longer than srcLen (or not terminated at all) must not be allowed
     //  to run past the end of the allocation.
     //
     for (unsigned int index = 0; index < actualLen; index++)
         tmpBuf[index] = UChar(toConvert[index]);
     tmpBuf[actualLen] = 0;

     return tmpBuf;
 }


 //
 //  Creates a null-terminated XMLCh copy of a null-terminated UChar string.
 //  The buffer comes from manager->allocate() when a manager is supplied and
 //  from new[] otherwise; the caller owns it.
 //
 static XMLCh* convertToXMLCh( const UChar* const toConvert,
                             MemoryManager* const manager = 0)
 {
     const unsigned int charCount = u_strlen(toConvert);

     XMLCh* result;
     if (manager)
         result = (XMLCh*) manager->allocate((charCount + 1) * sizeof(XMLCh));
     else
         result = new XMLCh[charCount + 1];

     // Widen/narrow each code unit up to (not including) the terminator
     unsigned int pos = 0;
     for (; toConvert[pos]; pos++)
         result[pos] = XMLCh(toConvert[pos]);
     result[pos] = 0;

     return result;
 }


 // ---------------------------------------------------------------------------
 //  ICUTransService: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Constructs the service. Depending on the ICU version seen at compile
 //  time this either initializes ICU explicitly (2.6 and later) or, for
 //  versions before 2.0 on platforms not excluded below, hands ICU the
 //  statically linked converter data.
 //
 ICUTransService::ICUTransService()
 {
 #if (U_ICU_VERSION_MAJOR_NUM > 2 || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >= 6))
     // Initialize ICU up front; if that fails, report it through the
     // platform panic handler as "no transcoding service".
     UErrorCode errorCode=U_ZERO_ERROR;
     u_init(&errorCode);
     if(U_FAILURE(errorCode)) {
         XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService);
     }
 #endif

 #if !defined(XML_OS390) && !defined(XML_AS400) && !defined(XML_HPUX) && !defined(XML_PTX)
 #if (U_ICU_VERSION_MAJOR_NUM < 2)
     // Starting with ICU 2.0, ICU itself includes a static reference to the data
     // entrypoint symbol.
     //
     // ICU 1.8 (and previous) did not include a static reference, but would
     // dynamically load the data dll when it was first needed, however this dynamic
     // loading proved unreliable in some of the odd environments that Xerces needed
     // to run in.  Hence, the static reference.

     // Pass the location of the converter data to ICU. By doing so, we are
     // forcing the load of ICU converter data DLL, after the Xerces-C DLL is
     // loaded. This implies that Xerces-C, now has to explicitly link with the
     // ICU converter dll. However, the advantage is that we no longer depend
     // on the code which does demand dynamic loading of DLL's. The demand
     // loading is highly system dependent and was a constant source of support
     // calls.
     //
     // NOTE: the status written to uerr is not examined afterwards.
     UErrorCode uerr = U_ZERO_ERROR;
     udata_setCommonData((void *) icudata_dat, &uerr);
 #endif
 #endif
 }

 //
 //  Destructor. Intentionally performs no ICU cleanup; see the note inside.
 //
 ICUTransService::~ICUTransService()
 {
     /*
      * The clean up code below is deliberately commented out:
      * users may also be using ICU outside of the parser, and
      * if we cleaned up here their code could crash.
      *
     #if (U_ICU_VERSION_MAJOR_NUM >= 2)
         // release all lazily allocated data
         u_cleanup();
     #endif
     */
 }


 // ---------------------------------------------------------------------------
 //  ICUTransService: The virtual transcoding service API
 // ---------------------------------------------------------------------------
 int ICUTransService::compareIString(const   XMLCh* const    comp1
                                     , const XMLCh* const    comp2)
 {
     size_t  i = 0;
     size_t  j = 0;

     for(;;)
     {
         UChar32 ch1;
         UChar32 ch2;

         U16_NEXT_UNSAFE(comp1, i, ch1);
         U16_NEXT_UNSAFE(comp2, j, ch2);

         const UChar32   folded1 =
             u_foldCase(ch1, U_FOLD_CASE_DEFAULT);

         const UChar32   folded2 =
             u_foldCase(ch2, U_FOLD_CASE_DEFAULT);

         if (folded1 !=
             folded2)
         {
             return folded1 - folded2;
         }
         else if (ch1 == 0)
         {
             // If ch1 is 0, the ch2 must also be
             // 0.  Otherwise, the previous if
             // would have failed.
             break;
         }
     }

     return 0;
 }


 //
 //  Case-insensitive comparison of at most maxChars code units of two
 //  null-terminated strings, using default ICU case folding on each code
 //  point.
 //
 //  Returns 0 if maxChars is 0, if both strings end before any difference
 //  is found, or if both reach the limit together. Otherwise returns the
 //  difference of the first differing folded code points, or -1 / 1 when
 //  only the left / right string has reached the limit.
 //
 //  Note that this function has somewhat broken semantics, as it's
 //  possible for two strings of different lengths to compare as equal
 //  in a case-insensitive manner, since one character could be
 //  represented as a surrogate pair.
 //
 int ICUTransService::compareNIString(const  XMLCh* const    comp1
                                     , const XMLCh* const    comp2
                                     , const unsigned int    maxChars)
 {
     if (maxChars == 0)
         return 0;

     size_t  i = 0;
     size_t  j = 0;

     for(;;)
     {
         UChar32 ch1;
         UChar32 ch2;

         U16_NEXT_UNSAFE(comp1, i, ch1);
         U16_NEXT_UNSAFE(comp2, j, ch2);

         const UChar32   folded1 =
             u_foldCase(ch1, U_FOLD_CASE_DEFAULT);

         const UChar32   folded2 =
             u_foldCase(ch2, U_FOLD_CASE_DEFAULT);

         if (folded1 != folded2)
         {
             return folded1 - folded2;
         }

         // The folded values are equal, so if the left string has hit its
         // terminator the right one has too. Stop here instead of reading
         // beyond the end of strings shorter than maxChars.
         if (ch1 == 0)
         {
             return 0;
         }

         // Use >= rather than ==: consuming a surrogate pair advances the
         // index by two and can step over maxChars.
         if (i >= maxChars)
         {
             // If we're at the limit in both strings, return 0.
             // Otherwise, we've run out of characters in the
             // left string, so return -1.
             return (j >= maxChars) ? 0 : -1;
         }

         if (j >= maxChars)
         {
             // We've run out of characters in the right string,
             // but not the left, so return 1.
             return 1;
         }
     }
 }


 //
 //  Returns the identifier of this transcoding service, the file-local
 //  constant gMyServiceId ("ICU"). The caller must not free the pointer.
 //
 const XMLCh* ICUTransService::getId() const
 {
     return gMyServiceId;
 }


 //
 //  Reports whether toCheck is a whitespace character. Tab, line feed and
 //  carriage return are answered directly; everything else is delegated to
 //  ICU's u_isspace().
 //
 bool ICUTransService::isSpace(const XMLCh toCheck) const
 {
     //
     //  <TBD>
     //  These control characters are special cased because ICU is not
     //  reporting them as space. Once ICU changes that, the switch can go.
     //
     switch (toCheck)
     {
         case 0x09 :
         case 0x0A :
         case 0x0D :
             return true;

         default :
             break;
     }

     return u_isspace(UChar(toCheck)) ? true : false;
 }


 //
 //  Creates a local code page transcoder wrapping ICU's default converter.
 //  Returns a null pointer when ICU cannot open that converter, which will
 //  basically cause the system to give up since nothing can be done
 //  without one.
 //
 XMLLCPTranscoder* ICUTransService::makeNewLCPTranscoder()
 {
     // A NULL name asks ICU for its default converter
     UErrorCode openStatus = U_ZERO_ERROR;
     UConverter* const defaultConv = ucnv_open(NULL, &openStatus);

     // On success the new wrapper adopts the converter
     return defaultConv ? new ICULCPTranscoder(defaultConv) : 0;
 }


 //
 //  Tells callers whether this service can supply source offset
 //  information. Always returns true for the ICU implementation.
 //
 bool ICUTransService::supportsSrcOfs() const
 {
     // This implementation supports source offset information
     return true;
 }


 //
 //  Applies caseFunction to every code point of the null-terminated string
 //  convertString, rewriting the string in place.
 //
 //  The semantics are knowingly imperfect: a case change may need more code
 //  units than the original text, and the buffer cannot grow. If a BMP code
 //  point maps to a supplementary one and there is no spare slot left, the
 //  conversion stops and the string is terminated at the current write
 //  position.
 //
 template <class FunctionType>
 static void
 doCaseConvert(
             XMLCh*          convertString,
             FunctionType    caseFunction)
 {
     const unsigned int  unitCount =
             XMLString::stringLen(convertString);

     size_t  srcIdx = 0;
     size_t  dstIdx = 0;

     while (srcIdx < unitCount)
     {
         // Decode the next code point (one or two code units)
         UChar32     before;
         U16_NEXT_UNSAFE(convertString, srcIdx, before);

         const UChar32   after = caseFunction(before);

         // One unit was read but two would have to be written
         const bool  needsExtraUnit = U_IS_BMP(before) && !U_IS_BMP(after);

         // With a gap of exactly one unit, writing a pair would overwrite
         // the next unread character, so give up here.
         if (needsExtraUnit && (srcIdx - dstIdx == 1))
             break;

         U16_APPEND_UNSAFE(convertString, dstIdx, after);
     }

     convertString[dstIdx] = 0;
 }


 //
 //  Upper-cases the null-terminated string toUpperCase in place by running
 //  doCaseConvert() with ICU's u_toupper.
 //
 void ICUTransService::upperCase(XMLCh* const toUpperCase) const
 {
     doCaseConvert(toUpperCase, u_toupper);
 }

 //
 //  Lower-cases the null-terminated string toLowerCase in place by running
 //  doCaseConvert() with ICU's u_tolower.
 //
 void ICUTransService::lowerCase(XMLCh* const toLowerCase) const
 {
     doCaseConvert(toLowerCase, u_tolower);
 }


 // ---------------------------------------------------------------------------
 //  ICUTransService: The protected virtual transcoding service API
 // ---------------------------------------------------------------------------
 //
 //  Creates an ICU-backed transcoder for the named encoding.
 //
 //  encodingName  Null-terminated name of the requested encoding.
 //  resValue      Set to XMLTransService::UnsupportedEncoding when ICU
 //                cannot open a converter; not written on success.
 //  blockSize     Passed through to the ICUTranscoder constructor.
 //  manager       Used for temporary buffers and for the new transcoder.
 //
 //  Returns the new transcoder, or 0 if the encoding is not supported.
 //
 XMLTranscoder* ICUTransService::
 makeNewXMLTranscoder(const  XMLCh* const            encodingName
                     ,       XMLTransService::Codes& resValue
                     , const unsigned int            blockSize
                     ,       MemoryManager* const    manager)
 {
     //
     //  For encodings that end with "s390" we need to strip off the "s390"
     //  from the encoding name and add ",swaplfnl" to the encoding name
     //  that we pass into ICU on the ucnv_openU.
     //
     XMLCh* encodingNameToUse = (XMLCh*) encodingName;
     XMLCh* workBuffer = 0;

     if ( (XMLString::endsWith(encodingNameToUse, gs390Id)) ||
          (XMLString::endsWith(encodingNameToUse, gS390Id)) )
     {
        const unsigned int suffixLen = XMLString::stringLen(gS390Id);
        const unsigned int swapLen = XMLString::stringLen(gswaplfnlId);
        const unsigned int moveSize = XMLString::stringLen(encodingNameToUse) - suffixLen;

        workBuffer = (XMLCh*) manager->allocate((moveSize + swapLen + 1) * sizeof(XMLCh));
        XMLString::moveChars(workBuffer, encodingNameToUse, moveSize);
        XMLString::moveChars((workBuffer + moveSize), gswaplfnlId, swapLen);

        // moveChars copies only the characters requested, so the terminator
        // has to be written explicitly; ICU reads the name up to the null.
        workBuffer[moveSize + swapLen] = chNull;

        encodingNameToUse = workBuffer;
     }

     // Guard the work buffer right away so that it is released even if the
     // conversion below throws.
     ArrayJanitor<XMLCh> janTmp1(workBuffer, manager);

     //
     //  If UChar and XMLCh are not the same size, then we have premassage the
     //  encoding name into a UChar type string.
     //
     const UChar* actualName;
     UChar* tmpName = 0;
     if (sizeof(UChar) == sizeof(XMLCh))
     {
         actualName = (const UChar*)encodingNameToUse;
     }
     else
     {
         tmpName = convertToUChar(encodingNameToUse, 0, manager);
         actualName = tmpName;
     }

     ArrayJanitor<UChar> janTmp(tmpName, manager);

     UErrorCode uerr = U_ZERO_ERROR;
     UConverter* converter = ucnv_openU(actualName, &uerr);
     if (!converter)
     {
         resValue = XMLTransService::UnsupportedEncoding;
         return 0;
     }

     // The transcoder keeps the caller's original encoding name and adopts
     // the converter.
     return new (manager) ICUTranscoder(encodingName, converter, blockSize, manager);
 }


 // ---------------------------------------------------------------------------
 //  ICUTranscoder: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Builds a transcoder around an already opened ICU converter, which this
 //  object adopts. When blockSize is non-zero an array of blockSize source
 //  offsets is obtained from the memory manager for use by transcodeFrom().
 //
 ICUTranscoder::ICUTranscoder(const  XMLCh* const        encodingName
                             ,       UConverter* const   toAdopt
                             , const unsigned int        blockSize
                             , MemoryManager* const      manager) :

     XMLTranscoder(encodingName, blockSize, manager)
     , fConverter(toAdopt)
     , fFixed(false)
     , fSrcOffsets(0)
 {
     // Only a non-zero block size needs a source offset array
     if (blockSize != 0)
     {
         const size_t offsetBytes = blockSize * sizeof(XMLUInt32);
         fSrcOffsets = (XMLUInt32*) manager->allocate(offsetBytes);
     }

     // The encoding is fixed width when its smallest and largest character
     // sizes are the same
     const int widestChar = ucnv_getMaxCharSize(fConverter);
     const int narrowestChar = ucnv_getMinCharSize(fConverter);
     fFixed = (widestChar == narrowestChar);
 }

 //
 //  Returns the source offset array to the memory manager and closes the
 //  adopted ICU converter, if there is one.
 //
 ICUTranscoder::~ICUTranscoder()
 {
     MemoryManager* const owner = getMemoryManager();
     owner->deallocate(fSrcOffsets);

     if (fConverter != 0)
     {
         // <TBD> Does this actually delete the structure???
         ucnv_close(fConverter);
         fConverter = 0;
     }
 }


 // ---------------------------------------------------------------------------
 //  ICUTranscoder: The virtual transcoder API
 // ---------------------------------------------------------------------------
 //
 //  Decodes bytes of this transcoder's encoding into XMLCh characters.
 //
 //  srcData     Source bytes.
 //  srcCount    Number of source bytes available.
 //  toFill      Receives the decoded characters.
 //  maxChars    Capacity of toFill, in characters.
 //  bytesEaten  Receives the number of source bytes consumed.
 //  charSizes   Receives, per decoded character, the number of source bytes
 //              it was decoded from. For fixed width encodings the first
 //              maxChars entries are all set to the character size.
 //
 //  Returns the number of characters stored in toFill.
 //
 //  Throws TranscodingException (Trans_BadSrcCP for fixed width encodings,
 //  Trans_BadSrcSeq otherwise) when ICU reports anything other than success
 //  or a target buffer overflow.
 //
 unsigned int
 ICUTranscoder::transcodeFrom(const  XMLByte* const          srcData
                             , const unsigned int            srcCount
                             ,       XMLCh* const            toFill
                             , const unsigned int            maxChars
                             ,       unsigned int&           bytesEaten
                             ,       unsigned char* const    charSizes)
 {
     // If debugging, insure the block size is legal
     #if defined(XERCES_DEBUG)
     checkBlockSize(maxChars);
     #endif

     // Set up pointers to the start and end of the source buffer
     const XMLByte*  startSrc = srcData;
     const XMLByte*  endSrc = srcData + srcCount;

     //
     //  And now do the target buffer. This works differently according to
     //  whether XMLCh and UChar are the same size or not: either ICU writes
     //  straight into the caller's buffer or into a temporary one.
     //
     UChar* startTarget;
     if (sizeof(XMLCh) == sizeof(UChar))
         startTarget = (UChar*)toFill;
      else
         startTarget = (UChar*) getMemoryManager()->allocate
         (
             maxChars * sizeof(UChar)
         );//new UChar[maxChars];
     UChar* orgTarget = startTarget;

     //
     //  Transcode the buffer.  Buffer overflow errors are normal, occurring
     //  when the raw input buffer holds more characters than will fit in
     //  the Unicode output buffer.
     //
     UErrorCode  err = U_ZERO_ERROR;
     ucnv_toUnicode
     (
         fConverter
         , &startTarget
         , startTarget + maxChars
         , (const char**)&startSrc
         , (const char*)endSrc
         , (fFixed ? 0 : (int32_t*)fSrcOffsets)
         , false
         , &err
     );

     if ((err != U_ZERO_ERROR) && (err != U_BUFFER_OVERFLOW_ERROR))
     {
         //
         //  The value reported for fixed width encodings is whatever sits
         //  at the converter's current target position. Fetch it before the
         //  temporary buffer is released, and only if that position is
         //  still inside the buffer.
         //
         unsigned int badValue = 0;
         if (startTarget < orgTarget + maxChars)
             badValue = (unsigned int)(*startTarget);

         if (orgTarget != (UChar*)toFill)
             getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;

         if (fFixed)
         {
             XMLCh tmpBuf[17];
             XMLString::binToText(badValue, tmpBuf, 16, 16, getMemoryManager());
             ThrowXMLwithMemMgr2
             (
                 TranscodingException
                 , XMLExcepts::Trans_BadSrcCP
                 , tmpBuf
                 , getEncodingName()
                 , getMemoryManager()
             );
         }
         else
         {
             ThrowXMLwithMemMgr(TranscodingException, XMLExcepts::Trans_BadSrcSeq, getMemoryManager());
         }
     }

     // Calculate the bytes eaten and store in caller's param
     bytesEaten = startSrc - srcData;

     // And the characters decoded
     const unsigned int charsDecoded = startTarget - orgTarget;

     //
     //  Translate the array of char offsets into an array of character
     //  sizes, which is what the transcoder interface semantics requires.
     //  If its fixed, then we can optimize it.
     //
     if (fFixed)
     {
         const unsigned char fillSize = (unsigned char)ucnv_getMaxCharSize(fConverter);
         memset(charSizes, fillSize, maxChars);
     }
      else
     {
         //
         //  We have to convert the series of offsets into a series of
         //  sizes. If just one char was decoded, then its the total bytes
         //  eaten. Otherwise, do a loop and subtract out each element from
         //  its previous element. Nothing is written when no characters
         //  were decoded.
         //
         if (charsDecoded == 1)
         {
             charSizes[0] = (unsigned char)bytesEaten;
         }
          else if (charsDecoded > 1)
         {
             //  ICU does not return an extra element to allow us to figure
             //  out the last char size, so we have to compute it from the
             //  total bytes used.
             //
             //  The bound is written as "index + 1 < charsDecoded" so that
             //  no unsigned subtraction can wrap around.
             unsigned int index;
             for (index = 0; index + 1 < charsDecoded; index++)
             {
                 charSizes[index] = (unsigned char)(fSrcOffsets[index + 1]
                                                     - fSrcOffsets[index]);
             }
             charSizes[charsDecoded - 1] = (unsigned char)(bytesEaten
                                           - fSrcOffsets[charsDecoded - 1]);
         }
     }

     //
     //  If XMLCh and UChar are not the same size, then we need to copy over
     //  the temp buffer to the new one.
     //
     if (sizeof(UChar) != sizeof(XMLCh))
     {
         XMLCh* outPtr = toFill;
         startTarget = orgTarget;
         for (unsigned int index = 0; index < charsDecoded; index++)
             *outPtr++ = XMLCh(*startTarget++);

         // And delete the temp buffer
         getMemoryManager()->deallocate(orgTarget);//delete [] orgTarget;
     }

     // Return the chars we put into the target buffer
     return charsDecoded;
 }


 unsigned int
 ICUTranscoder::transcodeTo( const   XMLCh* const    srcData
                             , const unsigned int    srcCount
                             ,       XMLByte* const  toFill
                             , const unsigned int    maxBytes
                             ,       unsigned int&   charsEaten
                             , const UnRepOpts       options)
 {
     //
     //  Get a pointer to the buffer to transcode. If UChar and XMLCh are
     //  the same size here, then use the original. Else, create a temp
     //  one and put a janitor on it.
     //
     const UChar* srcPtr;
     UChar* tmpBufPtr = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         srcPtr = (const UChar*)srcData;
     }
     else
     {
         tmpBufPtr = convertToUChar(srcData, srcCount, getMemoryManager());
         srcPtr = tmpBufPtr;
     }
     ArrayJanitor<UChar> janTmpBuf(tmpBufPtr, getMemoryManager());

     //
     //  Set the appropriate callback so that it will either fail or use
     //  the rep char. Remember the old one so we can put it back.
     //
     UErrorCode  err = U_ZERO_ERROR;
     UConverterFromUCallback oldCB = NULL;
     #if (U_ICU_VERSION_MAJOR_NUM < 2)
     void* orgContent;
     #else
     const void* orgContent;
     #endif
     ucnv_setFromUCallBack
     (
         fConverter
         , (options == UnRep_Throw) ? UCNV_FROM_U_CALLBACK_STOP
                                    : UCNV_FROM_U_CALLBACK_SUBSTITUTE
         , NULL
         , &oldCB
         , &orgContent
         , &err
     );

     //
     //  Ok, lets transcode as many chars as we we can in one shot. The
     //  ICU API gives enough info not to have to do this one char by char.
     //
     XMLByte*        startTarget = toFill;
     const UChar*    startSrc = srcPtr;
     err = U_ZERO_ERROR;
     ucnv_fromUnicode
     (
         fConverter
         , (char**)&startTarget
         , (char*)(startTarget + maxBytes)
         , &startSrc
         , srcPtr + srcCount
         , 0
         , false
         , &err
     );

     // Rememember the status before we possibly overite the error code
     const bool res = (err == U_ZERO_ERROR);

     // Put the old handler back
     err = U_ZERO_ERROR;
     UConverterFromUCallback orgAction = NULL;

     ucnv_setFromUCallBack(fConverter, oldCB, NULL, &orgAction, &orgContent, &err);

     if (!res)
     {
         XMLCh tmpBuf[17];
         XMLString::binToText((unsigned int)*startSrc, tmpBuf, 16, 16, getMemoryManager());
         ThrowXMLwithMemMgr2
         (
             TranscodingException
             , XMLExcepts::Trans_Unrepresentable
             , tmpBuf
             , getEncodingName()
             , getMemoryManager()
         );
     }

     // Fill in the chars we ate from the input
     charsEaten = startSrc - srcPtr;

     // Return the chars we stored
     return startTarget - toFill;
 }


 //
 //  Tests whether the code point toCheck can be represented in this
 //  transcoder's encoding, by converting it into a scratch buffer with the
 //  converter set to stop on unrepresentable characters.
 //
 //  Returns true if ICU converts it with status U_ZERO_ERROR. Values above
 //  0x10FFFF are not Unicode code points and yield false.
 //
 bool ICUTranscoder::canTranscodeTo(const unsigned int toCheck) const
 {
     // Nothing beyond the last Unicode code point can be encoded as UTF-16
     if (toCheck > 0x10FFFF)
         return false;

     //
     //  If the passed value lies outside the BMP, then we need to break it
     //  out into its two surrogate chars. Else just one. While we are at
     //  it, convert them to UChar format if required.
     //
     UChar           srcBuf[2];
     unsigned int    srcCount = 1;
     if (toCheck & 0xFFFF0000)
     {
         // UTF-16 encodes (code point - 0x10000): the top ten bits go into
         // the lead surrogate and the low ten bits into the trail surrogate.
         const unsigned int offset = toCheck - 0x10000;
         srcBuf[0] = UChar((offset >> 10) + 0xD800);
         srcBuf[1] = UChar((offset & 0x3FF) + 0xDC00);
         srcCount++;
     }
      else
     {
         srcBuf[0] = UChar(toCheck);
     }

     //
     //  Set the callback so that it will fail instead of using the rep char.
     //  Remember the old callback and its context so both can be put back.
     //
      UErrorCode  err = U_ZERO_ERROR;
      UConverterFromUCallback oldCB = NULL;
      #if (U_ICU_VERSION_MAJOR_NUM < 2)
      void* orgContent = 0;
      void* replacedContent = 0;
      #else
      const void* orgContent = 0;
      const void* replacedContent = 0;
      #endif

      ucnv_setFromUCallBack
          (
          fConverter
          , UCNV_FROM_U_CALLBACK_STOP
          , NULL
          , &oldCB
          , &orgContent
          , &err
          );

     // Set up a temp buffer to format into. Make it more than big enough
     char            tmpBuf[64];
     char*           startTarget = tmpBuf;
     const UChar*    startSrc = srcBuf;

     err = U_ZERO_ERROR;
     ucnv_fromUnicode
     (
         fConverter
         , &startTarget
         , startTarget + 64
         , &startSrc
         , srcBuf + srcCount
         , 0
         , false
         , &err
     );

     // Save the result before we overwrite the error code
     const bool res = (err == U_ZERO_ERROR);

     // Put the old handler back, together with its saved context
     err = U_ZERO_ERROR;
     UConverterFromUCallback orgAction = NULL;

     ucnv_setFromUCallBack(fConverter, oldCB, orgContent, &orgAction, &replacedContent, &err);

     return res;
 }


 // ---------------------------------------------------------------------------
 //  ICULCPTranscoder: Constructors and Destructor
 // ---------------------------------------------------------------------------
 //
 //  Wraps an already opened ICU converter. The converter is adopted: it is
 //  closed by this object's destructor.
 //
 ICULCPTranscoder::ICULCPTranscoder(UConverter* const toAdopt) :

     fConverter(toAdopt)
 {
 }

 //
 //  Closes the adopted ICU converter, if there is one.
 //
 ICULCPTranscoder::~ICULCPTranscoder()
 {
     if (fConverter != 0)
     {
         // <TBD> Does this actually delete the structure???
         ucnv_close(fConverter);
         fConverter = 0;
     }
 }


 // ---------------------------------------------------------------------------
 //  ICULCPTranscoder: The virtual transcoder API
 // ---------------------------------------------------------------------------
 unsigned int ICULCPTranscoder::calcRequiredSize(const XMLCh* const srcText
                                                 , MemoryManager* const manager)
 {
     if (!srcText)
         return 0;

     //
     //  We do two different versions of this, according to whether XMLCh
     //  is the same size as UChar or not.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         // Use a faux scope to synchronize while we do this
         {
             XMLMutexLock lockConverter(&fMutex);

             targetCap = ucnv_fromUChars
             (
                 fConverter
                 , 0
                 , 0
                 , (const UChar*)srcText
                 , -1
                 , &err
             );
         }
     }
     else
     {
         // Copy the source to a local temp
         UChar* tmpBuf = convertToUChar(srcText, 0, manager);
         ArrayJanitor<UChar> janTmp(tmpBuf, manager);

         // Use a faux scope to synchronize while we do this
         {
             XMLMutexLock lockConverter(&fMutex);

             targetCap = ucnv_fromUChars
             (
                 fConverter
                 , 0
                 , 0
                 , tmpBuf
                 , -1
                 , &err
             );
         }
     }

     if (err != U_BUFFER_OVERFLOW_ERROR)
         return 0;

     return (unsigned int)targetCap;
 }

 unsigned int ICULCPTranscoder::calcRequiredSize(const char* const srcText
                                                 , MemoryManager* const manager)
 {
     if (!srcText)
         return 0;

     int32_t targetCap;
     UErrorCode err = U_ZERO_ERROR;

     // Use a faux scope to synchronize while we do this
     {
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_toUChars
         (
             fConverter
             , 0
             , 0
             , srcText
             , strlen(srcText)
             , &err
         );
     }

     if (err != U_BUFFER_OVERFLOW_ERROR)
         return 0;

 #if (U_ICU_VERSION_MAJOR_NUM < 2)
     // Subtract one since it includes the terminator space
     return (unsigned int)(targetCap - 1);
 #else
     // Starting ICU 2.0, this is fixed and all ICU String functions have consistent NUL-termination behavior.
     // The returned length is always the number of output UChar's, not counting an additional, terminating NUL.
     return (unsigned int)(targetCap);
 #endif
 }


 char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode)
 {
     char* retBuf = 0;

     // Check for a couple of special cases
     if (!toTranscode)
         return retBuf;

     if (!*toTranscode)
     {
         retBuf = new char[1];
         retBuf[0] = 0;
         return retBuf;
     }

     //
     //  Get the length of the source string since we'll have to use it in
     //  a couple places below.
     //
     const unsigned int srcLen = XMLString::stringLen(toTranscode);

     //
     //  If XMLCh and UChar are not the same size, then we have to make a
     //  temp copy of the text to pass to ICU.
     //
     const UChar* actualSrc;
     UChar* ncActual = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualSrc = (const UChar*)toTranscode;
     }
      else
     {
         // Allocate a non-const temp buf, but store it also in the actual
         ncActual = convertToUChar(toTranscode, 0, XMLPlatformUtils::fgMemoryManager);
         actualSrc = ncActual;
     }

     // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
     ArrayJanitor<UChar> janTmp(ncActual, XMLPlatformUtils::fgMemoryManager);

     // Caculate a return buffer size not too big, but less likely to overflow
     int32_t targetLen = (int32_t)(srcLen * 1.25);

     // Allocate the return buffer
     retBuf = new char[targetLen + 1];

     //
     //  Lock now while we call the converter. Use a faux block to do the
     //  lock so that it unlocks immediately afterwards.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);

         targetCap = ucnv_fromUChars
         (
             fConverter
             , retBuf
             , targetLen + 1
             , actualSrc
             , -1
             , &err
         );
     }

     // If targetLen is not enough then buffer overflow might occur
     if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING))
     {
         //
         //  Reset the error, delete the old buffer, allocate a new one,
         //  and try again.
         //
         err = U_ZERO_ERROR;
         delete [] retBuf;
         retBuf = new char[targetCap + 1];

         // Lock again before we retry
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_fromUChars
         (
             fConverter
             , retBuf
             , targetCap + 1
             , actualSrc
             , -1
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         delete [] retBuf;
         return 0;
     }

     return retBuf;
 }

 char* ICULCPTranscoder::transcode(const XMLCh* const toTranscode,
                                   MemoryManager* const manager)
 {
     char* retBuf = 0;

     // Check for a couple of special cases
     if (!toTranscode)
         return retBuf;

     if (!*toTranscode)
     {
         retBuf = (char*) manager->allocate(sizeof(char));//new char[1];
         retBuf[0] = 0;
         return retBuf;
     }

     //
     //  Get the length of the source string since we'll have to use it in
     //  a couple places below.
     //
     const unsigned int srcLen = XMLString::stringLen(toTranscode);

     //
     //  If XMLCh and UChar are not the same size, then we have to make a
     //  temp copy of the text to pass to ICU.
     //
     const UChar* actualSrc;
     UChar* ncActual = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualSrc = (const UChar*)toTranscode;
     }
      else
     {
         // Allocate a non-const temp buf, but store it also in the actual
         ncActual = convertToUChar(toTranscode, 0, manager);
         actualSrc = ncActual;
     }

     // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
     ArrayJanitor<UChar> janTmp(ncActual, manager);

     // Caculate a return buffer size not too big, but less likely to overflow
     int32_t targetLen = (int32_t)(srcLen * 1.25);

     // Allocate the return buffer
     retBuf = (char*) manager->allocate((targetLen + 1) * sizeof(char));//new char[targetLen + 1];

     //
     //  Lock now while we call the converter. Use a faux block to do the
     //  lock so that it unlocks immediately afterwards.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);

         targetCap = ucnv_fromUChars
         (
             fConverter
             , retBuf
             , targetLen + 1
             , actualSrc
             , -1
             , &err
         );
     }

     // If targetLen is not enough then buffer overflow might occur
     if ((err == U_BUFFER_OVERFLOW_ERROR) || (err == U_STRING_NOT_TERMINATED_WARNING))
     {
         //
         //  Reset the error, delete the old buffer, allocate a new one,
         //  and try again.
         //
         err = U_ZERO_ERROR;
         manager->deallocate(retBuf);//delete [] retBuf;
         retBuf = (char*) manager->allocate((targetCap + 1) * sizeof(char));//new char[targetCap + 1];

         // Lock again before we retry
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_fromUChars
         (
             fConverter
             , retBuf
             , targetCap + 1
             , actualSrc
             , -1
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         manager->deallocate(retBuf);//delete [] retBuf;
         return 0;
     }

     return retBuf;
 }

 //
 //  Transcodes a null-terminated local code page string to XMLCh and
 //  returns the result in a buffer allocated with new[].
 //
 //  Returns 0 if toTranscode is null or if ICU reports a real conversion
 //  failure. An empty source yields a one element buffer that holds only
 //  the terminator, and so does a non-empty source that converts to zero
 //  UChars.
 //
 XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode)
 {
     // Watch for a few psycho corner cases
     if (!toTranscode)
         return 0;

     if (!*toTranscode)
     {
         XMLCh* retVal = new XMLCh[1];
         retVal[0] = 0;
         return retVal;
     }

     // Length of the source in bytes, as ICU wants it
     const int32_t srcLen = (int32_t)strlen(toTranscode);

     // We need a target buffer of UChars to fill in
     UChar* targetBuf = 0;

     // Now lock while we do these calculations
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);

         //
         //  Preflight: we don't know the target length yet, so pass a zero
         //  capacity and let ICU report the required length.
         //
         targetCap = ucnv_toUChars
         (
             fConverter
             , 0
             , 0
             , toTranscode
             , srcLen
             , &err
         );

         //
         //  U_BUFFER_OVERFLOW_ERROR is the expected outcome of a preflight.
         //  A source that produces no output at all comes back as a warning
         //  (length 0 == capacity 0) rather than an overflow, and that is
         //  not an error either. Only bail out on a genuine failure.
         //
         if (U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR))
             return 0;

         // Now convert for real into a buffer of the reported size
         err = U_ZERO_ERROR;
         targetBuf = new UChar[targetCap + 1];
         ucnv_toUChars
         (
             fConverter
             , targetBuf
             , targetCap + 1
             , toTranscode
             , srcLen
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         // Clean up if we got anything allocated
         delete [] targetBuf;
         return 0;
     }

     // Cap it off to make sure
     targetBuf[targetCap] = 0;

     //
     //  If XMLCh and UChar are the same size, then we can return targetBuf
     //  as is. Else, we have to allocate another buffer and copy the data
     //  over to it.
     //
     XMLCh* actualRet;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualRet = (XMLCh*)targetBuf;
     }
      else
     {
         actualRet = convertToXMLCh(targetBuf);
         delete [] targetBuf;
     }
     return actualRet;
 }

 //
 //  Transcodes a null-terminated local code page string to XMLCh and
 //  returns the result in a buffer obtained from the passed memory manager.
 //
 //  Returns 0 if toTranscode is null or if ICU reports a real conversion
 //  failure. An empty source yields a one element buffer that holds only
 //  the terminator, and so does a non-empty source that converts to zero
 //  UChars.
 //
 XMLCh* ICULCPTranscoder::transcode(const char* const toTranscode,
                                    MemoryManager* const manager)
 {
     // Watch for a few psycho corner cases
     if (!toTranscode)
         return 0;

     if (!*toTranscode)
     {
         XMLCh* retVal = (XMLCh*) manager->allocate(sizeof(XMLCh));//new XMLCh[1];
         retVal[0] = 0;
         return retVal;
     }

     // Length of the source in bytes, as ICU wants it
     const int32_t srcLen = (int32_t)strlen(toTranscode);

     // We need a target buffer of UChars to fill in
     UChar* targetBuf = 0;

     // Now lock while we do these calculations
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);

         //
         //  Preflight: we don't know the target length yet, so pass a zero
         //  capacity and let ICU report the required length.
         //
         targetCap = ucnv_toUChars
         (
             fConverter
             , 0
             , 0
             , toTranscode
             , srcLen
             , &err
         );

         //
         //  U_BUFFER_OVERFLOW_ERROR is the expected outcome of a preflight.
         //  A source that produces no output at all comes back as a warning
         //  (length 0 == capacity 0) rather than an overflow, and that is
         //  not an error either. Only bail out on a genuine failure.
         //
         if (U_FAILURE(err) && (err != U_BUFFER_OVERFLOW_ERROR))
             return 0;

         // Now convert for real into a buffer of the reported size
         err = U_ZERO_ERROR;
         targetBuf = (UChar*) manager->allocate((targetCap+1) * sizeof(UChar));//new UChar[targetCap + 1];
         ucnv_toUChars
         (
             fConverter
             , targetBuf
             , targetCap + 1
             , toTranscode
             , srcLen
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         // Clean up if we got anything allocated
         manager->deallocate(targetBuf);//delete [] targetBuf;
         return 0;
     }

     // Cap it off to make sure
     targetBuf[targetCap] = 0;

     //
     //  If XMLCh and UChar are the same size, then we can return targetBuf
     //  as is. Else, we have to allocate another buffer and copy the data
     //  over to it.
     //
     XMLCh* actualRet;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualRet = (XMLCh*)targetBuf;
     }
      else
     {
         actualRet = convertToXMLCh(targetBuf, manager);
         manager->deallocate(targetBuf);//delete [] targetBuf;
     }
     return actualRet;
 }


 bool ICULCPTranscoder::transcode(const  char* const     toTranscode
                                 ,       XMLCh* const    toFill
                                 , const unsigned int    maxChars
                                 , MemoryManager* const  manager)
 {
     // Check for a couple of psycho corner cases
     if (!toTranscode || !maxChars)
     {
         toFill[0] = 0;
         return true;
     }

     if (!*toTranscode)
     {
         toFill[0] = 0;
         return true;
     }

     // We'll need this in a couple of places below
     const unsigned int srcLen = strlen(toTranscode);

     //
     //  Set up the target buffer. If XMLCh and UChar are not the same size
     //  then we have to use a temp buffer and convert over.
     //
     UChar* targetBuf;
     if (sizeof(XMLCh) == sizeof(UChar))
         targetBuf = (UChar*)toFill;
     else
         targetBuf = (UChar*) manager->allocate
         (
             (maxChars + 1) * sizeof(UChar)
         );//new UChar[maxChars + 1];

     //
     //  Use a faux block to enforce a lock on the converter, which will
     //  unlock immediately after its completed.
     //
     UErrorCode err = U_ZERO_ERROR;
     {
         XMLMutexLock lockConverter(&fMutex);
         ucnv_toUChars
         (
             fConverter
             , targetBuf
             , maxChars + 1
             , toTranscode
             , srcLen
             , &err
         );
     }

     if (U_FAILURE(err))
     {
         if (targetBuf != (UChar*)toFill)
             manager->deallocate(targetBuf);//delete [] targetBuf;
         return false;
     }

     // If the sizes are not the same, then copy the data over
     if (sizeof(XMLCh) != sizeof(UChar))
     {
         UChar* srcPtr = targetBuf;
         XMLCh* outPtr = toFill;
         while (*srcPtr)
             *outPtr++ = XMLCh(*srcPtr++);
         *outPtr = 0;

         // And delete the temp buffer
         manager->deallocate(targetBuf);//delete [] targetBuf;
     }

     return true;
 }


 bool ICULCPTranscoder::transcode(   const   XMLCh* const    toTranscode
                                     ,       char* const     toFill
                                     , const unsigned int    maxChars
                                     , MemoryManager* const  manager)
 {
     // Watch for a few psycho corner cases
     if (!toTranscode || !maxChars)
     {
         toFill[0] = 0;
         return true;
     }

     if (!*toTranscode)
     {
         toFill[0] = 0;
         return true;
     }

     //
     //  If XMLCh and UChar are not the same size, then we have to make a
     //  temp copy of the text to pass to ICU.
     //
     const UChar* actualSrc;
     UChar* ncActual = 0;
     if (sizeof(XMLCh) == sizeof(UChar))
     {
         actualSrc = (const UChar*)toTranscode;
     }
      else
     {
         // Allocate a non-const temp buf, but store it also in the actual
         ncActual = convertToUChar(toTranscode, 0, manager);
         actualSrc = ncActual;
     }

     // Insure that the temp buffer, if any, gets cleaned up via the nc pointer
     ArrayJanitor<UChar> janTmp(ncActual, manager);

     //
     //  Use a faux block to enforce a lock on the converter while we do this.
     //  It will be released immediately after its done.
     //
     UErrorCode err = U_ZERO_ERROR;
     int32_t targetCap;
     {
         XMLMutexLock lockConverter(&fMutex);
         targetCap = ucnv_fromUChars
         (
             fConverter
             , toFill
             , maxChars
             , actualSrc
             , -1
             , &err
         );
     }

     if (U_FAILURE(err))
         return false;

     toFill[targetCap] = 0;
     return true;
 }

 XERCES_CPP_NAMESPACE_END