blob: 5dd530d7af69d6e7fa6b7325300bb48c7fb6ac3e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: MacOSUnicodeConverter.cpp 568078 2007-08-21 11:43:25Z amassari $
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/XercesDefs.hpp>
#include <algorithm>
#include <cstddef>
#include <cstring>
#if defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
// Only used under metrowerks.
#include <cwctype>
#endif
#if defined(__APPLE__)
// Framework includes from ProjectBuilder
#include <CoreServices/CoreServices.h>
#else
// Classic includes otherwise
#include <MacErrors.h>
#include <Script.h>
#include <TextUtils.h>
#include <TextEncodingConverter.h>
#include <TextCommon.h>
#include <CodeFragments.h>
#include <UnicodeConverter.h>
#include <UnicodeUtilities.h>
#include <CFCharacterSet.h>
#include <CFString.h>
#endif
#include <xercesc/util/Transcoders/MacOSUnicodeConverter/MacOSUnicodeConverter.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/XMLUni.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/TranscodingException.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/Platforms/MacOS/MacOSPlatformUtils.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Typedefs
// ---------------------------------------------------------------------------
// TempBufs are used for cases where we need a temporary buffer while processing.
const std::size_t kTempBufCount = 512;
typedef char TempCharBuf[kTempBufCount];
typedef UniChar TempUniBuf[kTempBufCount];
typedef XMLCh TempXMLBuf[kTempBufCount];
// ---------------------------------------------------------------------------
// Local, const data
// ---------------------------------------------------------------------------
const XMLCh MacOSUnicodeConverter::fgMyServiceId[] =
{
chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chNull
};
const XMLCh MacOSUnicodeConverter::fgMacLCPEncodingName[] =
{
chLatin_M, chLatin_a, chLatin_c, chLatin_O, chLatin_S, chLatin_L
, chLatin_C, chLatin_P, chLatin_E, chLatin_n, chLatin_c, chLatin_o
, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chNull
};
// ---------------------------------------------------------------------------
// MacOSUnicodeConverter: Constructors and Destructor
// ---------------------------------------------------------------------------
MacOSUnicodeConverter::MacOSUnicodeConverter()
: fCollator(NULL)
{
// Test for presense of unicode collation functions
fHasUnicodeCollation = (UCCompareText != (void*)kUnresolvedCFragSymbolAddress);
// Create a unicode collator for doing string comparisons
if (fHasUnicodeCollation)
{
// Configure collation options
UCCollateOptions collateOptions =
kUCCollateComposeInsensitiveMask
| kUCCollateWidthInsensitiveMask
| kUCCollateCaseInsensitiveMask
| kUCCollatePunctuationSignificantMask
;
OSStatus status = UCCreateCollator(NULL, 0, collateOptions, &fCollator);
}
}
MacOSUnicodeConverter::~MacOSUnicodeConverter()
{
// Dispose our collator
if (fCollator != NULL)
UCDisposeCollator(&fCollator);
}
// ---------------------------------------------------------------------------
// MacOSUnicodeConverter: The virtual transcoding service API
// ---------------------------------------------------------------------------
int MacOSUnicodeConverter::compareIString( const XMLCh* const comp1
, const XMLCh* const comp2)
{
// If unicode collation routines are available, use them.
// This should be the case on Mac OS 8.6 and later,
// with Carbon 1.0.2 or later, and under Mac OS X.
//
// Otherwise, but only for Metrowerks, since only Metrowerks
// has a c library with a valid set of wchar routines,
// fall back to the standard library.
if (fHasUnicodeCollation && fCollator != NULL)
{
std::size_t cnt1 = XMLString::stringLen(comp1);
std::size_t cnt2 = XMLString::stringLen(comp2);
Boolean equivalent = false;
SInt32 order = 0;
OSStatus status = UCCompareText(
fCollator,
reinterpret_cast<const UniChar*>(comp1),
cnt1,
reinterpret_cast<const UniChar*>(comp2),
cnt2,
&equivalent,
&order
);
return ((status != noErr) || equivalent) ? 0 : order;
}
#if defined(XML_METROWERKS)
else
{
const XMLCh* cptr1 = comp1;
const XMLCh* cptr2 = comp2;
while ( (*cptr1 != 0) && (*cptr2 != 0) )
{
std::wint_t wch1 = std::towupper(*cptr1);
std::wint_t wch2 = std::towupper(*cptr2);
if (wch1 != wch2)
break;
cptr1++;
cptr2++;
}
return (int) (std::towupper(*cptr1) - std::towupper(*cptr2));
}
#else
else
{
// For some reason there is no platform utils available
// where we expect it. Bail.
XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService);
return 0;
}
#endif
}
int MacOSUnicodeConverter::compareNIString( const XMLCh* const comp1
, const XMLCh* const comp2
, const unsigned int maxChars)
{
// If unicode collation routines are available, use them.
// This should be the case on Mac OS 8.6 and later,
// with Carbon 1.0.2 or later, and under Mac OS X.
//
// Otherwise, but only for Metrowerks, since only Metrowerks
// has a c library with a valid set of wchar routines,
// fall back to the standard library.
if (fHasUnicodeCollation && fCollator != NULL)
{
std::size_t cnt1 = XMLString::stringLen(comp1);
std::size_t cnt2 = XMLString::stringLen(comp2);
// Restrict view of source characters to first {maxChars}
if (cnt1 > maxChars)
cnt1 = maxChars;
if (cnt2 > maxChars)
cnt2 = maxChars;
Boolean equivalent = false;
SInt32 order = 0;
OSStatus status = UCCompareText(
fCollator,
reinterpret_cast<const UniChar*>(comp1),
cnt1,
reinterpret_cast<const UniChar*>(comp2),
cnt2,
&equivalent,
&order
);
return ((status != noErr) || equivalent) ? 0 : order;
}
#if defined(XML_METROWERKS)
else
{
unsigned int n = 0;
const XMLCh* cptr1 = comp1;
const XMLCh* cptr2 = comp2;
while ( (*cptr1 != 0) && (*cptr2 != 0) && (n < maxChars) )
{
std::wint_t wch1 = std::towupper(*cptr1);
std::wint_t wch2 = std::towupper(*cptr2);
if (wch1 != wch2)
break;
cptr1++;
cptr2++;
n++;
}
return (int)(std::towupper(*cptr1) - std::towupper(*cptr2));
}
#else
else
{
// For some reason there is no platform utils available
// where we expect it. Bail.
XMLPlatformUtils::panic(PanicHandler::Panic_NoTransService);
return 0;
}
#endif
}
const XMLCh* MacOSUnicodeConverter::getId() const
{
return fgMyServiceId;
}
bool MacOSUnicodeConverter::isSpace(const XMLCh toCheck) const
{
#if TARGET_API_MAC_CARBON
// Return true if the specified character is in the set.
CFCharacterSetRef wsSet = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
return CFCharacterSetIsCharacterMember(wsSet, toCheck);
#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
// Use this if there's a reasonable c library available.
// ProjectBuilder currently has no support for iswspace ;(
return (std::iswspace(toCheck) != 0);
#endif
}
TextEncoding
MacOSUnicodeConverter::discoverLCPEncoding()
{
TextEncoding encoding = 0;
// Ask the OS for the best text encoding for this application
// We would call GetApplicationTextEncoding(), but it's available only in
// Carbon (not CarbonCore), and we try to link with frameworks only in CoreServices.
// encoding = GetApplicationTextEncoding();
// Get TextEncoding for the current Mac System Script, falling back to Mac Roman
if (noErr != UpgradeScriptInfoToTextEncoding(
smSystemScript, kTextLanguageDontCare, kTextRegionDontCare,
NULL, &encoding))
encoding = CreateTextEncoding(kTextEncodingMacRoman,
kTextEncodingDefaultVariant,
kTextEncodingDefaultFormat);
// Traditionally, the Mac transcoder has used the current system script
// as the LCP text encoding.
//
// As of Xerces 2.6, this continues to be the case if XML_MACOS_LCP_TRADITIONAL
// is defined.
//
// Otherwise, but only for Mac OS X, utf-8 will be used instead.
// Since posix paths are utf-8 encoding on OS X, and the OS X
// terminal uses utf-8 by default, this seems to make the most sense.
#if !defined(XML_MACOS_LCP_TRADITIONAL)
if (gMacOSXOrBetter)
{
// Manufacture a text encoding for UTF8
encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
kTextEncodingDefaultVariant,
kUnicodeUTF8Format);
}
#endif
return encoding;
}
XMLLCPTranscoder* MacOSUnicodeConverter::makeNewLCPTranscoder()
{
XMLLCPTranscoder* result = NULL;
OSStatus status = noErr;
MemoryManager* manager = XMLPlatformUtils::fgMemoryManager;
// Discover the text encoding to use for the LCP
TextEncoding lcpTextEncoding = discoverLCPEncoding();
// We implement the LCP transcoder in terms of the XMLTranscoder.
// Create an XMLTranscoder for this encoding
XMLTransService::Codes resValue;
XMLTranscoder* xmlTrans = makeNewXMLTranscoder(fgMacLCPEncodingName,
resValue, kTempBufCount,
lcpTextEncoding, manager);
if (xmlTrans)
{
// Pass the XMLTranscoder over to the LPC transcoder
if (resValue == XMLTransService::Ok)
result = new MacOSLCPTranscoder(xmlTrans, manager);
else
delete xmlTrans;
}
return result;
}
bool MacOSUnicodeConverter::supportsSrcOfs() const
{
// For now, we don't support source offsets
return false;
}
void MacOSUnicodeConverter::upperCase(XMLCh* const toUpperCase) const
{
#if TARGET_API_MAC_CARBON
// If we're targeting carbon, use the CFString conversion to uppercase
int len = XMLString::stringLen(toUpperCase);
CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy(
kCFAllocatorDefault,
(UniChar*)toUpperCase,
len, // length
len, // capacity
kCFAllocatorNull);
CFStringUppercase(cfString, NULL);
CFRelease(cfString);
#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
// Use this if there's a reasonable c library available.
// Metrowerks does this reasonably
wchar_t c;
for (XMLCh* p = (XMLCh*)toUpperCase; ((c = *p) != 0); )
*p++ = std::towupper(c);
#else
#error Sorry, no support for upperCase
#endif
}
void MacOSUnicodeConverter::lowerCase(XMLCh* const toLowerCase) const
{
#if TARGET_API_MAC_CARBON
// If we're targeting carbon, use the CFString conversion to uppercase
int len = XMLString::stringLen(toLowerCase);
CFMutableStringRef cfString = CFStringCreateMutableWithExternalCharactersNoCopy(
kCFAllocatorDefault,
(UniChar*)toLowerCase,
len, // length
len, // capacity
kCFAllocatorNull);
CFStringLowercase(cfString, NULL);
CFRelease(cfString);
#elif defined(XML_METROWERKS) || (__GNUC__ >= 3 && _GLIBCPP_USE_WCHAR_T)
// Use this if there's a reasonable c library available.
// Metrowerks does this reasonably
wchar_t c;
for (XMLCh* p = (XMLCh*)toLowerCase; ((c = *p) != 0); )
*p++ = std::towlower(c);
#else
#error Sorry, no support for lowerCase
#endif
}
void
MacOSUnicodeConverter::ConvertWideToNarrow(const XMLCh* wide, char* narrow, std::size_t maxChars)
{
while (maxChars-- > 0)
if ((*narrow++ = *wide++) == 0)
break;
}
// ---------------------------------------------------------------------------
// MacOSTransService: The protected virtual transcoding service API
// ---------------------------------------------------------------------------
XMLTranscoder*
MacOSUnicodeConverter::makeNewXMLTranscoder(const XMLCh* const encodingName
, XMLTransService::Codes& resValue
, const unsigned int blockSize
, MemoryManager* const manager)
{
XMLTranscoder* result = NULL;
resValue = XMLTransService::Ok;
TextToUnicodeInfo textToUnicodeInfo = NULL;
UnicodeToTextInfo unicodeToTextInfo = NULL;
// Map the encoding to a Mac OS Encoding value
Str255 pasEncodingName;
char cEncodingName[256];
ConvertWideToNarrow(encodingName, cEncodingName, sizeof(cEncodingName));
CopyCStringToPascal(cEncodingName, pasEncodingName);
TextEncoding textEncoding = 0;
OSStatus status = TECGetTextEncodingFromInternetName (
&textEncoding,
pasEncodingName);
// Make a transcoder for that encoding
if (status == noErr)
result = makeNewXMLTranscoder(encodingName, resValue, blockSize, textEncoding, manager);
else
resValue = XMLTransService::UnsupportedEncoding;
return result;
}
XMLTranscoder*
MacOSUnicodeConverter::makeNewXMLTranscoder(const XMLCh* const encodingName
, XMLTransService::Codes& resValue
, const unsigned int blockSize
, TextEncoding textEncoding
, MemoryManager* const manager)
{
XMLTranscoder* result = NULL;
resValue = XMLTransService::Ok;
OSStatus status = noErr;
TECObjectRef textToUnicode = NULL;
TECObjectRef unicodeToText = NULL;
// We convert to and from utf16
TextEncoding utf16Encoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
kTextEncodingDefaultVariant,
kUnicode16BitFormat);
// Create a TEC from our encoding to utf16
if (status == noErr)
status = TECCreateConverter(&textToUnicode, textEncoding, utf16Encoding);
// Create a TEC from utf16 to our encoding
if (status == noErr)
status = TECCreateConverter(&unicodeToText, utf16Encoding, textEncoding);
if (status != noErr)
{
// Clean up on error
if (textToUnicode != NULL)
TECDisposeConverter(textToUnicode);
if (unicodeToText != NULL)
TECDisposeConverter(unicodeToText);
resValue = XMLTransService::UnsupportedEncoding;
}
else
{
// Create our transcoder, passing in the converters
result = new (manager) MacOSTranscoder(encodingName, textToUnicode, unicodeToText, blockSize, manager);
}
return result;
}
// ---------------------------------------------------------------------------
// IsMacOSUnicodeConverterSupported
// ---------------------------------------------------------------------------
bool
MacOSUnicodeConverter::IsMacOSUnicodeConverterSupported(void)
{
return UpgradeScriptInfoToTextEncoding != (void*)kUnresolvedCFragSymbolAddress
&& CreateTextToUnicodeInfoByEncoding != (void*)kUnresolvedCFragSymbolAddress
;
}
// ---------------------------------------------------------------------------
// MacOSTranscoder: Constructors and Destructor
// ---------------------------------------------------------------------------
MacOSTranscoder::MacOSTranscoder(const XMLCh* const encodingName
, TECObjectRef textToUnicode
, TECObjectRef unicodeToText
, const unsigned int blockSize
, MemoryManager* const manager) :
XMLTranscoder(encodingName, blockSize, manager),
mTextToUnicode(textToUnicode),
mUnicodeToText(unicodeToText)
{
}
MacOSTranscoder::~MacOSTranscoder()
{
// Dispose our text encoding converters
TECDisposeConverter(mTextToUnicode);
TECDisposeConverter(mUnicodeToText);
}
// ---------------------------------------------------------------------------
// MacOSTranscoder: The virtual transcoder API
// ---------------------------------------------------------------------------
unsigned int
MacOSTranscoder::transcodeFrom( const XMLByte* const srcData
, const unsigned int srcCount
, XMLCh* const toFill
, const unsigned int maxChars
, unsigned int& bytesEaten
, unsigned char* const charSizes)
{
// Reset the tec state (since we don't know that we're part of a
// larger run of text).
TECClearConverterContextInfo(mTextToUnicode);
// Do the conversion
ByteCount bytesConsumed = 0;
ByteCount bytesProduced = 0;
OSStatus status = TECConvertText(mTextToUnicode,
(ConstTextPtr) srcData,
srcCount, // inputBufferLength
&bytesConsumed, // actualInputLength
(TextPtr) toFill, // outputBuffer
maxChars * sizeof(XMLCh), // outputBufferLength
&bytesProduced); // actualOutputLength
// Ignorable error codes
if( status == kTECUsedFallbacksStatus
|| status == kTECOutputBufferFullStatus
|| status == kTECPartialCharErr
)
status = noErr;
if (status != noErr)
ThrowXML(TranscodingException, XMLExcepts::Trans_BadSrcSeq);
std::size_t charsProduced = bytesProduced / sizeof(XMLCh);
bytesEaten = bytesConsumed;
return charsProduced;
}
unsigned int
MacOSTranscoder::transcodeTo(const XMLCh* const srcData
, const unsigned int srcCount
, XMLByte* const toFill
, const unsigned int maxBytes
, unsigned int& charsEaten
, const UnRepOpts options)
{
// Reset the tec state (since we don't know that we're part of a
// larger run of text).
TECClearConverterContextInfo(mUnicodeToText);
// Do the conversion
ByteCount bytesConsumed = 0;
ByteCount bytesProduced = 0;
OSStatus status = TECConvertText(mUnicodeToText,
(ConstTextPtr) srcData,
srcCount * sizeof(XMLCh), // inputBufferLength
&bytesConsumed, // actualInputLength
(TextPtr) toFill, // outputBuffer
maxBytes, // outputBufferLength
&bytesProduced); // actualOutputLength
// Ignorable error codes
if( status == kTECUsedFallbacksStatus
|| status == kTECOutputBufferFullStatus
|| status == kTECPartialCharErr
)
status = noErr;
std::size_t charsConsumed = bytesConsumed / sizeof(XMLCh);
// Deal with errors
if (status != noErr)
{
if (status == kTECUnmappableElementErr && options == UnRep_Throw)
{
XMLCh tmpBuf[17];
XMLString::binToText((unsigned int)&srcData[charsConsumed], tmpBuf, 16, 16);
ThrowXML2
(
TranscodingException
, XMLExcepts::Trans_Unrepresentable
, tmpBuf
, getEncodingName()
);
}
}
charsEaten = charsConsumed;
return bytesProduced;
}
bool
MacOSTranscoder::canTranscodeTo(const unsigned int toCheck) const
{
//
// If the passed value is really a surrogate embedded together, then
// we need to break it out into its two chars. Else just one.
//
unsigned int srcCnt = 0;
UniChar srcBuf[2];
if (toCheck & 0xFFFF0000)
{
srcBuf[srcCnt++] = XMLCh(toCheck >> 10) + 0xD800;
srcBuf[srcCnt++] = XMLCh(toCheck & 0x3FF) + 0xDC00;
}
else
{
srcBuf[srcCnt++] = XMLCh(toCheck);
}
// Clear the converter state: we're in a new run of text
TECClearConverterContextInfo(mUnicodeToText);
//
// Use a local temp buffer that would hold any sane multi-byte char
// sequence and try to transcode this guy into it.
//
char tmpBuf[64];
ByteCount bytesConsumed = 0;
ByteCount bytesProduced = 0;
OSStatus status = TECConvertText(mUnicodeToText,
(ConstTextPtr) srcBuf,
srcCnt * sizeof(XMLCh), // inputBufferLength
&bytesConsumed, // actualInputLength
(TextPtr) tmpBuf, // outputBuffer
sizeof(tmpBuf), // outputBufferLength
&bytesProduced); // actualOutputLength
std::size_t charsConsumed = bytesConsumed / sizeof(XMLCh);
// Return true if we transcoded the character(s)
// successfully
return status == noErr && charsConsumed == srcCnt;
}
// ---------------------------------------------------------------------------
// MacOSLCPTranscoder: Constructors and Destructor
// ---------------------------------------------------------------------------
MacOSLCPTranscoder::MacOSLCPTranscoder(XMLTranscoder* const transcoder, MemoryManager* const manager)
: mTranscoder(transcoder),
mManager(manager)
{
}
MacOSLCPTranscoder::~MacOSLCPTranscoder()
{
// Dispose the XMLTranscoder we're using
delete mTranscoder;
}
// ---------------------------------------------------------------------------
// MacOSLCPTranscoder: Implementation of the virtual transcoder interface
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// In order to implement calcRequiredSize we have to go ahead and do the
// conversion, which seems quite painful. The Mac Unicode converter has
// no way of saying "don't actually do the conversion." So we end up
// converting twice. It would be nice if the calling code could do some
// extra buffering to avoid this result.
// ---------------------------------------------------------------------------
unsigned int
MacOSLCPTranscoder::calcRequiredSize(const char* const srcText
, MemoryManager* const manager)
{
if (!srcText)
return 0;
// Lock our mutex to gain exclusive access to the transcoder
// since the lcp transcoders are used globally.
XMLMutexLock lock(&mMutex);
std::size_t totalCharsProduced = 0;
const char* src = srcText;
unsigned int srcCnt = std::strlen(src);
// Iterate over the characters, converting into a temporary buffer which we'll discard.
// All this to get the size required.
while (srcCnt > 0)
{
TempXMLBuf tmpBuf;
unsigned int bytesConsumed = 0;
unsigned int charsProduced = mTranscoder->transcodeFrom((XMLByte*)src, srcCnt,
tmpBuf, kTempBufCount,
bytesConsumed,
NULL);
src += bytesConsumed;
srcCnt -= bytesConsumed;
totalCharsProduced += charsProduced;
// Bail out if nothing more was produced
if (charsProduced == 0)
break;
}
// Return number of XMLCh characters required (not counting terminating NULL!)
return totalCharsProduced;
}
// ---------------------------------------------------------------------------
// In order to implement calcRequiredSize we have to go ahead and do the
// conversion, which seems quite painful. The Mac Unicode converter has
// no way of saying "don't actually do the conversion." So we end up
// converting twice. It would be nice if the calling code could do some
// extra buffering to avoid this result.
// ---------------------------------------------------------------------------
unsigned int
MacOSLCPTranscoder::calcRequiredSize(const XMLCh* const srcText
, MemoryManager* const manager)
{
if (!srcText)
return 0;
// Lock our mutex to gain exclusive access to the transcoder
// since the lcp transcoders are used globally.
XMLMutexLock lock(&mMutex);
std::size_t totalBytesProduced = 0;
const XMLCh* src = srcText;
unsigned int srcCnt = XMLString::stringLen(src);
// Iterate over the characters, converting into a temporary buffer which we'll discard.
// All this to get the size required.
while (srcCnt > 0)
{
TempCharBuf tmpBuf;
unsigned int charsConsumed = 0;
unsigned int bytesProduced = mTranscoder->transcodeTo(src, srcCnt,
(XMLByte*)tmpBuf, kTempBufCount,
charsConsumed,
XMLTranscoder::UnRep_RepChar);
src += charsConsumed;
srcCnt -= charsConsumed;
totalBytesProduced += bytesProduced;
// Bail out if nothing more was produced
if (bytesProduced == 0)
break;
}
// Return number of characters required (not counting terminating NULL!)
return totalBytesProduced;
}
char*
MacOSLCPTranscoder::transcode(const XMLCh* const srcText)
{
// Transcode using a memory manager that allocates
// memory using new[].
return transcode(srcText, XMLPlatformUtils::fgArrayMemoryManager);
}
char*
MacOSLCPTranscoder::transcode(const XMLCh* const srcText,
MemoryManager* const manager)
{
if (!srcText)
return NULL;
// Lock our mutex to gain exclusive access to the transcoder
// since the lcp transcoders are used globally.
XMLMutexLock lock(&mMutex);
ArrayJanitor<char> result(0);
const XMLCh* src = srcText;
unsigned int srcCnt = XMLString::stringLen(src);
std::size_t resultCnt = 0;
// Iterate over the characters, buffering into a local temporary
// buffer, which we dump into an allocated (and reallocated, as necessary)
// string for return.
while (srcCnt > 0)
{
// Transcode some characters
TempCharBuf tmpBuf;
unsigned int charsConsumed = 0;
unsigned int bytesProduced = mTranscoder->transcodeTo(src, srcCnt,
(XMLByte*)tmpBuf, kTempBufCount,
charsConsumed,
XMLTranscoder::UnRep_RepChar);
src += charsConsumed;
srcCnt -= charsConsumed;
// Move the data to result buffer, reallocating as needed
if (bytesProduced > 0)
{
// Allocate space for result
std::size_t newCnt = resultCnt + bytesProduced;
ArrayJanitor<char> newResult
(
(char*) manager->allocate((newCnt + 1) * sizeof(char)) //new char[newCnt + 1]
, manager
);
if (newResult.get() != NULL)
{
// Incorporate previous result
if (result.get() != NULL)
std::memcpy(newResult.get(), result.get(), resultCnt);
result.reset(newResult.release());
// Copy in new data
std::memcpy(result.get() + resultCnt, tmpBuf, bytesProduced);
resultCnt = newCnt;
// Terminate the result
result[resultCnt] = '\0';
}
}
else
break;
}
if (!result.get())
{
// No error, and no result: we probably processed a zero length
// input, in which case we want a valid zero length output.
result.reset
(
(char*) manager->allocate(sizeof(char))//new char[1]
, manager
);
result[0] = '\0';
}
return result.release();
}
XMLCh*
MacOSLCPTranscoder::transcode(const char* const srcText)
{
// Transcode using a memory manager that allocates
// memory using new[].
return transcode(srcText, XMLPlatformUtils::fgArrayMemoryManager);
}
XMLCh*
MacOSLCPTranscoder::transcode(const char* const srcText,
MemoryManager* const manager)
{
if (!srcText)
return NULL;
// Lock our mutex to gain exclusive access to the transcoder
// since the lcp transcoders are used globally.
XMLMutexLock lock(&mMutex);
ArrayJanitor<XMLCh> result(0);
const char* src = srcText;
std::size_t srcCnt = std::strlen(src);
std::size_t resultCnt = 0;
// Iterate over the characters, buffering into a local temporary
// buffer, which we dump into an allocated (and reallocated, as necessary)
// string for return.
while (srcCnt > 0)
{
// Transcode some characters
TempXMLBuf tmpBuf;
unsigned int bytesConsumed = 0;
unsigned int charsProduced = mTranscoder->transcodeFrom((XMLByte*)src, srcCnt,
tmpBuf, kTempBufCount,
bytesConsumed,
NULL);
src += bytesConsumed;
srcCnt -= bytesConsumed;
// Move the data to result buffer, reallocating as needed
if (charsProduced > 0)
{
// Allocate space for result
std::size_t newCnt = resultCnt + charsProduced;
ArrayJanitor<XMLCh> newResult
(
(XMLCh*) manager->allocate((newCnt + 1) * sizeof(XMLCh)) //new XMLCh[newCnt + 1]
, manager
);
if (newResult.get() != NULL)
{
// Incorporate previous result
if (result.get() != NULL)
std::memcpy(newResult.get(), result.get(), resultCnt * sizeof(XMLCh));
result.reset(newResult.release());
// Copy in new data
std::memcpy(result.get() + resultCnt, tmpBuf, charsProduced * sizeof(XMLCh));
resultCnt = newCnt;
result[resultCnt] = 0;
}
}
else
break;
}
if (!result.get())
{
// No error, and no result: we probably processed a zero length
// input, in which case we want a valid zero length output.
result.reset
(
(XMLCh*) manager->allocate(sizeof(XMLCh))//new XMLCh[1]
, manager
);
result[0] = '\0';
}
return result.release();
}
bool
MacOSLCPTranscoder::transcode( const char* const toTranscode
, XMLCh* const toFill
, const unsigned int maxChars
, MemoryManager* const manager)
{
// toFill must contain space for maxChars XMLCh characters + 1 (for terminating NULL).
// Check for a couple of psycho corner cases
if (!toTranscode || !maxChars || !*toTranscode)
{
toFill[0] = 0;
return true;
}
// Lock our mutex to gain exclusive access to the transcoder
// since the lcp transcoders are used globally.
XMLMutexLock lock(&mMutex);
// Call the transcoder to do the work
unsigned int srcLen = std::strlen(toTranscode);
unsigned int bytesConsumed = 0;
unsigned int charsProduced = mTranscoder->transcodeFrom((XMLByte*)toTranscode, srcLen,
toFill, maxChars,
bytesConsumed,
NULL);
// Zero terminate the output string
toFill[charsProduced] = L'\0';
// Return true if we consumed all of the characters
return (bytesConsumed == srcLen);
}
bool
MacOSLCPTranscoder::transcode( const XMLCh* const toTranscode
, char* const toFill
, const unsigned int maxChars
, MemoryManager* const manager)
{
// toFill must contain space for maxChars bytes + 1 (for terminating NULL).
// Check for a couple of psycho corner cases
if (!toTranscode || !maxChars || !*toTranscode)
{
toFill[0] = 0;
return true;
}
// Lock our mutex to gain exclusive access to the transcoder
// since the lcp transcoders are used globally.
XMLMutexLock lock(&mMutex);
// Call the transcoder to do the work
unsigned int srcLen = XMLString::stringLen(toTranscode);
unsigned int charsConsumed = 0;
unsigned int bytesProduced = mTranscoder->transcodeTo(toTranscode, srcLen,
(XMLByte*)toFill, maxChars,
charsConsumed,
XMLTranscoder::UnRep_RepChar);
// Zero terminate the output string
toFill[bytesProduced] = '\0';
// Return true if we consumed all of the characters
return (charsConsumed == srcLen);
}
XERCES_CPP_NAMESPACE_END