blob: 3bb4d43de3b47149ad2dfe9d8464b4cf0cd10eaf [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: iconv_cnv.cpp 568078 2007-08-21 11:43:25Z amassari $
*/
#include <xercesc/util/XercesDefs.hpp>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <iconv_util.hpp>
#include <iconv_cnv.hpp>
#include <qmhrtvm.h>
#include <qusec.h>
#include <xercesc/util/Platforms/OS400/OS400PlatformUtils.hpp>
#include <iconv.h>
#include <errno.h>
#define CHUNK_SIZE 5*1024
XERCES_CPP_NAMESPACE_BEGIN
void Convert_toUnicode(UConverter *,
UChar **,
const UChar *,
const char **,
const char *,
int32_t* offsets,
int,
UErrorCode *);
void Convert_fromUnicode(UConverter *,
char **,
const char *,
const UChar **,
const UChar *,
int32_t* offsets,
int,
UErrorCode *);
UChar getNextUChar(UConverter* converter,
const char** source,
const char* sourceLimit,
UErrorCode* err);
void T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
UConverter * inConverter,
char **target,
const char *targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
int flush,
UErrorCode * err);
void Converter_fromUnicode(UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
int flush,
UErrorCode * err);
/*Calls through createConverter */
UConverter* ucnv_open (const char *name,
UErrorCode * err)
{
if (U_FAILURE (*err))
return NULL;
/*In case "name" is NULL we want to open the default converter */
if (name != NULL)
return createConverter (name, err);
else
return createConverter (iconv_getDefaultCodepage(), err);
}
/*Extracts the UChar* to a char* and calls through createConverter */
UConverter* ucnv_openU (const UChar * name,
UErrorCode * err)
{
char asciiName[MAX_CONVERTER_NAME_LENGTH];
if (U_FAILURE (*err))
return NULL;
if (name == NULL)
return ucnv_open (NULL, err);
if (u_strlen (name) > MAX_CONVERTER_NAME_LENGTH)
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
return ucnv_open (u_austrcpy (asciiName, name), err);
}
/*Decreases the reference counter in the shared immutable section of the object
*and frees the mutable part*/
void ucnv_close (UConverter * converter)
{
/* for iconv we will close the handles and free the converter storage*/
iconv_close(converter->sharedData->toiconv_handle);
iconv_close(converter->sharedData->fromiconv_handle);
if (converter == NULL)
return;
free (converter);
return;
}
/* currently required for iconv suuport */
/* XMLReader calls this and uses fact that it is different than min
to go thru a calculation otherwise if max and min same then there is
a calculation speed up - we will keep the two routines but have them
return different sizes - later will ifdef XMLreader for ICONV to remove the calls*/
int8_t ucnv_getMaxCharSize (const UConverter * converter)
{
return (4); /* dummy returns just need to be different in XMLParser - need something else for ICU replacement */
}
/* currently required for iconv support */
/* see note for ucnv_getMaxCharSize */
int8_t ucnv_getMinCharSize (const UConverter * converter)
{
return (1);
}
void ucnv_fromUnicode (UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t* offsets,
int flush,
UErrorCode * err)
{
/*
* Check parameters in for all conversions
*/
if (U_FAILURE (*err)) return;
if ((_this == NULL) || ((char *) targetLimit < *target) || (sourceLimit < *source))
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
/*calls the specific conversion routines */
Converter_fromUnicode(_this,target,targetLimit,source,sourceLimit,
offsets,flush,err);
return;
}
void ucnv_toUnicode (UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
int flush,
UErrorCode * err)
{
/*
* Check parameters in for all conversions
*/
if (U_FAILURE (*err)) return;
if ((_this == NULL) || ((UChar *) targetLimit < *target) || (sourceLimit < *source))
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
/*calls the specific conversion routines */
Convert_toUnicode(_this,target,targetLimit,source,sourceLimit,
offsets,flush,err);
return;
}
int32_t ucnv_fromUChars (const UConverter * converter,
char *target,
int32_t targetSize,
const UChar * source,
UErrorCode * err)
{
const UChar *mySource = source;
const UChar *mySource_limit;
int32_t mySourceLength = 0;
UConverter myConverter;
char *myTarget = target;
int32_t targetCapacity = 0;
if (U_FAILURE (*err))
return 0;
if ((converter == NULL) || (targetSize < 0))
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/*makes a local copy of the UConverter */
myConverter = *converter;
/*if the source is empty we return immediately */
mySourceLength = u_strlen (source);
if (mySourceLength == 0)
{
/*for consistency we still need to
*store 0 in the targetCapacity
*if the user requires it
*/
return 0;
}
mySource_limit = mySource + mySourceLength;
if (targetSize > 0)
{
ucnv_fromUnicode (&myConverter,
&myTarget,
target + targetSize,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
targetCapacity = myTarget - target;
}
/*Updates targetCapacity to contain the number of bytes written to target */
if (targetSize == 0)
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
/* If the output buffer is exhausted, we need to stop writing
* to it but continue the conversion in order to store in targetSize
* the number of bytes that was required*/
if (*err == U_BUFFER_OVERFLOW_ERROR)
{
char target2[CHUNK_SIZE];
char *target2_alias = target2;
const char *target2_limit = target2 + CHUNK_SIZE;
/*We use a stack allocated buffer around which we loop
*(in case the output is greater than CHUNK_SIZE)
*/
while (*err == U_BUFFER_OVERFLOW_ERROR)
{
*err = U_ZERO_ERROR;
target2_alias = target2;
ucnv_fromUnicode (&myConverter,
&target2_alias,
target2_limit,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
/*updates the output parameter to contain the number of char required */
targetCapacity += (target2_alias - target2) + 1;
}
/*We will set the erro code to BUFFER_OVERFLOW_ERROR only if
*nothing graver happened in the previous loop*/
(targetCapacity)--;
if (U_SUCCESS (*err))
*err = U_BUFFER_OVERFLOW_ERROR;
}
return targetCapacity;
}
int32_t ucnv_toUChars (const UConverter * converter,
UChar * target,
int32_t targetSize,
const char *source,
int32_t sourceSize,
UErrorCode * err)
{
const char *mySource = source;
const char *mySource_limit = source + sourceSize;
UConverter myConverter;
UChar *myTarget = target;
int32_t targetCapacity;
if (U_FAILURE (*err))
return 0;
if ((converter == NULL) || (targetSize < 0) || (sourceSize < 0))
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/*Means there is no work to be done */
if (sourceSize == 0)
{
/*for consistency we still need to
*store 0 in the targetCapacity
*if the user requires it
*/
if (targetSize >= 1)
{
target[0] = 0x0000;
return 1;
}
else
return 0;
}
/*makes a local copy of the UConverter */
myConverter = *converter;
/*Not in pure pre-flight mode */
if (targetSize > 0)
{
/* Changed from (targetSize * 2) to (targetSize) */
ucnv_toUnicode (&myConverter,
&myTarget,
target + (targetSize-1), /*Save a spot for the Null terminator */
&mySource,
mySource_limit,
NULL,
TRUE,
err);
/*Null terminates the string */
*(myTarget) = 0x0000;
}
/*Rigs targetCapacity to have at least one cell for zero termination */
/*Updates targetCapacity to contain the number of bytes written to target */
targetCapacity = 1;
targetCapacity += myTarget - target;
if (targetSize == 0)
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
/* If the output buffer is exhausted, we need to stop writing
* to it but if the input buffer is not exhausted,
* we need to continue the conversion in order to store in targetSize
* the number of bytes that was required
*/
if (*err == U_BUFFER_OVERFLOW_ERROR)
{
UChar target2[CHUNK_SIZE];
UChar *target2_alias = target2;
const UChar *target2_limit = target2 + CHUNK_SIZE;
/*We use a stack allocated buffer around which we loop
(in case the output is greater than CHUNK_SIZE) */
while (*err == U_BUFFER_OVERFLOW_ERROR)
{
*err = U_ZERO_ERROR;
target2_alias = target2;
ucnv_toUnicode (&myConverter,
&target2_alias,
target2_limit,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
/*updates the output parameter to contain the number of char required */
targetCapacity += target2_alias - target2 + 1;
}
(targetCapacity)--; /*adjust for last one */
if (U_SUCCESS (*err))
*err = U_BUFFER_OVERFLOW_ERROR;
}
return targetCapacity;
}
UChar ucnv_getNextUChar (UConverter * converter,
const char **source,
const char *sourceLimit,
UErrorCode * err)
{
/*calls the specific conversion routines */
/*as dictated in a code review, avoids a switch statement */
return getNextUChar(converter,source,sourceLimit,err);
}
/**************************
* Will convert a sequence of bytes from one codepage to another.
* @param toConverterName: The name of the converter that will be used to encode the output buffer
* @param fromConverterName: The name of the converter that will be used to decode the input buffer
* @param target: Pointer to the output buffer* written
* @param targetLength: on input contains the capacity of target, on output the number of bytes copied to target
* @param source: Pointer to the input buffer
* @param sourceLength: on input contains the capacity of source, on output the number of bytes processed in "source"
* @param internal: used internally to store store state data across calls
* @param err: fills in an error status
*/
void
T_UConverter_fromCodepageToCodepage (UConverter * outConverter,
UConverter * inConverter,
char **target,
const char *targetLimit,
const char **source,
const char *sourceLimit,
int32_t* offsets,
int flush,
UErrorCode * err)
{
UChar out_chunk[CHUNK_SIZE];
const UChar *out_chunk_limit = out_chunk + CHUNK_SIZE;
UChar *out_chunk_alias;
UChar const *out_chunk_alias2;
if (U_FAILURE (*err)) return;
/*loops until the input buffer is completely consumed
*or if an error has be encountered
*first we convert from inConverter codepage to Unicode
*then from Unicode to outConverter codepage
*/
while ((*source != sourceLimit) && U_SUCCESS (*err))
{
out_chunk_alias = out_chunk;
ucnv_toUnicode (inConverter,
&out_chunk_alias,
out_chunk_limit,
source,
sourceLimit,
NULL,
flush,
err);
/*BUFFER_OVERFLOW_ERROR means that the output "CHUNK" is full
*we will require at least another loop (it's a recoverable error)
*/
if (U_SUCCESS (*err) || (*err == U_BUFFER_OVERFLOW_ERROR))
{
*err = U_ZERO_ERROR;
out_chunk_alias2 = out_chunk;
while ((out_chunk_alias2 != out_chunk_alias) && U_SUCCESS (*err))
{
ucnv_fromUnicode (outConverter,
target,
targetLimit,
&out_chunk_alias2,
out_chunk_alias,
NULL,
TRUE,
err);
}
}
else
break;
}
return;
}
int32_t ucnv_convert(const char *toConverterName,
const char *fromConverterName,
char *target,
int32_t targetSize,
const char *source,
int32_t sourceSize,
UErrorCode * err)
{
const char *mySource = source;
const char *mySource_limit = source + sourceSize;
int32_t mySourceLength = 0;
UConverter *inConverter;
UConverter *outConverter;
char *myTarget = target;
int32_t targetCapacity = 0;
if (U_FAILURE (*err))
return 0;
if ((targetSize < 0) || (sourceSize < 0))
{
*err = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
/*if there is no input data, we're done */
if (sourceSize == 0)
{
/*in case the caller passed an output ptr
*we update it
*/
return 0;
}
/*create the converters */
inConverter = ucnv_open (fromConverterName, err);
if (U_FAILURE (*err)) return 0;
outConverter = ucnv_open (toConverterName, err);
if (U_FAILURE (*err))
{
ucnv_close (inConverter);
return 0;
}
if (targetSize > 0)
{
T_UConverter_fromCodepageToCodepage (outConverter,
inConverter,
&myTarget,
target + targetSize,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
}
/*Updates targetCapacity to contain the number of bytes written to target */
targetCapacity = myTarget - target;
if (targetSize == 0)
{
*err = U_BUFFER_OVERFLOW_ERROR;
}
/* If the output buffer is exhausted, we need to stop writing
* to it but continue the conversion in order to store in targetSize
* the number of bytes that was required*/
if (*err == U_BUFFER_OVERFLOW_ERROR)
{
char target2[CHUNK_SIZE];
char *target2_alias = target2;
const char *target2_limit = target2 + CHUNK_SIZE;
/*We use a stack allocated buffer around which we loop
*(in case the output is greater than CHUNK_SIZE)
*/
while (*err == U_BUFFER_OVERFLOW_ERROR)
{
*err = U_ZERO_ERROR;
target2_alias = target2;
T_UConverter_fromCodepageToCodepage (outConverter,
inConverter,
&target2_alias,
target2_limit,
&mySource,
mySource_limit,
NULL,
TRUE,
err);
/*updates the output parameter to contain the number of char required */
targetCapacity += (target2_alias - target2) + 1;
}
/*We will set the erro code to BUFFER_OVERFLOW_ERROR only if
*nothing graver happened in the previous loop*/
(targetCapacity)--;
if (U_SUCCESS (*err))
*err = U_BUFFER_OVERFLOW_ERROR;
}
ucnv_close (inConverter);
ucnv_close (outConverter);
return targetCapacity;
}
void Converter_fromUnicode(UConverter * _this,
char **target,
const char *targetLimit,
const UChar ** source,
const UChar * sourceLimit,
int32_t *offsets,
int flush,
UErrorCode * err)
{
int chardone;
const UChar *mySource = *source;
unsigned char *myTarget = (unsigned char *) *target;
int32_t targetLength = targetLimit - (char *) myTarget;
int32_t sourceLength = (sourceLimit - mySource) * 2;
unsigned char targetChar = 0x00;
/* pick up the iconv handle and perform the conversion */
errno = 0;
chardone =iconv(_this->sharedData->fromiconv_handle,(char**)source, (size_t*) &sourceLength,target,(size_t *)&targetLength);
if (errno!=0)
if (errno == E2BIG)
{
*err = U_BUFFER_OVERFLOW_ERROR;
return;
}
else
if ((errno ==EBADDATA)|| (errno ==ECONVERT))
{
char errno_id[7];
send_message(NULL,ICONV_CONVERT_PROBLEM,'d');
convert_errno(errno_id,errno);
send_message(NULL,errno_id,'d');
*err = U_INVALID_CHAR_FOUND;
return;
}
return;
}
void Convert_toUnicode(UConverter * _this,
UChar ** target,
const UChar * targetLimit,
const char **source,
const char *sourceLimit,
int32_t *offsets,
int flush,
UErrorCode * err)
{
char *mySource = (char *) *source;
UChar *myTarget = *target;
int32_t targetLength = (targetLimit - myTarget)*2; /* multiply by 2 */
int32_t sourceLength = (sourceLimit - (char *) mySource);
int chardone;
/* pick up the iconv handle */
errno = 0;
chardone =iconv(_this->sharedData->toiconv_handle,(char**)source, (size_t*) &sourceLength,(char **)target,(size_t *)&targetLength);
if (errno!=0)
{
if (errno == E2BIG)
{
*err = U_BUFFER_OVERFLOW_ERROR;
return;
}
else
if ((errno ==EBADDATA)|| (errno ==ECONVERT))
{
char errno_id[7];
send_message(NULL,ICONV_CONVERT_PROBLEM,'d');
convert_errno(errno_id,errno);
send_message(NULL,errno_id,'d');
*err = U_INVALID_CHAR_FOUND;
return;
}
}
return;
}
UChar getNextUChar(UConverter* converter,
const char** source,
const char* sourceLimit,
UErrorCode* err)
{
UChar myUChar;
UChar* myUCharptr;
size_t numberibytes=sizeof(UChar);
size_t numberobytes=sizeof(UChar);
int chardone;
if ((*source)+1 > sourceLimit)
{
*err = U_INDEX_OUTOFBOUNDS_ERROR;
return 0xFFFD;
}
/*pick up the iconv handle */
/* convert the requested character - need to cache characters 6 will do - XMLReader is using this function to get header to process*/
myUCharptr = &myUChar;
chardone =iconv(converter->sharedData->toiconv_handle,(char**)source, (size_t*) &numberibytes,(char **)&myUCharptr,(size_t *)&numberobytes);
if (myUChar != 0xFFFD) return myUChar;
else
{
UChar* myUCharPtr = &myUChar;
const char* sourceFinal = *source;
*err = U_INVALID_CHAR_FOUND;
/*makes the internal caching transparent to the user*/
if (*err == U_INDEX_OUTOFBOUNDS_ERROR) *err = U_ZERO_ERROR;
return myUChar;
}
}
XERCES_CPP_NAMESPACE_END