blob: f3595e28a9b92eb84ec1d23d7298fb224873dc9f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: uniconv.cpp 568078 2007-08-21 11:43:25Z amassari $
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <ctype.h>
#include <cunhc.h>
#include "ccsid.h"
#include "uniconv.h"
XERCES_CPP_NAMESPACE_BEGIN
// Size in bytes of the work buffer handed to the character conversion
// service (stored in Wrk_Buf_Len by uniconv_open). The value is parenthesised
// so the macro expands correctly inside larger expressions such as
// "x / WORK_BUFFER_SIZE".
#define WORK_BUFFER_SIZE (16*1024)
// Size in bytes of the dynamic data area (DDA) reserved for the character
// conversion service (stored in DDA_Buf_Len by uniconv_open).
#define DDA_NEEDED CUNBCPRM_DDA_REQ
// Upper bound on how often a service call is retried while it keeps
// reporting a missing or invalid handle.
#define RETRY_THRESHOLD 10000
// Copies the name in s to d, dropping every ' ', '_' and '-' and upper
// casing every other character. At most max characters are stored, followed
// by a terminating NUL, so d must have room for max+1 bytes.
//
// Returns the number of characters consumed from s (its full length,
// separators included) on success, or -1 if the stripped name needs more
// than max characters. On failure d still holds the NUL terminated prefix
// that did fit.
static int stripNameCopy(const char *s,char *d,int max)
{
    int si = 0;
    int di = 0;

    for (; s[si] != '\0'; si++) {
        const char c = s[si];

        // Separators never reach the output, so they must not count
        // against the size limit either (e.g. a trailing blank).
        if ( (c == ' ') || (c == '_') || (c == '-') )
            continue;

        if (di >= max) {
            d[di] = 0;
            return -1;
        }

        // toupper() is only defined for EOF and unsigned char values.
        d[di++] = (char) toupper((unsigned char) c);
    }
    d[di] = 0;
    return si;
}
// This takes a name and does a lookup into the ccsid table (from ccsid.h)
// to find the corresponding ccsid. It also checks if the string ends in s390
// and returns that information to the caller.
// The lookup into the table is done via a binary search since we know that the
// table was nicely sorted for us.
static int getccsid(const char *s,int * is390)
{
char tmpstr[_AE_MAX_CODESET_NAME_LENGTH];
int start;
int limit;
int index;
int result;
int thelen;
// Clean up the name....
if (s == NULL)
return -1;
if ((thelen = stripNameCopy(s,tmpstr,_AE_MAX_CODESET_NAME_LENGTH-1)) == -1)
return -1;
// Check for the S390 string in the name
*is390 = 0;
if ( (strstr((char *)tmpstr, "S390")) != NULL )
*is390 = 1;
// Now lookup the name via a binary search
start = 0;
limit = _AE_NUM_OF_CODESETS;
index = limit/2;
while ( ((result=strcoll(tmpstr, CCSID_MAPPING[index].NAME)) != 0) &&
(start < limit-1) ) {
if (result < 0)
limit = index;
else
start = index;
index = (start+limit)/2;
}
if (result != 0 && start >= limit-1)
return -1;
return CCSID_MAPPING[index].CCSID;
}
// **********************************************************************
// These are the character conversion services
// **********************************************************************
// "Open" a conversion from srcenc to destenc.
//
// Allocates one block that holds the CUNBCPRM parameter area, the dynamic
// data area (DDA) and the work buffer that unicode services requires, then
// calls unicode services with a 0 source length so that it can initialize
// its handle.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
// Returns the handle on success. On failure returns (uniconv_t)-1 with errno
// set to ENOENT when either encoding name is unknown, or EINVAL when the
// service call does not complete successfully. If the allocation itself
// fails, (uniconv_t)-1 is returned and errno is left as malloc set it.
// A successful handle must be released with uniconv_close.
uniconv_t uniconv_open(const char *destenc, const char *srcenc) {
    CUNBCPRM defparms = {CUNBCPRM_DEFAULT};
    CUNBCPRM * tmpp;
    void * handle_area;
    char * base;
    char * dda;
    char technique[8];
    int srcis390;
    int destis390;
    int retry_count;

    errno = 0;
    handle_area = malloc (sizeof(CUNBCPRM)+DDA_NEEDED+WORK_BUFFER_SIZE+8);
    tmpp = (CUNBCPRM *) handle_area;
    if (tmpp==NULL)
        return (uniconv_t)-1;
    base = (char *) handle_area;

    // initialize the parm area with defaults, then start filling it
    // in with our values.
    memcpy(tmpp,&defparms,sizeof(defparms));
    tmpp->Src_Buf_Len= 0;

    // get the ccsids.
    if ( ((tmpp->Src_CCSID=getccsid(srcenc,&srcis390)) == -1) ||
         ((tmpp->Targ_CCSID=getccsid(destenc,&destis390)) == -1) ) {
        errno=ENOENT;
        free(handle_area);
        return (uniconv_t)-1;
    }

    // Carve the work buffer and the DDA out of the block with plain pointer
    // arithmetic; casting the address through unsigned int would truncate it
    // wherever pointers are wider than unsigned int.
    tmpp->Wrk_Buf_Ptr=(void*) (base + sizeof(CUNBCPRM)+DDA_NEEDED +8);
    tmpp->Wrk_Buf_Len=WORK_BUFFER_SIZE;

    // Doubleword align the DDA area (round up to the next multiple of 8;
    // only the low three address bits are inspected).
    dda = base + sizeof(CUNBCPRM);
    dda += (8 - ((size_t) dda & 7)) & 7;
    tmpp->DDA_Buf_Ptr=(void*) dda;
    tmpp->DDA_Buf_Len=DDA_NEEDED;

    // This flag tells the services to automatically refresh the handle if it
    // becomes invalid.
    // Use next two lines of code on old z/OS levels where Flag1 is a char field
    // tmpp->Flag1|=CUNBCPRM_REFRESH_AT_INV_HANDLE_START;
    // tmpp->Flag1|=CUNBCPRM_SUB_ACTION_SUBSTITUTE;
    // Use next two lines of code on later z/OS levels where Flag1 is bit field
    tmpp->Flag1.Inv_Handle = 1;
    tmpp->Flag1.Sub_Action = 1;

    /* Determine which technique to use */
    // The technique field is filled with exactly 8 bytes. Build the blank
    // padded value explicitly instead of copying 8 bytes out of a shorter
    // string literal, which would read past the end of the literal.
    memset(technique, ' ', sizeof(technique));
    if ( (srcis390) || (destis390) )
        // This technique causes it to swap LF and NL.
        technique[0] = 'L';
    memcpy(tmpp->Technique,technique,sizeof(technique));

    // Retry if the services are busy reloading their tables.
    retry_count = 0;
    while (retry_count < RETRY_THRESHOLD) {
        CUNLCNV(tmpp);
        if (tmpp->Return_Code == CUN_RC_OK)
            break;
        else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                  ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
            // Let it loop around again
            retry_count++;
        else
            break;
    }

    if (tmpp->Return_Code != CUN_RC_OK) {
        free(handle_area);
        errno=EINVAL;
        handle_area = (uniconv_t)-1;
    }
    return handle_area;
}
// Closes a conversion handle. All that is required for close is to free the
// handle buffer.
//
// Returns 0 on success. A NULL handle or the failure value (uniconv_t)-1 is
// rejected with -1 and errno set to EBADF.
int uniconv_close(uniconv_t handle_area) {
    errno = 0;
    // Compare against the two invalid handle values directly: squeezing the
    // pointer through an int can truncate it and misjudge a valid handle.
    if ( (handle_area == NULL) || (handle_area == (uniconv_t)-1) ) {
        errno=EBADF;
        return -1;
    }
    free(handle_area);
    return 0;
}
// This does the real conversion.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
// cd is the handle; *inbuf / *inbytesleft describe the source data and
// *outbuf / *outbytesleft the target buffer. All four are updated from the
// service's parameter area after the call.
//
// Returns the number of source bytes consumed. When the service does not
// complete successfully, errno is set to E2BIG if the target buffer was
// exhausted, or to EINVAL if the input ended inside a multi-byte character;
// in both of these cases the consumed byte count is still returned. Any other
// failure, and a NULL or (uniconv_t)-1 handle, returns -1 with errno EBADF.
int uniconv(uniconv_t cd, char **inbuf, size_t *inbytesleft,
            char **outbuf, size_t *outbytesleft) {
    CUNBCPRM * tmpp;
    size_t startinlen;
    int retry_count;

    errno = 0;
    // Compare against the two invalid handle values directly: squeezing the
    // pointer through an int can truncate it and misjudge a valid handle.
    if ( (cd == NULL) || (cd == (uniconv_t)-1) ) {
        errno=EBADF;
        return -1;
    }
    startinlen = *inbytesleft;

    // Fill in the parameter area with current values
    tmpp = (CUNBCPRM *) cd;
    tmpp->Src_Buf_Ptr = *inbuf;
    tmpp->Src_Buf_Len = *inbytesleft;
    tmpp->Targ_Buf_Ptr = *outbuf;
    tmpp->Targ_Buf_Len = *outbytesleft;

    // Retry if the services are busy reloading their tables.
    retry_count = 0;
    while (retry_count < RETRY_THRESHOLD) {
        CUNLCNV(tmpp);
        if (tmpp->Return_Code == CUN_RC_OK)
            break;
        else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                  ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) )
            // Let it loop around again
            retry_count++;
        else
            break;
    }

    // Hand the updated positions and remaining lengths back to the caller.
    *inbuf = (char *)tmpp->Src_Buf_Ptr;
    *inbytesleft = tmpp->Src_Buf_Len;
    *outbuf = (char *)tmpp->Targ_Buf_Ptr;
    *outbytesleft = tmpp->Targ_Buf_Len;

    if (tmpp->Return_Code != CUN_RC_OK) {
        if (tmpp->Reason_Code == CUN_RS_TRG_EXH)
            errno=E2BIG;
        else if (tmpp->Reason_Code == CUN_RS_MBC_INCOMPLETE)
            errno=EINVAL;
        else {
            errno=EBADF;
            return -1;
        }
    }
    return (int)(startinlen-*inbytesleft);
}
// **********************************************************************
// These are the case conversion services.
// **********************************************************************
// This "opens" the case conversion. It allocates the parameter area
// then does a dummy call to unicode services so that it can set up
// the handle.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
// direction is stored in Conv_Type and selects the conversion performed by
// later calls on the handle.
//
// Returns the handle on success. If the dummy call does not complete
// successfully the block is freed and (uniconv_t)-1 is returned with errno
// set to EINVAL. If the allocation itself fails, (uniconv_t)-1 is returned
// and errno is left as malloc set it.
static inline uniconv_t uniconv_case_open(unsigned char direction) {
    CUNBAPRM defparms = {CUNBAPRM_DEFAULT};
    CUNBAPRM * tmpp;
    void * handle_area;
    char * dda;
    unichar_t inchar = 0x61;
    unichar_t outchar;
    int retry_count = 0;

    errno = 0;
    // The extra 8 bytes leave room to doubleword align the DDA below.
    handle_area = malloc (sizeof(CUNBAPRM)+CUNBAPRM_DDA_REQ+8);
    tmpp = (CUNBAPRM *) handle_area;
    if (tmpp==NULL)
        return (uniconv_t)-1;

    // initialize the parm area with defaults, then start filling it
    // in with our values.
    memcpy(tmpp,&defparms,sizeof(defparms));

    // Place the DDA right behind the parameter area, rounded up to a
    // doubleword boundary as uniconv_open does for its DDA. Plain pointer
    // arithmetic is used because casting the address through unsigned int
    // would truncate it wherever pointers are wider than unsigned int.
    dda = (char *) handle_area + sizeof(CUNBAPRM);
    dda += (8 - ((size_t) dda & 7)) & 7;
    tmpp->DDA_Buf_Ptr=(void*) dda;
    tmpp->DDA_Buf_Len=CUNBAPRM_DDA_REQ;

    // This flag tells the services to automatically refresh the handle if it
    // becomes invalid.
    // Use next line of code on old z/OS levels where Flag1 is char field
    // tmpp->Flag1|=CUNBAPRM_REFRESH_AT_INV_HANDLE_START;
    // Use next line of code on later z/OS levels where Flag1 is bit field
    tmpp->Flag1.Inv_Handle = 1;

    // Dummy one-character conversion. The buffer pointers refer to locals of
    // this function; uniconv_caseit re-points them on every call.
    tmpp->Src_Buf_Ptr=&inchar;
    tmpp->Targ_Buf_Ptr=&outchar;
    tmpp->Targ_Buf_Len=sizeof(unichar_t);
    tmpp->Src_Buf_Len=sizeof(unichar_t);
    tmpp->Conv_Type=direction;

    // Retry if the services are busy reloading their tables.
    for (;;) {
        CUNLASE ( tmpp );
        if (tmpp->Return_Code == CUN_RC_OK)
            break;

        // Anything other than a missing/invalid handle warning is final.
        if ( !( (tmpp->Return_Code == CUN_RC_WARN) &&
                ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                  (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) )
            break;

        // Let it loop around again, but not forever.
        retry_count++;
        if (retry_count > RETRY_THRESHOLD)
            break;
    }

    // Every failure path ends here, so errno is set once.
    if (tmpp->Return_Code != CUN_RC_OK) {
        free(handle_area);
        errno=EINVAL;
        handle_area = (uniconv_t)-1;
    }
    return handle_area;
}
// External entry point: opens a case conversion handle in the
// CUNBAPRM_TO_UPPER direction by delegating to uniconv_case_open.
uniconv_t uniconv_toupper_open() {
    const unsigned char direction = CUNBAPRM_TO_UPPER;
    return uniconv_case_open(direction);
}
// External entry point: opens a case conversion handle in the
// CUNBAPRM_TO_LOWER direction by delegating to uniconv_case_open.
uniconv_t uniconv_tolower_open() {
    const unsigned char direction = CUNBAPRM_TO_LOWER;
    return uniconv_case_open(direction);
}
// This closes the case conversion. All it does is free the handle buffer.
//
// Returns 0 on success. A NULL handle or the failure value (uniconv_t)-1 is
// rejected with -1 and errno set to EBADF.
int _uniconv_case_close(uniconv_t handle_area) {
    errno = 0;
    // Compare against the two invalid handle values directly: squeezing the
    // pointer through an int can truncate it and misjudge a valid handle.
    if ( (handle_area == NULL) || (handle_area == (uniconv_t)-1) ) {
        errno=EBADF;
        return -1;
    }
    free(handle_area);
    return 0;
}
// This does the actual case conversion of one character. The direction is
// already stored in the handle buffer.
// Note that unicode services must always be called in a loop since
// it could be busy reloading its tables.
//
// Returns the converted character. If the service fails, or keeps reporting
// a missing/invalid handle for more than RETRY_THRESHOLD retries, errno is
// set to ENOSYS and inchar is returned unchanged. A NULL or (uniconv_t)-1
// handle returns (unichar_t)-1 with errno set to EBADF.
unichar_t uniconv_caseit (uniconv_t cd,unichar_t inchar) {
    // Start from the input so that a failed call yields a defined value
    // instead of an uninitialized one.
    unichar_t outchar = inchar;
    CUNBAPRM * tmpp;
    int retry_count = 0;

    errno = 0;
    // Compare against the two invalid handle values directly: squeezing the
    // pointer through an int can truncate it and misjudge a valid handle.
    if ( (cd == NULL) || (cd == (uniconv_t)-1) ) {
        errno=EBADF;
        return (unichar_t)-1;
    }

    // Point the parameter area at this call's one-character buffers.
    tmpp = (CUNBAPRM *) cd;
    tmpp->Src_Buf_Ptr=&inchar;
    tmpp->Targ_Buf_Ptr=&outchar;
    tmpp->Targ_Buf_Len=sizeof(unichar_t);
    tmpp->Src_Buf_Len=sizeof(unichar_t);

    // Retry if the services are busy reloading their tables.
    while (true) {
        CUNLASE ( tmpp );
        if (tmpp->Return_Code == CUN_RC_OK) {
            break;
        }
        else if ( (tmpp->Return_Code == CUN_RC_WARN) &&
                  ( (tmpp->Reason_Code == CUN_RS_NO_HANDLE) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_NOSET) ||
                    (tmpp->Reason_Code == CUN_RS_INV_HANDLE_SET) ) ) {
            // Let it loop around again
            retry_count++;
            if (retry_count > RETRY_THRESHOLD) {
                errno = ENOSYS;
                break;
            }
        } else {
            errno = ENOSYS;
            break;
        }
    }
    return outchar;
}
XERCES_CPP_NAMESPACE_END