blob: eada0804e6603e07714056c5b5711f6283ea3391 [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 1996-2006, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
/**
* A JNI interface for ICU converters.
*
*
* @author Ram Viswanadha, IBM
*/
package com.ibm.icu4jni.charset;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import com.ibm.icu4jni.common.ErrorCode;
// BEGIN android-removed
// import com.ibm.icu4jni.converters.NativeConverter;
// END android-removed
public final class CharsetEncoderICU extends CharsetEncoder {
private static final int INPUT_OFFSET = 0,
OUTPUT_OFFSET = 1,
INVALID_CHARS = 2,
INPUT_HELD = 3,
LIMIT = 4;
/* data is 3 element array where
* data[INPUT_OFFSET] = on input contains the start of input and on output the number of input chars consumed
* data[OUTPUT_OFFSET] = on input contains the start of output and on output the number of output bytes written
* data[INVALID_CHARS] = number of invalid chars
* data[INPUT_HELD] = number of input chars held in the converter's state
*/
private int[] data = new int[LIMIT];
/* handle to the ICU converter that is opened */
private long converterHandle=0;
private char[] input = null;
private byte[] output = null;
// BEGIN android-added
private char[] allocatedInput = null;
private byte[] allocatedOutput = null;
// END android-added
// These instance variables are
// always assigned in the methods
// before being used. This class
// inhrently multithread unsafe
// so we dont have to worry about
// synchronization
private int inEnd;
private int outEnd;
private int ec;
private int savedInputHeldLen;
private int onUnmappableInput = NativeConverter.STOP_CALLBACK;;
private int onMalformedInput = NativeConverter.STOP_CALLBACK;;
/**
* Construcs a new encoder for the given charset
* @param cs for which the decoder is created
* @param cHandle the address of ICU converter
* @param replacement the substitution bytes
* @stable ICU 2.4
*/
public CharsetEncoderICU(Charset cs, long cHandle, byte[] replacement) {
super(
cs,
(float) NativeConverter.getAveBytesPerChar(cHandle),
(float) NativeConverter.getMaxBytesPerChar(cHandle),
replacement);
byte[] sub = replacement();
// The default callback action on unmappable input
// or malformed input is to ignore so we set ICU converter
// callback to stop and report the error
ec = NativeConverter.setCallbackEncode( cHandle,
onMalformedInput,
onUnmappableInput,
sub, sub.length);
converterHandle = cHandle;
if (ErrorCode.isFailure(ec)) {
throw ErrorCode.getException(ec);
}
}
/**
* Sets this encoders replacement string. Substitutes the string in output if an
* umappable or illegal sequence is encountered
* @param newReplacement to replace the error chars with
* @stable ICU 2.4
*/
protected void implReplaceWith(byte[] newReplacement) {
if (converterHandle != 0) {
if (newReplacement.length
> NativeConverter.getMaxBytesPerChar(converterHandle)) {
throw new IllegalArgumentException("Number of replacement Bytes are greater than max bytes per char");
}
ec = NativeConverter.setSubstitutionBytes(converterHandle,
newReplacement,
newReplacement.length);
if (ErrorCode.isFailure(ec)) {
throw ErrorCode.getException(ec);
}
}
}
/**
* Sets the action to be taken if an illegal sequence is encountered
* @param newAction action to be taken
* @exception IllegalArgumentException
* @stable ICU 2.4
*/
protected void implOnMalformedInput(CodingErrorAction newAction) {
onMalformedInput = NativeConverter.STOP_CALLBACK;
if (newAction.equals(CodingErrorAction.IGNORE)) {
onMalformedInput = NativeConverter.SKIP_CALLBACK;
} else if (newAction.equals(CodingErrorAction.REPLACE)) {
onMalformedInput = NativeConverter.SUBSTITUTE_CALLBACK;
}
byte[] sub = replacement();
ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length);
if (ErrorCode.isFailure(ec)) {
throw ErrorCode.getException(ec);
}
}
/**
* Sets the action to be taken if an illegal sequence is encountered
* @param newAction action to be taken
* @exception IllegalArgumentException
* @stable ICU 2.4
*/
protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
onUnmappableInput = NativeConverter.STOP_CALLBACK;
if (newAction.equals(CodingErrorAction.IGNORE)) {
onUnmappableInput = NativeConverter.SKIP_CALLBACK;
} else if (newAction.equals(CodingErrorAction.REPLACE)) {
onUnmappableInput = NativeConverter.SUBSTITUTE_CALLBACK;
}
byte[] sub = replacement();
ec = NativeConverter.setCallbackEncode(converterHandle, onMalformedInput, onUnmappableInput, sub, sub.length);
if (ErrorCode.isFailure(ec)) {
throw ErrorCode.getException(ec);
}
}
/**
* Flushes any characters saved in the converter's internal buffer and
* resets the converter.
* @param out action to be taken
* @return result of flushing action and completes the decoding all input.
* Returns CoderResult.UNDERFLOW if the action succeeds.
* @stable ICU 2.4
*/
protected CoderResult implFlush(ByteBuffer out) {
try {
data[OUTPUT_OFFSET] = getArray(out);
ec = NativeConverter.flushCharToByte(converterHandle,/* Handle to ICU Converter */
output, /* output array of chars */
outEnd, /* output index+1 to be written */
data /* contains data, inOff,outOff */
);
/* If we don't have room for the output, throw an exception*/
if (ErrorCode.isFailure(ec)) {
if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
return CoderResult.OVERFLOW;
}else if (ec == ErrorCode.U_TRUNCATED_CHAR_FOUND) {//CSDL: add this truncated character error handling
if(data[INPUT_OFFSET]>0){
return CoderResult.malformedForLength(data[INPUT_OFFSET]);
}
}else {
ErrorCode.getException(ec);
}
}
return CoderResult.UNDERFLOW;
} finally {
setPosition(out);
implReset();
}
}
/**
* Resets the from Unicode mode of converter
* @stable ICU 2.4
*/
protected void implReset() {
NativeConverter.resetCharToByte(converterHandle);
data[INPUT_OFFSET] = 0;
data[OUTPUT_OFFSET] = 0;
data[INVALID_CHARS] = 0;
data[INPUT_HELD] = 0;
savedInputHeldLen = 0;
}
/**
* Encodes one or more chars. The default behaviour of the
* converter is stop and report if an error in input stream is encountered.
* To set different behaviour use @see CharsetEncoder.onMalformedInput()
* @param in buffer to decode
* @param out buffer to populate with decoded result
* @return result of decoding action. Returns CoderResult.UNDERFLOW if the decoding
* action succeeds or more input is needed for completing the decoding action.
* @stable ICU 2.4
*/
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
if (!in.hasRemaining()) {
return CoderResult.UNDERFLOW;
}
data[INPUT_OFFSET] = getArray(in);
data[OUTPUT_OFFSET]= getArray(out);
data[INPUT_HELD] = 0;
// BEGIN android-added
data[INVALID_CHARS] = 0; // Make sure we don't see earlier errors.
// END android added
try {
/* do the conversion */
ec = NativeConverter.encode(converterHandle,/* Handle to ICU Converter */
input, /* input array of bytes */
inEnd, /* last index+1 to be converted */
output, /* output array of chars */
outEnd, /* output index+1 to be written */
data, /* contains data, inOff,outOff */
false /* donot flush the data */
);
if (ErrorCode.isFailure(ec)) {
/* If we don't have room for the output return error */
if (ec == ErrorCode.U_BUFFER_OVERFLOW_ERROR) {
return CoderResult.OVERFLOW;
} else if (ec == ErrorCode.U_INVALID_CHAR_FOUND) {
return CoderResult.unmappableForLength(data[INVALID_CHARS]);
} else if (ec == ErrorCode.U_ILLEGAL_CHAR_FOUND) {
// in.position(in.position() - 1);
return CoderResult.malformedForLength(data[INVALID_CHARS]);
}
}
return CoderResult.UNDERFLOW;
} finally {
/* save state */
setPosition(in);
setPosition(out);
}
}
/**
* Ascertains if a given Unicode character can
* be converted to the target encoding
*
* @param c the character to be converted
* @return true if a character can be converted
* @stable ICU 2.4
*
*/
public boolean canEncode(char c) {
return canEncode((int) c);
}
/**
* Ascertains if a given Unicode code point (32bit value for handling surrogates)
* can be converted to the target encoding. If the caller wants to test if a
* surrogate pair can be converted to target encoding then the
* responsibility of assembling the int value lies with the caller.
* For assembling a code point the caller can use UTF16 class of ICU4J and do something like:
* <pre>
* while(i<mySource.length){
* if(UTF16.isLeadSurrogate(mySource[i])&& i+1< mySource.length){
* if(UTF16.isTrailSurrogate(mySource[i+1])){
* int temp = UTF16.charAt(mySource,i,i+1,0);
* if(!((CharsetEncoderICU) myConv).canEncode(temp)){
* passed=false;
* }
* i++;
* i++;
* }
* }
* }
* </pre>
* or
* <pre>
* String src = new String(mySource);
* int i,codepoint;
* boolean passed = false;
* while(i<src.length()){
* codepoint = UTF16.charAt(src,i);
* i+= (codepoint>0xfff)? 2:1;
* if(!(CharsetEncoderICU) myConv).canEncode(codepoint)){
* passed = false;
* }
* }
* </pre>
*
* @param codepoint Unicode code point as int value
* @return true if a character can be converted
* @obsolete ICU 2.4
* @deprecated ICU 3.4
*/
public boolean canEncode(int codepoint) {
return NativeConverter.canEncode(converterHandle, codepoint);
}
/**
* Releases the system resources by cleanly closing ICU converter opened
* @exception Throwable exception thrown by super class' finalize method
* @stable ICU 2.4
*/
protected void finalize() throws Throwable {
NativeConverter.closeConverter(converterHandle);
super.finalize();
converterHandle=0;
}
//------------------------------------------
// private utility methods
//------------------------------------------
private final int getArray(ByteBuffer out) {
if(out.hasArray()){
// BEGIN android-changed: take arrayOffset into account
output = out.array();
outEnd = out.arrayOffset() + out.limit();
return out.arrayOffset() + out.position();
// END android-changed
}else{
outEnd = out.remaining();
// BEGIN android-added
if (allocatedOutput == null || (outEnd > allocatedOutput.length)) {
allocatedOutput = new byte[outEnd];
}
output = allocatedOutput;
// END android-added
//since the new
// buffer start position
// is 0
return 0;
}
}
private final int getArray(CharBuffer in) {
if(in.hasArray()){
// BEGIN android-changed: take arrayOffset into account
input = in.array();
inEnd = in.arrayOffset() + in.limit();
return in.arrayOffset() + in.position() + savedInputHeldLen;/*exclude the number fo bytes held in previous conversion*/
// END android-changed
}else{
inEnd = in.remaining();
// BEGIN android-added
if (allocatedInput == null || (inEnd > allocatedInput.length)) {
allocatedInput = new char[inEnd];
}
input = allocatedInput;
// END android-added
// save the current position
int pos = in.position();
in.get(input,0,inEnd);
// reset the position
in.position(pos);
// the start position
// of the new buffer
// is whatever is savedInputLen
return savedInputHeldLen;
}
}
private final void setPosition(ByteBuffer out) {
if (out.hasArray()) {
// in getArray method we accessed the
// array backing the buffer directly and wrote to
// it, so just just set the position and return.
// This is done to avoid the creation of temp array.
// BEGIN android-changed: take arrayOffset into account
out.position(out.position() + data[OUTPUT_OFFSET] - out.arrayOffset());
// END android-changed
} else {
out.put(output, 0, data[OUTPUT_OFFSET]);
}
// BEGIN android-added
// release reference to output array, which may not be ours
output = null;
// END android-added
}
private final void setPosition(CharBuffer in){
// BEGIN android-removed
// // was there input held in the previous invocation of encodeLoop
// // that resulted in output in this invocation?
// if(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0){
// int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen;
// in.position(len);
// savedInputHeldLen = data[INPUT_HELD];
// }else{
// in.position(in.position() + data[INPUT_OFFSET] + savedInputHeldLen);
// savedInputHeldLen = data[INPUT_HELD];
// in.position(in.position() - savedInputHeldLen);
// }
// END android-removed
// BEGIN android-added
// Slightly rewired original code to make it cleaner. Also
// added a fix for the problem where input charatcers got
// lost when invalid characters were encountered. Not sure
// what happens when data[INVALID_CHARS] is > 1, though,
// since we never saw that happening.
int len = in.position() + data[INPUT_OFFSET] + savedInputHeldLen;
len -= data[INVALID_CHARS]; // Otherwise position becomes wrong.
in.position(len);
savedInputHeldLen = data[INPUT_HELD];
// was there input held in the previous invocation of encodeLoop
// that resulted in output in this invocation?
if(!(data[OUTPUT_OFFSET]>0 && savedInputHeldLen>0)){
in.position(in.position() - savedInputHeldLen);
}
// END android-added
// BEGIN android-added
// release reference to input array, which may not be ours
input = null;
// END android-added
}
}