blob: 0dce04d658d93517d17aa97aa1d490ce2130396b [file] [log] [blame]
/*
* Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
*/
package sun.io;
import sun.nio.cs.ext.GB18030;
/**
 * Converts UTF-16 characters to GB18030 bytes.
 *
 * ASCII characters are written as a single byte. Other BMP characters are
 * looked up in the two-level encoder tables obtained from the NIO
 * {@link GB18030} coder and written as either a two-byte or a four-byte
 * sequence. Surrogate pairs are always written as four bytes. A high
 * surrogate that ends one call to convert() is remembered and completed by
 * the next call.
 */
public class CharToByteGB18030 extends CharToByteConverter
{
    /** High surrogate left pending by the previous convert() call, or 0 if none. */
    private char highHalfZoneCode;
    boolean flushed = true;

    private final static int GB18030_SINGLE_BYTE = 1;
    private final static int GB18030_DOUBLE_BYTE = 2;
    private final static int GB18030_FOUR_BYTE = 3;

    /** Encoder lookup tables; (re)assigned from the NIO coder by every constructor call. */
    private static short[] index1;
    private static String[] index2;

    /** Output width chosen for the character currently being converted. */
    private int currentState;

    /**
     * Creates a converter that substitutes '?' for unmappable characters
     * and takes its lookup tables from a fresh GB18030 NIO coder.
     */
    public CharToByteGB18030() {
        GB18030 nioCoder = new GB18030();
        currentState = GB18030_DOUBLE_BYTE;
        subBytes = new byte[1];
        subBytes[0] = (byte)'?';
        index1 = nioCoder.getEncoderIndex1();
        index2 = nioCoder.getEncoderIndex2();
    }

    /**
     * Ends the current conversion. No bytes are ever written.
     *
     * @return always 0
     * @throws MalformedInputException if a high surrogate is still pending;
     *         the pending surrogate is discarded before throwing
     */
    public int flush(byte[] output, int outStart, int outEnd)
        throws MalformedInputException
    {
        if (highHalfZoneCode != 0) {
            highHalfZoneCode = 0;
            badInputLength = 0;
            throw new MalformedInputException();
        }
        reset();
        flushed = true;
        return 0;
    }

    /**
     * Returns the converter to its initial state: offsets zeroed, default
     * output width restored and any pending high surrogate discarded.
     */
    public void reset() {
        byteOff = charOff = 0;
        currentState = GB18030_DOUBLE_BYTE;
        // A surrogate left over from an abandoned conversion must not leak
        // into the next one.
        highHalfZoneCode = 0;
    }

    /**
     * Reports whether a single char is convertible: everything except
     * surrogate code units and the values U+FFFE and U+FFFF.
     */
    public boolean canConvert(char c) {
        boolean surrogate = (c >= 0xd800 && c <= 0xdfff);
        return !(surrogate || c >= 0xfffe);
    }

    /**
     * Character conversion.
     *
     * Converts input[inOff..inEnd) into output[outOff..outEnd).
     *
     * @return the number of bytes written to output
     * @throws MalformedInputException on an unpaired surrogate
     * @throws UnknownCharacterException if a character has no mapping and
     *         substitution mode is off
     * @throws ConversionBufferFullException if output has no room for the
     *         next character; charOff is then left at the start of that
     *         character so it is not lost when the caller resumes
     */
    public int convert(char[] input, int inOff, int inEnd,
                       byte[] output, int outOff, int outEnd)
        throws UnknownCharacterException, MalformedInputException,
               ConversionBufferFullException
    {
        int linearDiffValue = 0;
        int hiByte = 0, loByte = 0;   // high and low order bytes
        char inputChar;               // input character to be converted

        charOff = inOff;
        byteOff = outOff;
        flushed = false;

        // Finish a surrogate pair whose high half ended the previous call.
        // With no input available the high half simply stays pending.
        if (highHalfZoneCode != 0 && inOff < inEnd) {
            char lowChar = input[inOff];
            if (lowChar < 0xDC00 || lowChar > 0xDFFF) {
                // This is illegal UTF16 sequence.
                badInputLength = 0;
                throw new MalformedInputException();
            }
            if (byteOff + 4 > outEnd) {
                // Nothing consumed yet; the high half stays pending so the
                // caller can retry with more room.
                throw new ConversionBufferFullException();
            }
            linearDiffValue = (highHalfZoneCode - 0xD800) * 0x400 +
                              (lowChar - 0xDC00) + 0x2E248;
            highHalfZoneCode = 0;
            charOff++;
            currentState = GB18030_FOUR_BYTE;
            writeFourBytes(output, linearDiffValue);
        }

        // Main encode loop
        while (charOff < inEnd) {
            // Start of the current character (or pair); charOff is rewound
            // to here if the output buffer turns out to be full.
            int startOff = charOff;
            inputChar = input[charOff++];

            if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
                // High surrogate as the last input char: keep it for the
                // next call.
                if (charOff >= inEnd) {
                    highHalfZoneCode = inputChar;
                    break;
                }
                char lowChar = input[charOff];
                if (lowChar < '\uDC00' || lowChar > '\uDFFF') {
                    // We have a malformed surrogate pair
                    badInputLength = 1;
                    throw new MalformedInputException();
                }
                charOff++;
                // 0x2E248 (189000) is the linear four-byte index of the first
                // supplementary code point; it encodes as 90 30 81 30.
                linearDiffValue = (inputChar - 0xD800) * 0x400 +
                                  (lowChar - 0xDC00) + 0x2E248;
                currentState = GB18030_FOUR_BYTE;
            }
            else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
                // Unaccompanied low surrogate
                badInputLength = 1;
                throw new MalformedInputException();
            }
            else if (inputChar <= 0x007F) {
                // ASCII maps to itself
                if (byteOff >= outEnd) {
                    charOff = startOff;
                    throw new ConversionBufferFullException();
                }
                currentState = GB18030_SINGLE_BYTE;
                output[byteOff++] = (byte) inputChar;
                continue;
            }
            else if (inputChar <= 0xA4C6 || inputChar >= 0xE000) {
                // Table-driven range
                int outByteVal = getGB18030(index1, index2, inputChar);
                if (outByteVal == 0xFFFD) {
                    // No mapping: substitute or report
                    if (!subMode) {
                        badInputLength = 1;
                        throw new UnknownCharacterException();
                    }
                    if (byteOff >= outEnd) {
                        charOff = startOff;
                        throw new ConversionBufferFullException();
                    }
                    output[byteOff++] = subBytes[0];
                    continue;
                }

                hiByte = (outByteVal & 0xFF00) >> 8;
                loByte = (outByteVal & 0xFF);

                // Only meaningful for the four-byte case below.
                linearDiffValue = (hiByte - 0x20) * 256 + loByte;
                if (inputChar >= 0xE000 && inputChar < 0xF900)
                    linearDiffValue += 0x82BD;
                else if (inputChar >= 0xF900)
                    linearDiffValue += 0x93A9;

                // A lead byte above 0x80 marks a two-byte code; anything
                // else is a linear index into the four-byte space.
                if (hiByte > 0x80)
                    currentState = GB18030_DOUBLE_BYTE;
                else
                    currentState = GB18030_FOUR_BYTE;
            }
            else if (inputChar >= 0xA4C7 && inputChar <= 0xD7FF) {
                // Contiguous four-byte range, computed without the tables
                linearDiffValue = inputChar - 0x5543;
                currentState = GB18030_FOUR_BYTE;
            }
            else {
                // Not reachable with the ranges above; kept as a safeguard.
                badInputLength = 1;
                throw new MalformedInputException();
            }

            if (currentState == GB18030_DOUBLE_BYTE) {
                if (byteOff + 2 > outEnd) {
                    charOff = startOff;
                    throw new ConversionBufferFullException();
                }
                output[byteOff++] = (byte)hiByte;
                output[byteOff++] = (byte)loByte;
            }
            else { // Four Byte encoding
                if (byteOff + 4 > outEnd) {
                    charOff = startOff;
                    throw new ConversionBufferFullException();
                }
                writeFourBytes(output, linearDiffValue);
            }
        }
        // Return number of bytes written to the output buffer.
        return byteOff - outOff;
    }

    /**
     * Writes a linear four-byte index at byteOff and advances byteOff by 4.
     * Bytes 1 and 3 are base-126 digits offset by 0x81; bytes 2 and 4 are
     * base-10 digits offset by 0x30. The caller must have checked that
     * four bytes of room remain.
     */
    private void writeFourBytes(byte[] output, int linearValue) {
        byte b4 = (byte)((linearValue % 10) + 0x30);
        linearValue /= 10;
        byte b3 = (byte)((linearValue % 126) + 0x81);
        linearValue /= 126;
        byte b2 = (byte)((linearValue % 10) + 0x30);
        byte b1 = (byte)((linearValue / 10) + 0x81);
        output[byteOff++] = b1;
        output[byteOff++] = b2;
        output[byteOff++] = b3;
        output[byteOff++] = b4;
    }

    /**
     * returns the maximum number of bytes needed to convert a char
     */
    public int getMaxBytesPerChar() {
        return 4;
    }

    /**
     * Return the character set ID
     */
    public String getCharacterEncoding() {
        return "GB18030";
    }

    /**
     * Looks up ch in the two-level encoder tables: the high byte of ch
     * selects an entry of outerIndex, which (shifted left by 8) selects a
     * string of innerIndex and a base position in it; the low byte of ch is
     * added to that position. convert() treats a result of 0xFFFD as
     * "no mapping".
     */
    private int getGB18030(short[] outerIndex, String[] innerIndex, char ch) {
        int offset = outerIndex[((ch & 0xff00) >> 8)] << 8;
        String segment = innerIndex[offset >> 12];
        return segment.charAt((offset & 0xfff) + (ch & 0xff));
    }
}