| /* |
| * Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| /* |
| */ |
| |
| |
| package sun.io; |
| import sun.nio.cs.ext.GB18030; |
| |
| public class CharToByteGB18030 extends CharToByteConverter |
| { |
| |
| private char highHalfZoneCode; |
| boolean flushed = true; |
| |
| private final static int GB18030_SINGLE_BYTE = 1; |
| private final static int GB18030_DOUBLE_BYTE = 2; |
| private final static int GB18030_FOUR_BYTE = 3; |
| private static short[] index1; |
| private static String[] index2; |
| private int currentState; |
| |
| public CharToByteGB18030() { |
| GB18030 nioCoder = new GB18030(); |
| currentState = GB18030_DOUBLE_BYTE; |
| subBytes = new byte[1]; |
| subBytes[0] = (byte)'?'; |
| index1 = nioCoder.getEncoderIndex1(); |
| index2 = nioCoder.getEncoderIndex2(); |
| } |
| |
| public int flush(byte[] output, int outStart, int outEnd) |
| throws MalformedInputException |
| { |
| if (highHalfZoneCode != 0) { |
| highHalfZoneCode = 0; |
| badInputLength = 0; |
| throw new MalformedInputException(); |
| } |
| reset(); |
| flushed = true; |
| return 0; |
| } |
| |
| public void reset() { |
| byteOff = charOff = 0; |
| currentState = GB18030_DOUBLE_BYTE; |
| } |
| |
| public boolean canConvert(char c) { |
| // converts all but unpaired surrogates |
| // and illegal chars, U+FFFE & U+FFFF |
| |
| if ((c >= 0xd800 && c <=0xdfff) || (c >= 0xfffe)) |
| return false; |
| else |
| return true; |
| } |
| |
| /** |
| * Character conversion |
| */ |
| public int convert(char[] input, int inOff, int inEnd, |
| byte[] output, int outOff, int outEnd) |
| throws UnknownCharacterException, MalformedInputException, |
| ConversionBufferFullException |
| { |
| int linearDiffValue = 0; |
| int hiByte = 0 , loByte = 0; // low and high order bytes |
| char inputChar; // Input character to be converted |
| charOff = inOff; |
| byteOff = outOff; |
| int inputSize; // Size of the input |
| int outputSize; // Size of the output |
| |
| flushed = false; |
| |
| if (highHalfZoneCode != 0) { |
| if (input[inOff] >= 0xDC00 && input[inOff] <= 0xDFFF) { |
| |
| // This is legal UTF16 sequence, so shunt in the high |
| // surrogate for conversion by convert() loop. |
| |
| char[] newBuf = new char[inEnd - inOff + 1]; |
| newBuf[0] = highHalfZoneCode; |
| System.arraycopy(input, inOff, newBuf, 1, inEnd - inOff); |
| charOff -= 1; |
| input = newBuf; |
| inOff = 0; |
| inEnd = newBuf.length; |
| highHalfZoneCode = 0; |
| } else { |
| // This is illegal UTF16 sequence. |
| badInputLength = 0; |
| throw new MalformedInputException(); |
| } |
| } |
| |
| // Main encode loop |
| |
| while (charOff < inEnd) { |
| inputChar = input[charOff++]; |
| |
| if(inputChar >= '\uD800' && inputChar <= '\uDBFF') { |
| // Is this the last character of the input? |
| if (charOff + 1 > inEnd) { |
| highHalfZoneCode = inputChar; |
| break; |
| } |
| |
| char previousChar = inputChar; |
| inputChar = input[charOff]; |
| |
| // Is there a low surrogate following? |
| if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { |
| inputSize = 2; |
| charOff++; |
| linearDiffValue = ( previousChar - 0xD800) * 0x400 + |
| ( inputChar - 0xDC00) + 0x2E248; |
| |
| currentState = GB18030_FOUR_BYTE; |
| } else { |
| // We have a malformed surrogate pair |
| badInputLength = 1; |
| throw new MalformedInputException(); |
| } |
| } |
| // Is this an unaccompanied low surrogate? |
| else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { |
| badInputLength = 1; |
| throw new MalformedInputException(); |
| } |
| |
| // Not part of a surrogate |
| else if (inputChar >= 0x0000 && inputChar <= 0x007F) { |
| if (byteOff >= outEnd) { |
| throw new ConversionBufferFullException(); |
| } |
| currentState = GB18030_SINGLE_BYTE; |
| output[byteOff++] = (byte) inputChar; |
| } |
| else if (inputChar <= 0xA4C6 || inputChar >= 0xE000) { |
| int outByteVal = getGB18030(index1, index2, inputChar); |
| |
| if (outByteVal == 0xFFFD ) { |
| if (subMode) { |
| if (byteOff >= outEnd) { |
| throw new ConversionBufferFullException(); |
| } else { |
| output[byteOff++] = subBytes[0]; |
| continue; |
| } |
| } else { |
| badInputLength = 1; |
| throw new UnknownCharacterException(); |
| } |
| } |
| |
| hiByte = (outByteVal & 0xFF00) >> 8; |
| loByte = (outByteVal & 0xFF); |
| |
| linearDiffValue = (hiByte - 0x20) * 256 + loByte; |
| |
| if (inputChar >= 0xE000 && inputChar < 0xF900) |
| linearDiffValue += 0x82BD; |
| else if (inputChar >= 0xF900) |
| linearDiffValue += 0x93A9; |
| |
| if (hiByte > 0x80) |
| currentState = GB18030_DOUBLE_BYTE; |
| else |
| currentState = GB18030_FOUR_BYTE; |
| } |
| else if (inputChar >= 0xA4C7 && inputChar <= 0xD7FF) { |
| linearDiffValue = inputChar - 0x5543; |
| currentState = GB18030_FOUR_BYTE; |
| } |
| else { |
| badInputLength = 1; |
| throw new MalformedInputException(); |
| } |
| |
| if (currentState == GB18030_SINGLE_BYTE) |
| continue; |
| |
| if (currentState == GB18030_DOUBLE_BYTE) { |
| if (byteOff + 2 > outEnd) { |
| throw new ConversionBufferFullException(); |
| } |
| output[byteOff++] = (byte)hiByte; |
| output[byteOff++] = (byte)loByte; |
| } |
| else { // Four Byte encoding |
| if (byteOff + 4 > outEnd) { |
| throw new ConversionBufferFullException(); |
| } |
| |
| byte b1, b2, b3, b4; |
| |
| b4 = (byte)((linearDiffValue % 10) + 0x30); |
| linearDiffValue /= 10; |
| b3 = (byte)((linearDiffValue % 126) + 0x81); |
| linearDiffValue /= 126; |
| b2 = (byte)((linearDiffValue % 10) + 0x30); |
| b1 = (byte)((linearDiffValue / 10) + 0x81); |
| output[byteOff++] = b1; |
| output[byteOff++] = b2; |
| output[byteOff++] = b3; |
| output[byteOff++] = b4; |
| } |
| } |
| // Return number of bytes written to the output buffer. |
| return byteOff - outOff; |
| } |
| |
| |
| /** |
| * returns the maximum number of bytes needed to convert a char |
| */ |
| public int getMaxBytesPerChar() { |
| return 4; |
| } |
| |
| |
| /** |
| * Return the character set ID |
| */ |
| public String getCharacterEncoding() { |
| return "GB18030"; |
| } |
| |
| private int getGB18030(short[] outerIndex, String[] innerIndex, char ch) { |
| int offset = outerIndex[((ch & 0xff00) >> 8 )] << 8; |
| |
| return innerIndex[offset >> 12].charAt((offset & 0xfff) + (ch & 0xff)); |
| } |
| |
| } |