| /* |
| * Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| /* |
| */ |
| |
| package sun.io; |
| |
| import sun.nio.cs.ext.GB18030; |
| |
| public class ByteToCharGB18030 extends ByteToCharGB18030DB { |
| |
| private static final int GB18030_SINGLE_BYTE = 1; |
| private static final int GB18030_DOUBLE_BYTE = 2; |
| private static final int GB18030_FOUR_BYTE = 3; |
| private static short[] decoderIndex1; |
| private static String[] decoderIndex2; |
| |
| private int currentState; |
| private int savedSize; |
| private byte[] savedBytes; |
| |
| public ByteToCharGB18030() { |
| super(); |
| GB18030 nioCoder = new GB18030(); |
| savedBytes = new byte[3]; |
| currentState = GB18030_DOUBLE_BYTE; |
| decoderIndex1 = nioCoder.getDecoderIndex1(); |
| decoderIndex2 = nioCoder.getDecoderIndex2(); |
| savedSize = 0; |
| } |
| |
| public short[] getOuter() { |
| return(index1); |
| } |
| |
| public String[] getInner() { |
| return(index2); |
| } |
| |
| public short[] getDBIndex1() { |
| return(super.index1); |
| } |
| |
| public String[] getDBIndex2() { |
| return(super.index2); |
| } |
| |
| public int flush(char [] output, int outStart, int outEnd) |
| throws MalformedInputException |
| { |
| if (savedSize != 0) { |
| savedSize = 0; |
| currentState = GB18030_DOUBLE_BYTE; |
| badInputLength = 0; |
| throw new MalformedInputException(); |
| } |
| byteOff = charOff = 0; |
| return 0; |
| } |
| |
| |
| /** |
| * Character conversion |
| */ |
| public int convert(byte[] input, int inOff, int inEnd, |
| char[] output, int outOff, int outEnd) |
| throws UnknownCharacterException, MalformedInputException, |
| ConversionBufferFullException |
| { |
| int inputSize = 0; |
| char outputChar = '\uFFFD'; |
| // readOff keeps the actual buffer's pointer. |
| // byteOff keeps original buffer's pointer. |
| int readOff = byteOff = inOff; |
| |
| if (savedSize != 0) { |
| // Filter illegal bytes when they are detected in saved |
| // partial input from a previous conversion attempt. |
| if (((savedBytes[0] & 0xFF) < 0x81 || savedBytes[0] > 0xFE) || |
| (savedSize > 1 && |
| (savedBytes[1] & 0xFF) < 0x30 ) || |
| (savedSize > 2 && |
| ((savedBytes[2] & 0xFF) < 0x81 || |
| (savedBytes[2] & 0xFF) > 0xFE ))) { |
| badInputLength = 0; |
| throw new MalformedInputException(); |
| } |
| |
| byte[] newBuf = new byte[inEnd - inOff + savedSize]; |
| for (int i = 0; i < savedSize; i++) { |
| newBuf[i] = savedBytes[i]; |
| } |
| System.arraycopy(input, inOff, newBuf, savedSize, inEnd - inOff); |
| byteOff -= savedSize; |
| input = newBuf; |
| inOff = 0; |
| inEnd = newBuf.length; |
| savedSize = 0; |
| } |
| |
| charOff = outOff; |
| readOff = inOff; |
| |
| while(readOff < inEnd) { |
| int byte1 = 0 , byte2 = 0, byte3 = 0, byte4 = 0; |
| |
| // Is there room in the output buffer for the result? |
| if (charOff >= outEnd) { |
| throw new ConversionBufferFullException(); |
| } |
| |
| // Get the input byte |
| byte1 = input[readOff++] & 0xFF; |
| inputSize = 1; |
| |
| if ((byte1 & (byte)0x80) == 0){ // US-ASCII range |
| outputChar = (char)byte1; |
| currentState = GB18030_SINGLE_BYTE; |
| } |
| |
| else if (byte1 < 0x81 || byte1 > 0xfe) { |
| if (subMode) |
| outputChar = subChars[0]; |
| else { |
| badInputLength = 1; |
| throw new UnknownCharacterException(); |
| } |
| } |
| else { |
| // Either 2 or 4 byte sequence follows |
| // If an underrun is detected save for later |
| // replay. |
| |
| if (readOff + inputSize > inEnd) { |
| savedBytes[0]=(byte)byte1; |
| savedSize = 1; |
| break; |
| } |
| |
| byte2 = input[readOff++] & 0xFF; |
| inputSize = 2; |
| |
| if (byte2 < 0x30) { |
| badInputLength = 1; |
| throw new MalformedInputException(); |
| } |
| else if (byte2 >= 0x30 && byte2 <= 0x39) { |
| currentState = GB18030_FOUR_BYTE; |
| inputSize = 4; |
| |
| if (readOff + 2 > inEnd) { |
| if (readOff + 1 > inEnd) { |
| savedBytes[0] = (byte)byte1; |
| savedBytes[1] = (byte)byte2; |
| savedSize = 2; |
| } |
| else { |
| savedBytes[0] = (byte)byte1; |
| savedBytes[1] = (byte)byte2; |
| savedBytes[2] = input[readOff++]; |
| savedSize = 3; |
| } |
| break; |
| } |
| byte3 = input[readOff++] & 0xFF; |
| if (byte3 < 0x81 || byte3 > 0xfe) { |
| badInputLength = 3; |
| throw new MalformedInputException(); |
| } |
| |
| byte4 = input[readOff++] & 0xFF; |
| |
| if (byte4 < 0x30 || byte4 > 0x39) { |
| badInputLength = 4; |
| throw new MalformedInputException(); |
| } |
| } |
| else if (byte2 == 0x7f || byte2 == 0xff || |
| (byte2 < 0x40 )) { |
| badInputLength = 2; |
| throw new MalformedInputException(); |
| } |
| else |
| currentState = GB18030_DOUBLE_BYTE; |
| } |
| |
| switch (currentState){ |
| case GB18030_SINGLE_BYTE: |
| output[charOff++] = (char)(byte1); |
| break; |
| case GB18030_DOUBLE_BYTE: |
| output[charOff++] = super.getUnicode(byte1, byte2); |
| break; |
| case GB18030_FOUR_BYTE: |
| int offset = (((byte1 - 0x81) * 10 + |
| (byte2 - 0x30)) * 126 + |
| byte3 - 0x81) * 10 + byte4 - 0x30; |
| int hiByte = (offset >>8) & 0xFF; |
| int lowByte = (offset & 0xFF); |
| |
| // Mixture of table lookups and algorithmic calculation |
| // of character values. |
| |
| // BMP Ranges |
| |
| if (offset <= 0x4A62) |
| output[charOff++] = getChar(offset); |
| else if (offset > 0x4A62 && offset <= 0x82BC) |
| output[charOff++] = (char) (offset + 0x5543); |
| else if (offset >= 0x82BD && offset <= 0x830D) |
| output[charOff++] = getChar(offset); |
| else if (offset >= 0x830D && offset <= 0x93A8) |
| output[charOff++] = (char) (offset + 0x6557); |
| else if (offset >= 0x93A9 && offset <= 0x99FB) |
| output[charOff++] = getChar(offset); |
| // Supplemental UCS planes handled via surrogates |
| else if (offset >= 0x2E248 && offset < 0x12E248) { |
| if (offset >= 0x12E248) { |
| if (subMode) |
| return subChars[0]; |
| else { |
| badInputLength = 4; |
| throw new UnknownCharacterException(); |
| } |
| } |
| |
| if (charOff +2 > outEnd) { |
| throw new ConversionBufferFullException(); |
| } |
| offset -= 0x1e248; |
| char highSurr = (char) ((offset - 0x10000) / 0x400 + 0xD800); |
| char lowSurr = (char) ((offset - 0x10000) % 0x400 + 0xDC00); |
| output[charOff++] = highSurr; |
| output[charOff++] = lowSurr; |
| } |
| else { |
| badInputLength = 4; |
| throw new MalformedInputException(); |
| } |
| break; |
| } |
| byteOff += inputSize; |
| } |
| |
| byteOff += savedSize; |
| return charOff - outOff; |
| } |
| |
| public void reset() { |
| byteOff = charOff = 0; |
| currentState = GB18030_DOUBLE_BYTE; |
| savedSize = 0; |
| } |
| |
| public String getCharacterEncoding() { |
| return "GB18030"; |
| } |
| |
| private char getChar(int offset) throws UnknownCharacterException { |
| int byte1 = (offset >>8) & 0xFF; |
| int byte2 = (offset & 0xFF); |
| int start = 0, end = 0xFF; |
| |
| if (((byte1 < 0) || (byte1 > getOuter().length)) |
| || ((byte2 < start) || (byte2 > end))) { |
| if (subMode) |
| return subChars[0]; |
| else { |
| badInputLength = 1; |
| throw new UnknownCharacterException(); |
| } |
| } |
| |
| int n = (decoderIndex1[byte1] & 0xf) * (end - start + 1) + (byte2 - start); |
| return decoderIndex2[decoderIndex1[byte1] >> 4].charAt(n); |
| } |
| } |