ojluni/src/main/java/sun/io/CharToByteGB18030.java - platform/libcore.git - Git at Google

 /*
  * Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */

 /*
  */


 package sun.io;
 import sun.nio.cs.ext.GB18030;

 /**
  * Legacy {@code sun.io} character-to-byte converter that encodes UTF-16
  * input as GB18030.
  *
  * <p>Each input character (or surrogate pair) is written as one, two or
  * four bytes:
  * <ul>
  *   <li>U+0000..U+007F: the character value as a single byte;</li>
  *   <li>U+0080..U+A4C6 and U+E000..U+FFFF: looked up in the encoder index
  *       tables; the result is either a two-byte code or the basis of a
  *       four-byte code;</li>
  *   <li>U+A4C7..U+D7FF: a four-byte code computed arithmetically;</li>
  *   <li>a high surrogate followed by a low surrogate: a four-byte code
  *       computed arithmetically from the pair.</li>
  * </ul>
  *
  * <p>A high surrogate that is the last character of an input chunk is
  * remembered and combined with the first character of the next
  * {@link #convert} call.
  */
 public class CharToByteGB18030 extends CharToByteConverter
 {

     /** High surrogate left over from the previous convert() call, or 0 if none. */
     private char highHalfZoneCode;

     /** Cleared at the start of convert(); set again by a successful flush(). */
     boolean flushed = true;

     private static final int GB18030_SINGLE_BYTE = 1;
     private static final int GB18030_DOUBLE_BYTE = 2;
     private static final int GB18030_FOUR_BYTE = 3;

     // Encoder lookup tables taken from the NIO GB18030 charset. They are
     // static, but are (re)assigned by every constructor call.
     private static short[] index1;
     private static String[] index2;

     /** Output width (one of the GB18030_* constants) chosen for the current character. */
     private int currentState;

     /**
      * Creates a converter whose substitution sequence is the single byte
      * {@code '?'} and loads the encoder index tables from a new
      * {@code GB18030} instance.
      */
     public CharToByteGB18030() {
         GB18030 nioCoder = new GB18030();
         currentState = GB18030_DOUBLE_BYTE;
         subBytes = new byte[1];
         subBytes[0] = (byte)'?';
         index1 = nioCoder.getEncoderIndex1();
         index2 = nioCoder.getEncoderIndex2();
     }

     /**
      * Finishes a conversion. This converter never buffers output bytes, so
      * nothing is written to {@code output}.
      *
      * @return always 0
      * @throws MalformedInputException if a high surrogate from the last
      *         convert() call is still waiting for its low surrogate; the
      *         pending surrogate is discarded and badInputLength is set to 0
      */
     public int flush(byte[] output, int outStart, int outEnd)
         throws MalformedInputException
     {
         if (highHalfZoneCode != 0) {
             highHalfZoneCode = 0;
             badInputLength = 0;
             throw new MalformedInputException();
         }
         reset();
         flushed = true;
         return 0;
     }

     /**
      * Returns the converter to its initial state: offsets are zeroed, the
      * state is set back to double-byte and any pending high surrogate is
      * dropped so that it cannot leak into an unrelated conversion.
      */
     public void reset() {
         byteOff = charOff = 0;
         currentState = GB18030_DOUBLE_BYTE;
         highHalfZoneCode = 0;
     }

     /**
      * Tells whether a single char can be converted.
      *
      * @return {@code false} for surrogates (U+D800..U+DFFF) and for U+FFFE
      *         and U+FFFF; {@code true} for every other char
      */
     public boolean canConvert(char c) {
         boolean surrogate = c >= 0xd800 && c <= 0xdfff;
         return !(surrogate || c >= 0xfffe);
     }

     /**
      * Converts {@code input[inOff..inEnd)} to GB18030 bytes stored in
      * {@code output[outOff..outEnd)}.
      *
      * <p>While converting, {@code charOff} is kept at the index (in the
      * caller's array) just past the last character fetched, and
      * {@code byteOff} at the index just past the last byte written.
      *
      * @return the number of bytes written to {@code output}
      * @throws UnknownCharacterException if a character has no mapping in the
      *         index tables and substitution mode is off
      * @throws MalformedInputException if a surrogate is not part of a valid
      *         high/low pair
      * @throws ConversionBufferFullException if {@code output} has no room
      *         for the bytes of the current character
      */
     public int convert(char[] input, int inOff, int inEnd,
                        byte[] output, int outOff, int outEnd)
         throws UnknownCharacterException, MalformedInputException,
                ConversionBufferFullException
     {
         int linearDiffValue = 0;
         int hiByte = 0, loByte = 0;  // high and low order bytes
         char inputChar;              // input character being converted
         charOff = inOff;
         byteOff = outOff;

         flushed = false;

         // 'pos' indexes the array that is actually scanned, which is either
         // the caller's array or a temporary copy with the pending high
         // surrogate prepended. charOff is always pos + charOffBias, so it
         // stays expressed in the caller's index space.
         int pos = inOff;
         int charOffBias = 0;

         if (highHalfZoneCode != 0) {
             if (inOff >= inEnd) {
                 // Nothing to pair the pending surrogate with yet; keep it
                 // for the next call (or for flush() to report).
                 return 0;
             }
             if (input[inOff] >= 0xDC00 && input[inOff] <= 0xDFFF) {

                 // This is a legal UTF-16 sequence, so shunt in the high
                 // surrogate for conversion by the main loop.

                 char[] newBuf = new char[inEnd - inOff + 1];
                 newBuf[0] = highHalfZoneCode;
                 System.arraycopy(input, inOff, newBuf, 1, inEnd - inOff);
                 // newBuf[i] corresponds to caller index inOff + i - 1.
                 charOffBias = inOff - 1;
                 input = newBuf;
                 pos = 0;
                 inEnd = newBuf.length;
                 highHalfZoneCode = 0;
             } else {
                 // This is an illegal UTF-16 sequence.
                 badInputLength = 0;
                 throw new MalformedInputException();
             }
         }

         // Main encode loop

         while (pos < inEnd) {
             inputChar = input[pos++];
             charOff = pos + charOffBias;

             if (inputChar >= '\uD800' && inputChar <= '\uDBFF') {
                 // Is this the last character of the input?
                 if (pos >= inEnd) {
                     highHalfZoneCode = inputChar;
                     break;
                 }

                 char previousChar = inputChar;
                 inputChar = input[pos];

                 // Is there a low surrogate following?
                 if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
                     pos++;
                     charOff = pos + charOffBias;
                     linearDiffValue = (previousChar - 0xD800) * 0x400 +
                                 (inputChar - 0xDC00) + 0x2E248;

                     currentState = GB18030_FOUR_BYTE;
                 } else {
                     // We have a malformed surrogate pair
                     badInputLength = 1;
                     throw new MalformedInputException();
                 }
             }
             // Is this an unaccompanied low surrogate?
             else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
                 badInputLength = 1;
                 throw new MalformedInputException();
             }

             // Not part of a surrogate
             else if (inputChar <= 0x007F) {
                 if (byteOff >= outEnd) {
                    throw new ConversionBufferFullException();
                 }
                 currentState = GB18030_SINGLE_BYTE;
                 output[byteOff++] = (byte) inputChar;
             }
             else if (inputChar <= 0xA4C6 || inputChar >= 0xE000) {
                 int outByteVal = getGB18030(index1, index2, inputChar);

                 // 0xFFFD in the table marks a character without a mapping.
                 if (outByteVal == 0xFFFD) {
                     if (subMode) {
                         if (byteOff >= outEnd) {
                            throw new ConversionBufferFullException();
                         } else {
                             output[byteOff++] = subBytes[0];
                             continue;
                         }
                     } else {
                         badInputLength = 1;
                         throw new UnknownCharacterException();
                     }
                 }

                 hiByte = (outByteVal & 0xFF00) >> 8;
                 loByte = (outByteVal & 0xFF);

                 // Only used when the table entry turns out to denote a
                 // four-byte code (hiByte <= 0x80).
                 linearDiffValue = (hiByte - 0x20) * 256 + loByte;

                 if (inputChar >= 0xE000 && inputChar < 0xF900)
                         linearDiffValue += 0x82BD;
                 else if (inputChar >= 0xF900)
                         linearDiffValue += 0x93A9;

                 if (hiByte > 0x80)
                      currentState = GB18030_DOUBLE_BYTE;
                 else
                      currentState = GB18030_FOUR_BYTE;
             }
             else if (inputChar >= 0xA4C7 && inputChar <= 0xD7FF) {
                 linearDiffValue = inputChar - 0x5543;
                 currentState = GB18030_FOUR_BYTE;
             }
             else {
                 badInputLength = 1;
                 throw new MalformedInputException();
             }

             // The single byte was already stored above.
             if (currentState == GB18030_SINGLE_BYTE)
                 continue;

             if (currentState == GB18030_DOUBLE_BYTE) {
                 if (byteOff + 2 > outEnd) {
                     throw new ConversionBufferFullException();
                 }
                 output[byteOff++] = (byte)hiByte;
                 output[byteOff++] = (byte)loByte;
             }
             else { // Four Byte encoding
                 if (byteOff + 4 > outEnd) {
                     throw new ConversionBufferFullException();
                 }

                 // Split the linear value into mixed-radix digits
                 // (10, 126, 10, remainder), least significant first.
                 byte b1, b2, b3, b4;

                 b4 = (byte)((linearDiffValue % 10) + 0x30);
                 linearDiffValue /= 10;
                 b3 = (byte)((linearDiffValue % 126) + 0x81);
                 linearDiffValue /= 126;
                 b2 = (byte)((linearDiffValue % 10) + 0x30);
                 b1 = (byte)((linearDiffValue / 10) + 0x81);
                 output[byteOff++] = b1;
                 output[byteOff++] = b2;
                 output[byteOff++] = b3;
                 output[byteOff++] = b4;
             }
         }
         // Return number of bytes written to the output buffer.
         return byteOff - outOff;
     }


     /**
      * Returns the maximum number of bytes needed to convert a char.
      */
     public int getMaxBytesPerChar() {
         return 4;
     }


     /**
      * Returns the character set ID.
      */
     public String getCharacterEncoding() {
         return "GB18030";
     }

     /**
      * Looks up {@code ch} in the two-level encoder table.
      *
      * <p>The high byte of {@code ch} selects an entry of {@code outerIndex};
      * that entry shifted left by 8 is an offset whose bits above the low 12
      * pick the string in {@code innerIndex} and whose low 12 bits, plus the
      * low byte of {@code ch}, give the position within that string.
      *
      * @return the char stored at that position, as an int
      */
     private int getGB18030(short[] outerIndex, String[] innerIndex, char ch) {
         int offset = outerIndex[((ch & 0xff00) >> 8)] << 8;

         return innerIndex[offset >> 12].charAt((offset & 0xfff) + (ch & 0xff));
     }

 }
	/*
	* Copyright (c) 2001, 2003, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/

	/*
	*/


	package sun.io;
	import sun.nio.cs.ext.GB18030;

	public class CharToByteGB18030 extends CharToByteConverter
	{

	private char highHalfZoneCode;
	boolean flushed = true;

	private final static int GB18030_SINGLE_BYTE = 1;
	private final static int GB18030_DOUBLE_BYTE = 2;
	private final static int GB18030_FOUR_BYTE = 3;
	private static short[] index1;
	private static String[] index2;
	private int currentState;

	public CharToByteGB18030() {
	GB18030 nioCoder = new GB18030();
	currentState = GB18030_DOUBLE_BYTE;
	subBytes = new byte[1];
	subBytes[0] = (byte)'?';
	index1 = nioCoder.getEncoderIndex1();
	index2 = nioCoder.getEncoderIndex2();
	}

	public int flush(byte[] output, int outStart, int outEnd)
	throws MalformedInputException
	{
	if (highHalfZoneCode != 0) {
	highHalfZoneCode = 0;
	badInputLength = 0;
	throw new MalformedInputException();
	}
	reset();
	flushed = true;
	return 0;
	}

	public void reset() {
	byteOff = charOff = 0;
	currentState = GB18030_DOUBLE_BYTE;
	}

	public boolean canConvert(char c) {
	// converts all but unpaired surrogates
	// and illegal chars, U+FFFE & U+FFFF

	if ((c >= 0xd800 && c <=0xdfff) \|\| (c >= 0xfffe))
	return false;
	else
	return true;
	}

	/**
	* Character conversion
	*/
	public int convert(char[] input, int inOff, int inEnd,
	byte[] output, int outOff, int outEnd)
	throws UnknownCharacterException, MalformedInputException,
	ConversionBufferFullException
	{
	int linearDiffValue = 0;
	int hiByte = 0 , loByte = 0; // low and high order bytes
	char inputChar; // Input character to be converted
	charOff = inOff;
	byteOff = outOff;
	int inputSize; // Size of the input
	int outputSize; // Size of the output

	flushed = false;

	if (highHalfZoneCode != 0) {
	if (input[inOff] >= 0xDC00 && input[inOff] <= 0xDFFF) {

	// This is legal UTF16 sequence, so shunt in the high
	// surrogate for conversion by convert() loop.

	char[] newBuf = new char[inEnd - inOff + 1];
	newBuf[0] = highHalfZoneCode;
	System.arraycopy(input, inOff, newBuf, 1, inEnd - inOff);
	charOff -= 1;
	input = newBuf;
	inOff = 0;
	inEnd = newBuf.length;
	highHalfZoneCode = 0;
	} else {
	// This is illegal UTF16 sequence.
	badInputLength = 0;
	throw new MalformedInputException();
	}
	}

	// Main encode loop

	while (charOff < inEnd) {
	inputChar = input[charOff++];

	if(inputChar >= '\uD800' && inputChar <= '\uDBFF') {
	// Is this the last character of the input?
	if (charOff + 1 > inEnd) {
	highHalfZoneCode = inputChar;
	break;
	}

	char previousChar = inputChar;
	inputChar = input[charOff];

	// Is there a low surrogate following?
	if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
	inputSize = 2;
	charOff++;
	linearDiffValue = ( previousChar - 0xD800) * 0x400 +
	( inputChar - 0xDC00) + 0x2E248;

	currentState = GB18030_FOUR_BYTE;
	} else {
	// We have a malformed surrogate pair
	badInputLength = 1;
	throw new MalformedInputException();
	}
	}
	// Is this an unaccompanied low surrogate?
	else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') {
	badInputLength = 1;
	throw new MalformedInputException();
	}

	// Not part of a surrogate
	else if (inputChar >= 0x0000 && inputChar <= 0x007F) {
	if (byteOff >= outEnd) {
	throw new ConversionBufferFullException();
	}
	currentState = GB18030_SINGLE_BYTE;
	output[byteOff++] = (byte) inputChar;
	}
	else if (inputChar <= 0xA4C6 \|\| inputChar >= 0xE000) {
	int outByteVal = getGB18030(index1, index2, inputChar);

	if (outByteVal == 0xFFFD ) {
	if (subMode) {
	if (byteOff >= outEnd) {
	throw new ConversionBufferFullException();
	} else {
	output[byteOff++] = subBytes[0];
	continue;
	}
	} else {
	badInputLength = 1;
	throw new UnknownCharacterException();
	}
	}

	hiByte = (outByteVal & 0xFF00) >> 8;
	loByte = (outByteVal & 0xFF);

	linearDiffValue = (hiByte - 0x20) * 256 + loByte;

	if (inputChar >= 0xE000 && inputChar < 0xF900)
	linearDiffValue += 0x82BD;
	else if (inputChar >= 0xF900)
	linearDiffValue += 0x93A9;

	if (hiByte > 0x80)
	currentState = GB18030_DOUBLE_BYTE;
	else
	currentState = GB18030_FOUR_BYTE;
	}
	else if (inputChar >= 0xA4C7 && inputChar <= 0xD7FF) {
	linearDiffValue = inputChar - 0x5543;
	currentState = GB18030_FOUR_BYTE;
	}
	else {
	badInputLength = 1;
	throw new MalformedInputException();
	}

	if (currentState == GB18030_SINGLE_BYTE)
	continue;

	if (currentState == GB18030_DOUBLE_BYTE) {
	if (byteOff + 2 > outEnd) {
	throw new ConversionBufferFullException();
	}
	output[byteOff++] = (byte)hiByte;
	output[byteOff++] = (byte)loByte;
	}
	else { // Four Byte encoding
	if (byteOff + 4 > outEnd) {
	throw new ConversionBufferFullException();
	}

	byte b1, b2, b3, b4;

	b4 = (byte)((linearDiffValue % 10) + 0x30);
	linearDiffValue /= 10;
	b3 = (byte)((linearDiffValue % 126) + 0x81);
	linearDiffValue /= 126;
	b2 = (byte)((linearDiffValue % 10) + 0x30);
	b1 = (byte)((linearDiffValue / 10) + 0x81);
	output[byteOff++] = b1;
	output[byteOff++] = b2;
	output[byteOff++] = b3;
	output[byteOff++] = b4;
	}
	}
	// Return number of bytes written to the output buffer.
	return byteOff - outOff;
	}


	/**
	* returns the maximum number of bytes needed to convert a char
	*/
	public int getMaxBytesPerChar() {
	return 4;
	}


	/**
	* Return the character set ID
	*/
	public String getCharacterEncoding() {
	return "GB18030";
	}

	private int getGB18030(short[] outerIndex, String[] innerIndex, char ch) {
	int offset = outerIndex[((ch & 0xff00) >> 8 )] << 8;

	return innerIndex[offset >> 12].charAt((offset & 0xfff) + (ch & 0xff));
	}

	}