src/share/classes/sun/io/CharToByteUTF8.java - toolchain/jdk/jdk9_jdk - Git at Google

 /*
  * Copyright (c) 1996, 1997, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.  Oracle designates this
  * particular file as subject to the "Classpath" exception as provided
  * by Oracle in the LICENSE file that accompanied this code.
  *
  * This code is distributed in the hope that it will be useful, but WITHOUT
  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  * version 2 for more details (a copy is included in the LICENSE file that
  * accompanied this code).
  *
  * You should have received a copy of the GNU General Public License version
  * 2 along with this work; if not, write to the Free Software Foundation,
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  * or visit www.oracle.com if you need additional information or have any
  * questions.
  */
 package sun.io;


 /**
  * UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter
  * It's represented like below.
  *
  * # Bits   Bit pattern
  * 1    7   0xxxxxxx
  * 2   11   110xxxxx 10xxxxxx
  * 3   16   1110xxxx 10xxxxxx 10xxxxxx
  * 4   21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  * 5   26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  * 6   31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
  *
  *     UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6
  */

 public class CharToByteUTF8 extends CharToByteConverter {

     private char highHalfZoneCode;

     public int flush(byte[] output, int outStart, int outEnd)
         throws MalformedInputException
     {
         if (highHalfZoneCode != 0) {
             highHalfZoneCode = 0;
             badInputLength = 0;
             throw new MalformedInputException();
         }
         byteOff = charOff = 0;
         return 0;
     }

     /**
      * Character conversion
      */
     public int convert(char[] input, int inOff, int inEnd,
                        byte[] output, int outOff, int outEnd)
         throws ConversionBufferFullException, MalformedInputException
     {
         char inputChar;
         byte[] outputByte = new byte[6];
         int inputSize;
         int outputSize;

         charOff = inOff;
         byteOff = outOff;

         if (highHalfZoneCode != 0) {
             inputChar = highHalfZoneCode;
             highHalfZoneCode = 0;
             if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
                 // This is legal UTF16 sequence.
                 int ucs4 = (highHalfZoneCode - 0xd800) * 0x400
                     + (input[inOff] - 0xdc00) + 0x10000;
                 output[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);
                 output[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
                 output[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
                 output[3] = (byte)(0x80 | (ucs4 & 0x3f));
                 charOff++;
                 highHalfZoneCode = 0;
             } else {
                 // This is illegal UTF16 sequence.
                 badInputLength = 0;
                 throw new MalformedInputException();
             }
         }

         while(charOff < inEnd) {
             inputChar = input[charOff];
             if (inputChar < 0x80) {
                 outputByte[0] = (byte)inputChar;
                 inputSize = 1;
                 outputSize = 1;
             } else if (inputChar < 0x800) {
                 outputByte[0] = (byte)(0xc0 | ((inputChar >> 6) & 0x1f));
                 outputByte[1] = (byte)(0x80 | (inputChar & 0x3f));
                 inputSize = 1;
                 outputSize = 2;
             } else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
                 // this is <high-half zone code> in UTF-16
                 if (charOff + 1 >= inEnd) {
                     highHalfZoneCode = inputChar;
                     break;
                 }
                 // check next char is valid <low-half zone code>
                 char lowChar = input[charOff + 1];
                 if (lowChar < 0xdc00 || lowChar > 0xdfff) {
                     badInputLength = 1;
                     throw new MalformedInputException();
                 }
                 int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00)
                     + 0x10000;
                 outputByte[0] = (byte)(0xf0 | ((ucs4 >> 18)) & 0x07);
                 outputByte[1] = (byte)(0x80 | ((ucs4 >> 12) & 0x3f));
                 outputByte[2] = (byte)(0x80 | ((ucs4 >> 6) & 0x3f));
                 outputByte[3] = (byte)(0x80 | (ucs4 & 0x3f));
                 outputSize = 4;
                 inputSize = 2;
             } else {
                 outputByte[0] = (byte)(0xe0 | ((inputChar >> 12)) & 0x0f);
                 outputByte[1] = (byte)(0x80 | ((inputChar >> 6) & 0x3f));
                 outputByte[2] = (byte)(0x80 | (inputChar & 0x3f));
                 inputSize = 1;
                 outputSize = 3;
             }
             if (byteOff + outputSize > outEnd) {
                 throw new ConversionBufferFullException();
             }
             for (int i = 0; i < outputSize; i++) {
                 output[byteOff++] = outputByte[i];
             }
             charOff += inputSize;
         }
         return byteOff - outOff;
     }

     public boolean canConvert(char ch) {
         return true;
     }

     public int getMaxBytesPerChar() {
         return 3;
     }

     public void reset() {
         byteOff = charOff = 0;
         highHalfZoneCode = 0;
     }

     public String getCharacterEncoding() {
         return "UTF8";
     }
 }
	/*
	* Copyright (c) 1996, 1997, Oracle and/or its affiliates. All rights reserved.
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
	*
	* This code is free software; you can redistribute it and/or modify it
	* under the terms of the GNU General Public License version 2 only, as
	* published by the Free Software Foundation. Oracle designates this
	* particular file as subject to the "Classpath" exception as provided
	* by Oracle in the LICENSE file that accompanied this code.
	*
	* This code is distributed in the hope that it will be useful, but WITHOUT
	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
	* version 2 for more details (a copy is included in the LICENSE file that
	* accompanied this code).
	*
	* You should have received a copy of the GNU General Public License version
	* 2 along with this work; if not, write to the Free Software Foundation,
	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
	*
	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
	* or visit www.oracle.com if you need additional information or have any
	* questions.
	*/
	package sun.io;


	/**
	* UCS2 (UTF16) -> UCS Transformation Format 8 (UTF-8) converter
	* It's represented like below.
	*
	* # Bits Bit pattern
	* 1 7 0xxxxxxx
	* 2 11 110xxxxx 10xxxxxx
	* 3 16 1110xxxx 10xxxxxx 10xxxxxx
	* 4 21 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	* 5 26 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	* 6 31 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
	*
	* UCS2 uses 1-3 / UTF16 uses 1-4 / UCS4 uses 1-6
	*/

	public class CharToByteUTF8 extends CharToByteConverter {

	private char highHalfZoneCode;

	public int flush(byte[] output, int outStart, int outEnd)
	throws MalformedInputException
	{
	if (highHalfZoneCode != 0) {
	highHalfZoneCode = 0;
	badInputLength = 0;
	throw new MalformedInputException();
	}
	byteOff = charOff = 0;
	return 0;
	}

	/**
	* Character conversion
	*/
	public int convert(char[] input, int inOff, int inEnd,
	byte[] output, int outOff, int outEnd)
	throws ConversionBufferFullException, MalformedInputException
	{
	char inputChar;
	byte[] outputByte = new byte[6];
	int inputSize;
	int outputSize;

	charOff = inOff;
	byteOff = outOff;

	if (highHalfZoneCode != 0) {
	inputChar = highHalfZoneCode;
	highHalfZoneCode = 0;
	if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) {
	// This is legal UTF16 sequence.
	int ucs4 = (highHalfZoneCode - 0xd800) * 0x400
	+ (input[inOff] - 0xdc00) + 0x10000;
	output[0] = (byte)(0xf0 \| ((ucs4 >> 18)) & 0x07);
	output[1] = (byte)(0x80 \| ((ucs4 >> 12) & 0x3f));
	output[2] = (byte)(0x80 \| ((ucs4 >> 6) & 0x3f));
	output[3] = (byte)(0x80 \| (ucs4 & 0x3f));
	charOff++;
	highHalfZoneCode = 0;
	} else {
	// This is illegal UTF16 sequence.
	badInputLength = 0;
	throw new MalformedInputException();
	}
	}

	while(charOff < inEnd) {
	inputChar = input[charOff];
	if (inputChar < 0x80) {
	outputByte[0] = (byte)inputChar;
	inputSize = 1;
	outputSize = 1;
	} else if (inputChar < 0x800) {
	outputByte[0] = (byte)(0xc0 \| ((inputChar >> 6) & 0x1f));
	outputByte[1] = (byte)(0x80 \| (inputChar & 0x3f));
	inputSize = 1;
	outputSize = 2;
	} else if (inputChar >= 0xd800 && inputChar <= 0xdbff) {
	// this is <high-half zone code> in UTF-16
	if (charOff + 1 >= inEnd) {
	highHalfZoneCode = inputChar;
	break;
	}
	// check next char is valid <low-half zone code>
	char lowChar = input[charOff + 1];
	if (lowChar < 0xdc00 \|\| lowChar > 0xdfff) {
	badInputLength = 1;
	throw new MalformedInputException();
	}
	int ucs4 = (inputChar - 0xd800) * 0x400 + (lowChar - 0xdc00)
	+ 0x10000;
	outputByte[0] = (byte)(0xf0 \| ((ucs4 >> 18)) & 0x07);
	outputByte[1] = (byte)(0x80 \| ((ucs4 >> 12) & 0x3f));
	outputByte[2] = (byte)(0x80 \| ((ucs4 >> 6) & 0x3f));
	outputByte[3] = (byte)(0x80 \| (ucs4 & 0x3f));
	outputSize = 4;
	inputSize = 2;
	} else {
	outputByte[0] = (byte)(0xe0 \| ((inputChar >> 12)) & 0x0f);
	outputByte[1] = (byte)(0x80 \| ((inputChar >> 6) & 0x3f));
	outputByte[2] = (byte)(0x80 \| (inputChar & 0x3f));
	inputSize = 1;
	outputSize = 3;
	}
	if (byteOff + outputSize > outEnd) {
	throw new ConversionBufferFullException();
	}
	for (int i = 0; i < outputSize; i++) {
	output[byteOff++] = outputByte[i];
	}
	charOff += inputSize;
	}
	return byteOff - outOff;
	}

	public boolean canConvert(char ch) {
	return true;
	}

	public int getMaxBytesPerChar() {
	return 3;
	}

	public void reset() {
	byteOff = charOff = 0;
	highHalfZoneCode = 0;
	}

	public String getCharacterEncoding() {
	return "UTF8";
	}
	}