| /* |
| * Copyright (c) 1996, 1999, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package sun.io; |
| import java.io.*; |
| |
| public class CharToByteISO2022JP extends CharToByteJIS0208 { |
| |
| private static final int ASCII = 0; // ESC ( B |
| private static final int JISX0201_1976 = 1; // ESC ( J |
| private static final int JISX0208_1978 = 2; // ESC $ @ |
| private static final int JISX0208_1983 = 3; // ESC $ B |
| private static final int JISX0201_1976_KANA = 4; // ESC ( I |
| |
| private char highHalfZoneCode; |
| private boolean flushed = true; |
| |
| // JIS is state full encoding, so currentMode keep the |
| // current codeset |
| private int currentMode = ASCII; |
| |
| /** |
| * Bytes for substitute for unmappable input. |
| */ |
| // XXX: Assumes subBytes are ASCII string. Need to change Escape sequence |
| // for other character sets. |
| protected byte[] subBytesEscape = { (byte)0x1b, (byte)0x28, (byte)0x42 }; // ESC ( B |
| protected int subBytesMode = ASCII; |
| |
| public int flush(byte[] output, int outStart, int outEnd) |
| throws MalformedInputException, ConversionBufferFullException |
| { |
| if (highHalfZoneCode != 0) { |
| highHalfZoneCode = 0; |
| badInputLength = 0; |
| throw new MalformedInputException(); |
| } |
| |
| if (!flushed && (currentMode != ASCII)) { |
| if (outEnd - outStart < 3) { |
| throw new ConversionBufferFullException(); |
| } |
| output[outStart] = (byte)0x1b; |
| output[outStart + 1] = (byte)0x28; |
| output[outStart + 2] = (byte)0x42; |
| byteOff += 3; |
| byteOff = charOff = 0; |
| flushed = true; |
| currentMode = ASCII; |
| return 3; |
| } |
| return 0; |
| } |
| |
| public int convert(char[] input, int inOff, int inEnd, |
| byte[] output, int outOff, int outEnd) |
| throws MalformedInputException, UnknownCharacterException, |
| ConversionBufferFullException |
| |
| { |
| char inputChar; // Input character to be converted |
| int inputSize; // Size of the input |
| int outputSize; // Size of the output |
| |
| // Buffer for output bytes |
| byte[] tmpArray = new byte[6]; |
| byte[] outputByte; |
| |
| flushed = false; |
| |
| // Make copies of input and output indexes |
| charOff = inOff; |
| byteOff = outOff; |
| |
| if (highHalfZoneCode != 0) { |
| inputChar = highHalfZoneCode; |
| highHalfZoneCode = 0; |
| if (input[inOff] >= 0xdc00 && input[inOff] <= 0xdfff) { |
| // This is legal UTF16 sequence. |
| badInputLength = 1; |
| throw new UnknownCharacterException(); |
| } else { |
| // This is illegal UTF16 sequence. |
| badInputLength = 0; |
| throw new MalformedInputException(); |
| } |
| } |
| |
| // Loop until we run out of input |
| while(charOff < inEnd) { |
| outputByte = tmpArray; |
| int newMode = currentMode; // Trace character mode changing |
| |
| // Get the input character |
| inputChar = input[charOff]; |
| inputSize = 1; |
| outputSize = 1; |
| |
| // Is this a high surrogate? |
| if(inputChar >= '\uD800' && inputChar <= '\uDBFF') { |
| // Is this the last character of the input? |
| if (charOff + 1 >= inEnd) { |
| highHalfZoneCode = inputChar; |
| break; |
| } |
| |
| // Is there a low surrogate following? |
| inputChar = input[charOff + 1]; |
| if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { |
| // We have a valid surrogate pair. Too bad we don't do |
| // surrogates. Is substitution enabled? |
| if (subMode) { |
| if (currentMode != subBytesMode) { |
| System.arraycopy(subBytesEscape, 0, outputByte, 0, |
| subBytesEscape.length); |
| outputSize = subBytesEscape.length; |
| System.arraycopy(subBytes, 0, outputByte, |
| outputSize, subBytes.length); |
| outputSize += subBytes.length; |
| newMode = subBytesMode; |
| } else { |
| outputByte = subBytes; |
| outputSize = subBytes.length; |
| } |
| inputSize = 2; |
| } else { |
| badInputLength = 2; |
| throw new UnknownCharacterException(); |
| } |
| } else { |
| // We have a malformed surrogate pair |
| badInputLength = 1; |
| throw new MalformedInputException(); |
| } |
| } |
| |
| // Is this an unaccompanied low surrogate? |
| else if (inputChar >= '\uDC00' && inputChar <= '\uDFFF') { |
| badInputLength = 1; |
| throw new MalformedInputException(); |
| } else { |
| // Not part of a surrogate |
| |
| // Does this map to the Roman range? |
| if (inputChar <= '\u007F') { |
| if (currentMode != ASCII) { |
| outputByte[0] = (byte)0x1b; |
| outputByte[1] = (byte)0x28; |
| outputByte[2] = (byte)0x42; |
| outputByte[3] = (byte)inputChar; |
| outputSize = 4; |
| newMode = ASCII; |
| } else { |
| outputByte[0] = (byte)inputChar; |
| outputSize = 1; |
| } |
| } |
| // Is it a single byte kana? |
| else if (inputChar >= 0xFF61 && inputChar <= 0xFF9F) { |
| if (currentMode != JISX0201_1976_KANA) { |
| outputByte[0] = (byte)0x1b; |
| outputByte[1] = (byte)0x28; |
| outputByte[2] = (byte)0x49; |
| outputByte[3] = (byte)(inputChar - 0xff40); |
| outputSize = 4; |
| newMode = JISX0201_1976_KANA; |
| } else { |
| outputByte[0] = (byte)(inputChar - 0xff40); |
| outputSize = 1; |
| } |
| } |
| // Is it a yen sign? |
| else if (inputChar == '\u00A5') { |
| if (currentMode != JISX0201_1976) { |
| outputByte[0] = (byte)0x1b; |
| outputByte[1] = (byte)0x28; |
| outputByte[2] = (byte)0x4a; |
| outputByte[3] = (byte)0x5c; |
| outputSize = 4; |
| newMode = JISX0201_1976; |
| } else { |
| outputByte[0] = (byte)0x5C; |
| outputSize = 1; |
| } |
| } |
| // Is it a tilde? |
| else if (inputChar == '\u203E') |
| { |
| if (currentMode != JISX0201_1976) { |
| outputByte[0] = (byte)0x1b; |
| outputByte[1] = (byte)0x28; |
| outputByte[2] = (byte)0x4a; |
| outputByte[3] = (byte)0x7e; |
| outputSize = 4; |
| newMode = JISX0201_1976; |
| } else { |
| outputByte[0] = (byte)0x7e; |
| outputSize = 1; |
| } |
| } |
| // Is it a JIS-X-0208 character? |
| else { |
| int index = getNative(inputChar); |
| if (index != 0) { |
| if (currentMode != JISX0208_1983) { |
| outputByte[0] = (byte)0x1b; |
| outputByte[1] = (byte)0x24; |
| outputByte[2] = (byte)0x42; |
| outputByte[3] = (byte)(index >> 8); |
| outputByte[4] = (byte)(index & 0xff); |
| outputSize = 5; |
| newMode = JISX0208_1983; |
| } else { |
| outputByte[0] = (byte)(index >> 8); |
| outputByte[1] = (byte)(index & 0xff); |
| outputSize = 2; |
| } |
| } |
| // It doesn't map to JIS-0208! |
| else { |
| if (subMode) { |
| if (currentMode != subBytesMode) { |
| System.arraycopy(subBytesEscape, 0, outputByte, 0, |
| subBytesEscape.length); |
| outputSize = subBytesEscape.length; |
| System.arraycopy(subBytes, 0, outputByte, |
| outputSize, subBytes.length); |
| outputSize += subBytes.length; |
| newMode = subBytesMode; |
| } else { |
| outputByte = subBytes; |
| outputSize = subBytes.length; |
| } |
| } else { |
| badInputLength = 1; |
| throw new UnknownCharacterException(); |
| } |
| } |
| } |
| } |
| |
| // Is there room in the output buffer? |
| // XXX: The code assumes output buffer can hold at least 5 bytes, |
| // in this coverter case. However, there is no way for apps to |
| // see how many bytes will be necessary for next call. |
| // getMaxBytesPerChar() should be overriden in every subclass of |
| // CharToByteConverter and reflect real value (5 for this). |
| if (byteOff + outputSize > outEnd) |
| throw new ConversionBufferFullException(); |
| |
| // Put the output into the buffer |
| for ( int i = 0 ; i < outputSize ; i++ ) |
| output[byteOff++] = outputByte[i]; |
| |
| // Advance the input pointer |
| charOff += inputSize; |
| |
| // We can successfuly output the characters, changes |
| // current mode. Fix for 4251646. |
| currentMode = newMode; |
| } |
| |
| // return mode ASCII at the end |
| if (currentMode != ASCII){ |
| if (byteOff + 3 > outEnd) |
| throw new ConversionBufferFullException(); |
| |
| output[byteOff++] = 0x1b; |
| output[byteOff++] = 0x28; |
| output[byteOff++] = 0x42; |
| currentMode = ASCII; |
| } |
| |
| // Return the length written to the output buffer |
| return byteOff-outOff; |
| } |
| |
| // Reset |
| public void reset() { |
| highHalfZoneCode = 0; |
| byteOff = charOff = 0; |
| currentMode = ASCII; |
| } |
| |
| /** |
| * returns the maximum number of bytes needed to convert a char |
| */ |
| public int getMaxBytesPerChar() { |
| return 8; |
| } |
| |
| // Return the character set ID |
| public String getCharacterEncoding() { |
| return "ISO2022JP"; |
| } |
| |
| } |