blob: 97a0e88280748ebc8d516ab7909a28651a2a6c31 [file] [log] [blame]
/*
* Copyright (c) 1997, 2003, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package sun.io;
import java.io.UnsupportedEncodingException;
import sun.nio.cs.ext.JISAutoDetect;
/**
 * Byte-to-char converter that guesses which Japanese encoding its input uses
 * and then hands the conversion over to the converter for that encoding.
 *
 * <p>Leading ASCII bytes are converted directly. The first non-ASCII data is
 * inspected to choose between ISO-2022-JP (an ESC byte was seen), EUC-JP and
 * Shift_JIS (under the names supplied by {@code CharacterEncoding}). Once a
 * converter has been chosen it is reused until {@link #reset()} is called.
 * If no converter can be chosen, an ISO-8859-1 converter is used instead.
 */
public class ByteToCharJISAutoDetect extends ByteToCharConverter {

    // Bit flags stored in the mask tables obtained from JISAutoDetect.
    // Only the first three are tested directly in this class; the KANA flags
    // are kept for reference (presumably consumed by JISAutoDetect's helpers).
    private static final int EUCJP_MASK = 0x01;
    private static final int SJIS2B_MASK = 0x02;
    private static final int SJIS1B_MASK = 0x04;
    private static final int EUCJP_KANA1_MASK = 0x08;
    private static final int EUCJP_KANA2_MASK = 0x10;

    // Lead bytes given special treatment by the detection loop.
    private static final int SS2 = 0x8e;
    private static final int SS3 = 0x8f;

    private static final JISAutoDetect nioCoder = new JISAutoDetect();

    // Classification tables indexed by the unsigned value of the first and
    // second byte of a candidate pair. They are class-wide, so they are
    // initialised once here (after nioCoder) rather than being rewritten by
    // every constructor call.
    private static final byte[] maskTable1 = nioCoder.getByteMask1();
    private static final byte[] maskTable2 = nioCoder.getByteMask2();

    // SJISName is set to either "SJIS" or "MS932"
    private final String SJISName;
    private final String EUCJPName;

    // Name of the encoding chosen by the last detection, or null.
    private String convName = null;

    // Converter chosen by detection; null until detection has succeeded.
    private ByteToCharConverter detectedConv = null;

    // Fallback converter used while nothing has been detected.
    private final ByteToCharConverter defaultConv;

    /**
     * Creates a converter with no encoding detected yet and an ISO-8859-1
     * fallback converter.
     */
    public ByteToCharJISAutoDetect() {
        super();
        SJISName = CharacterEncoding.getSJISName();
        EUCJPName = CharacterEncoding.getEUCJPName();
        defaultConv = new ByteToCharISO8859_1();
        defaultConv.subChars = subChars;
        defaultConv.subMode = subMode;
    }

    /**
     * Returns the converter that should currently do the work: the detected
     * one if there is one, otherwise the fallback.
     *
     * <p>This converter's current substitution settings are copied to the
     * delegate on every call, so that settings changed after construction or
     * after detection are still honoured by the delegate.
     */
    private ByteToCharConverter activeConverter() {
        ByteToCharConverter conv = (detectedConv != null) ? detectedConv : defaultConv;
        conv.subChars = subChars;
        conv.subMode = subMode;
        return conv;
    }

    /**
     * Flushes the currently active delegate converter.
     *
     * @param output   destination character array
     * @param outStart start offset in {@code output}
     * @param outEnd   end offset in {@code output}
     * @return the value returned by the delegate's {@code flush}
     * @throws MalformedInputException       propagated from the delegate
     * @throws ConversionBufferFullException propagated from the delegate
     */
    public int flush(char[] output, int outStart, int outEnd)
        throws MalformedInputException, ConversionBufferFullException
    {
        badInputLength = 0;
        return activeConverter().flush(output, outStart, outEnd);
    }

    /**
     * Character conversion.
     *
     * <p>If no encoding has been detected yet, the input is scanned: leading
     * ASCII bytes are written straight to {@code output}, and the first
     * non-ASCII bytes are used to choose a converter. The remaining input is
     * then converted by the chosen converter (or by the fallback).
     *
     * @param input  source bytes
     * @param inOff  index of the first byte to read
     * @param inEnd  index after the last byte to read
     * @param output destination characters
     * @param outOff index of the first character to write
     * @param outEnd index after the last writable character
     * @return the number of leading ASCII characters written here plus the
     *         value returned by the delegate's {@code convert}
     * @throws UnknownCharacterException     propagated from the delegate
     * @throws MalformedInputException       propagated from the delegate
     * @throws ConversionBufferFullException if {@code output} fills up while
     *         copying leading ASCII, or propagated from the delegate
     */
    public int convert(byte[] input, int inOff, int inEnd,
                       char[] output, int outOff, int outEnd)
        throws UnknownCharacterException, MalformedInputException,
               ConversionBufferFullException
    {
        int num = 0;

        charOff = outOff;
        byteOff = inOff;

        try {
            if (detectedConv == null) {
                int euckana = 0;
                int ss2count = 0;
                boolean nonAsciiFound = false;

                for (int cnt = inOff; cnt < inEnd; cnt++) {
                    int byte1 = input[cnt] & 0xff;

                    // TODO: should check valid escape sequences!
                    if (byte1 == 0x1b) {
                        convName = "ISO2022JP";
                        break;
                    }

                    // Try to convert all leading ASCII characters.
                    if (!nonAsciiFound && (byte1 < 0x80)) {
                        if (charOff >= outEnd) {
                            throw new ConversionBufferFullException();
                        }
                        output[charOff++] = (char) byte1;
                        byteOff++;
                        num++;
                        continue;
                    }

                    // We can no longer convert ASCII.
                    nonAsciiFound = true;

                    int firstmask = maskTable1[byte1];
                    if (byte1 == SS2) {
                        ss2count++;
                    }

                    if (firstmask != 0) {
                        if (cnt + 1 < inEnd) {
                            // Consume the following byte as the second half
                            // of a candidate pair.
                            int byte2 = input[++cnt] & 0xff;
                            int secondmask = maskTable2[byte2];
                            int mask = firstmask & secondmask;

                            if (mask == EUCJP_MASK) {
                                convName = EUCJPName;
                                break;
                            }
                            if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK)
                                || (nioCoder.canBeSJIS1B(firstmask) && secondmask == 0)) {
                                convName = SJISName;
                                break;
                            }

                            // If the first byte is a SS3 and the third byte
                            // is not an EUC byte, it should be SJIS.
                            // Otherwise, we can't determine it yet, but it's
                            // very likely SJIS. So we don't take the EUCJP CS3
                            // character boundary. If we tried both
                            // possibilities here, it might be able to be
                            // determined correctly.
                            if ((byte1 == SS3) && nioCoder.canBeEUCJP(secondmask)) {
                                if (cnt + 1 < inEnd) {
                                    int nextbyte = input[cnt + 1] & 0xff;
                                    if (!nioCoder.canBeEUCJP(maskTable2[nextbyte])) {
                                        convName = SJISName;
                                    }
                                } else {
                                    convName = SJISName;
                                }
                            }
                            if (nioCoder.canBeEUCKana(firstmask, secondmask)) {
                                euckana++;
                            }
                        } else {
                            // Last byte of the input: only its own mask is
                            // available to decide with.
                            if ((firstmask & SJIS1B_MASK) != 0) {
                                convName = SJISName;
                                break;
                            }
                        }
                    }
                }

                // Nothing conclusive was found: fall back on the counts
                // gathered during the scan.
                if (nonAsciiFound && (convName == null)) {
                    if ((euckana > 1) || (ss2count > 1)) {
                        convName = EUCJPName;
                    } else {
                        convName = SJISName;
                    }
                }

                if (convName != null) {
                    try {
                        detectedConv = ByteToCharConverter.getConverter(convName);
                        detectedConv.subChars = subChars;
                        detectedConv.subMode = subMode;
                    } catch (UnsupportedEncodingException e) {
                        // Leave detection open so a later call can retry.
                        detectedConv = null;
                        convName = null;
                    }
                }
            }
        } catch (ConversionBufferFullException bufferFullException) {
            throw bufferFullException;
        } catch (Exception e) {
            // If we fail to detect the converter needed for any reason,
            // use the default converter.
            detectedConv = defaultConv;
        }

        // If we've converted all ASCII characters, then return.
        if (byteOff == inEnd) {
            return num;
        }

        // Hand the rest of the input to the active converter, then mirror its
        // progress into this converter's own offsets even if it throws.
        ByteToCharConverter conv = activeConverter();
        try {
            num += conv.convert(input, inOff + num, inEnd,
                                output, outOff + num, outEnd);
        } finally {
            charOff = conv.nextCharIndex();
            byteOff = conv.nextByteIndex();
            badInputLength = conv.badInputLength;
        }
        return num;
    }

    /**
     * Resets the active delegate and forgets any detected encoding, so the
     * next {@code convert} call starts detection again.
     */
    public void reset() {
        if (detectedConv != null) {
            detectedConv.reset();
            detectedConv = null;
            convName = null;
        } else {
            defaultConv.reset();
        }
        charOff = byteOff = 0;
    }

    /**
     * Returns the name of this converter's encoding, always
     * {@code "JISAutoDetect"}.
     */
    public String getCharacterEncoding() {
        return "JISAutoDetect";
    }

    /**
     * Returns the encoding name followed by the detected delegate's encoding
     * in brackets, or {@code [unknown]} if nothing has been detected.
     */
    public String toString() {
        String s = getCharacterEncoding();
        if (detectedConv != null) {
            s += "[" + detectedConv.getCharacterEncoding() + "]";
        } else {
            s += "[unknown]";
        }
        return s;
    }
}