/*
 * Copyright (c) 1997, 2003, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
| package sun.io; |
| |
| import java.io.UnsupportedEncodingException; |
| import sun.nio.cs.ext.JISAutoDetect; |
| |
| public class ByteToCharJISAutoDetect extends ByteToCharConverter { |
| |
| private final static int EUCJP_MASK = 0x01; |
| private final static int SJIS2B_MASK = 0x02; |
| private final static int SJIS1B_MASK = 0x04; |
| private final static int EUCJP_KANA1_MASK = 0x08; |
| private final static int EUCJP_KANA2_MASK = 0x10; |
| private static byte[] maskTable1; |
| private static byte[] maskTable2; |
| |
| private final static int SS2 = 0x8e; |
| private final static int SS3 = 0x8f; |
| |
| private final static JISAutoDetect nioCoder = new JISAutoDetect(); |
| |
| // SJISName is set to either "SJIS" or "MS932" |
| private String SJISName; |
| private String EUCJPName; |
| |
| private String convName = null; |
| private ByteToCharConverter detectedConv = null; |
| private ByteToCharConverter defaultConv = null; |
| |
| public ByteToCharJISAutoDetect() { |
| super(); |
| SJISName = CharacterEncoding.getSJISName(); |
| EUCJPName = CharacterEncoding.getEUCJPName(); |
| defaultConv = new ByteToCharISO8859_1(); |
| defaultConv.subChars = subChars; |
| defaultConv.subMode = subMode; |
| maskTable1 = nioCoder.getByteMask1(); |
| maskTable2 = nioCoder.getByteMask2(); |
| } |
| |
| public int flush(char [] output, int outStart, int outEnd) |
| throws MalformedInputException, ConversionBufferFullException |
| { |
| badInputLength = 0; |
| if(detectedConv != null) |
| return detectedConv.flush(output, outStart, outEnd); |
| else |
| return defaultConv.flush(output, outStart, outEnd); |
| } |
| |
| |
| /** |
| * Character conversion |
| */ |
| public int convert(byte[] input, int inOff, int inEnd, |
| char[] output, int outOff, int outEnd) |
| throws UnknownCharacterException, MalformedInputException, |
| ConversionBufferFullException |
| { |
| int num = 0; |
| |
| charOff = outOff; |
| byteOff = inOff; |
| |
| try { |
| if (detectedConv == null) { |
| int euckana = 0; |
| int ss2count = 0; |
| int firstmask = 0; |
| int secondmask = 0; |
| int cnt; |
| boolean nonAsciiFound = false; |
| |
| for (cnt = inOff; cnt < inEnd; cnt++) { |
| firstmask = 0; |
| secondmask = 0; |
| int byte1 = input[cnt]&0xff; |
| int byte2; |
| |
| // TODO: should check valid escape sequences! |
| if (byte1 == 0x1b) { |
| convName = "ISO2022JP"; |
| break; |
| } |
| |
| // Try to convert all leading ASCII characters. |
| if ((nonAsciiFound == false) && (byte1 < 0x80)) { |
| if (charOff >= outEnd) |
| throw new ConversionBufferFullException(); |
| output[charOff++] = (char) byte1; |
| byteOff++; |
| num++; |
| continue; |
| } |
| |
| // We can no longer convert ASCII. |
| nonAsciiFound = true; |
| |
| firstmask = maskTable1[byte1]; |
| if (byte1 == SS2) |
| ss2count++; |
| |
| if (firstmask != 0) { |
| if (cnt+1 < inEnd) { |
| byte2 = input[++cnt] & 0xff; |
| secondmask = maskTable2[byte2]; |
| int mask = firstmask & secondmask; |
| if (mask == EUCJP_MASK) { |
| convName = EUCJPName; |
| break; |
| } |
| if ((mask == SJIS2B_MASK) || (mask == SJIS1B_MASK) |
| || (nioCoder.canBeSJIS1B(firstmask) && secondmask == 0)) { |
| convName = SJISName; |
| break; |
| } |
| |
| // If the first byte is a SS3 and the third byte |
| // is not an EUC byte, it should be SJIS. |
| // Otherwise, we can't determine it yet, but it's |
| // very likely SJIS. So we don't take the EUCJP CS3 |
| // character boundary. If we tried both |
| // possibilities here, it might be able to be |
| // determined correctly. |
| if ((byte1 == SS3) && nioCoder.canBeEUCJP(secondmask)) { |
| if (cnt+1 < inEnd) { |
| int nextbyte = input[cnt+1] & 0xff; |
| if (! nioCoder.canBeEUCJP(maskTable2[nextbyte])) |
| convName = SJISName; |
| } else |
| convName = SJISName; |
| } |
| if (nioCoder.canBeEUCKana(firstmask, secondmask)) |
| euckana++; |
| } else { |
| if ((firstmask & SJIS1B_MASK) != 0) { |
| convName = SJISName; |
| break; |
| } |
| } |
| } |
| } |
| |
| if (nonAsciiFound && (convName == null)) { |
| if ((euckana > 1) || (ss2count > 1)) |
| convName = EUCJPName; |
| else |
| convName = SJISName; |
| } |
| |
| if (convName != null) { |
| try { |
| detectedConv = ByteToCharConverter.getConverter(convName); |
| detectedConv.subChars = subChars; |
| detectedConv.subMode = subMode; |
| } catch (UnsupportedEncodingException e){ |
| detectedConv = null; |
| convName = null; |
| } |
| } |
| } |
| } catch (ConversionBufferFullException bufferFullException) { |
| throw bufferFullException; |
| } catch (Exception e) { |
| // If we fail to detect the converter needed for any reason, |
| // use the default converter. |
| detectedConv = defaultConv; |
| } |
| |
| // If we've converted all ASCII characters, then return. |
| if (byteOff == inEnd) { |
| return num; |
| } |
| |
| if(detectedConv != null) { |
| try { |
| num += detectedConv.convert(input, inOff + num, inEnd, |
| output, outOff + num, outEnd); |
| } finally { |
| charOff = detectedConv.nextCharIndex(); |
| byteOff = detectedConv.nextByteIndex(); |
| badInputLength = detectedConv.badInputLength; |
| } |
| } else { |
| try { |
| num += defaultConv.convert(input, inOff + num, inEnd, |
| output, outOff + num, outEnd); |
| } finally { |
| charOff = defaultConv.nextCharIndex(); |
| byteOff = defaultConv.nextByteIndex(); |
| badInputLength = defaultConv.badInputLength; |
| } |
| } |
| return num; |
| } |
| |
| public void reset() { |
| if(detectedConv != null) { |
| detectedConv.reset(); |
| detectedConv = null; |
| convName = null; |
| } else |
| defaultConv.reset(); |
| charOff = byteOff = 0; |
| } |
| |
| public String getCharacterEncoding() { |
| return "JISAutoDetect"; |
| } |
| |
| public String toString() { |
| String s = getCharacterEncoding(); |
| if (detectedConv != null) { |
| s += "[" + detectedConv.getCharacterEncoding() + "]"; |
| } else { |
| s += "[unknown]"; |
| } |
| return s; |
| } |
| } |