// blob: 07c87af26aeeffa75f5772a175f51dc0e99e7de7 [file] [log] [blame]
/*
* Copyright (c) 2001, 2005, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package sun.nio.cs.ext;
import java.util.Collections;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.nio.charset.*;
final class CompoundTextSupport {
/**
 * Immutable-by-convention key made of an escape sequence and an optional
 * encoding name. Instances are used as hash-map keys, so the hash value is
 * computed once at construction time. The byte arrays are stored as given
 * (no copy is taken).
 */
private static final class ControlSequence {
    final int hash;
    final byte[] escSequence;
    final byte[] encoding;

    /**
     * Creates a control sequence that carries no encoding name.
     *
     * @param escSequence the escape sequence bytes; must not be null
     * @throws NullPointerException if escSequence is null
     */
    ControlSequence(byte[] escSequence) {
        this(escSequence, null);
    }

    /**
     * Creates a control sequence, and raises MAX_CONTROL_SEQUENCE_LEN if
     * this sequence is longer than any constructed before it.
     *
     * @param escSequence the escape sequence bytes; must not be null
     * @param encoding the encoding name bytes, or null if there is none
     * @throws NullPointerException if escSequence is null
     */
    ControlSequence(byte[] escSequence, byte[] encoding) {
        if (escSequence == null) {
            throw new NullPointerException();
        }
        this.escSequence = escSequence;
        this.encoding = encoding;

        int digest = weigh(escSequence);
        int total = escSequence.length;
        if (encoding != null) {
            digest += weigh(encoding);
            // A sequence with an encoding name is longer on the wire than
            // the escape sequence alone.
            total += 2 /* M L */ + encoding.length + 1 /* 0x02 */;
        }
        this.hash = digest;

        if (total > MAX_CONTROL_SEQUENCE_LEN) {
            MAX_CONTROL_SEQUENCE_LEN = total;
        }
    }

    /**
     * Sums the unsigned byte values, each shifted left by its index
     * modulo 4.
     */
    private static int weigh(byte[] bytes) {
        int sum = 0;
        for (int pos = 0; pos < bytes.length; pos++) {
            sum += (bytes[pos] & 0xff) << (pos % 4);
        }
        return sum;
    }

    /**
     * Compares two byte arrays by content; two null references are equal,
     * a null and a non-null reference are not.
     */
    private static boolean sameBytes(byte[] left, byte[] right) {
        if (left == right) {
            return true;
        }
        if (left == null || right == null || left.length != right.length) {
            return false;
        }
        for (int pos = 0; pos < left.length; pos++) {
            if (left[pos] != right[pos]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Two control sequences are equal when both their escape sequences and
     * their encoding names match byte for byte.
     */
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof ControlSequence)) {
            return false;
        }
        ControlSequence other = (ControlSequence)obj;
        return sameBytes(escSequence, other.escSequence)
            && sameBytes(encoding, other.encoding);
    }

    /** Returns the hash value computed by the constructor. */
    public int hashCode() {
        return hash;
    }

    /**
     * Builds a new control sequence whose escape sequence is this one's
     * followed by rhs's, and whose encoding name is rhs's.
     *
     * @param rhs the sequence to append
     * @return the combined control sequence
     * @throws IllegalArgumentException if this sequence already carries an
     *         encoding name
     */
    ControlSequence concatenate(ControlSequence rhs) {
        if (encoding != null) {
            throw new IllegalArgumentException
                ("cannot concatenate to a non-standard charset escape " +
                 "sequence");
        }

        byte[] head = escSequence;
        byte[] tail = rhs.escSequence;
        byte[] joined = new byte[head.length + tail.length];
        System.arraycopy(head, 0, joined, 0, head.length);
        System.arraycopy(tail, 0, joined, head.length, tail.length);
        return new ControlSequence(joined, rhs.encoding);
    }
}
/**
* Length, in bytes, of the longest control sequence constructed so far.
* The ControlSequence constructor raises this value whenever it builds a
* longer sequence. For a sequence that carries an encoding name, the length
* counts the escape sequence, 2 further bytes ("M L"), the encoding name
* and 1 further byte (0x02).
*/
static int MAX_CONTROL_SEQUENCE_LEN;
/**
* Maps a GL or GR escape sequence to an encoding.
* Keys are ControlSequence instances; values are charset names (String).
*/
private static final Map sequenceToEncodingMap;
/**
* Indicates whether a particular encoding wants the high bit turned on
* or off.
* Keys are ControlSequence instances; values are Boolean.TRUE or
* Boolean.FALSE.
*/
private static final Map highBitsMap;
/**
* Maps an encoding to an escape sequence. Rather than manage two
* converters in CharToByteCOMPOUND_TEXT, we output escape sequences which
* modify both GL and GR if necessary. This makes the output slightly less
* efficient, but our code much simpler.
* Keys are charset names (String); values are ControlSequence instances.
*/
private static final Map encodingToSequenceMap;
/**
* The keys of 'encodingToSequenceMap', sorted in preferential order.
* Elements are charset names (String).
*/
private static final List encodings;
/*
 * Populates the four lookup tables. Registration order matters only for
 * 'encodings', which lists charset names in preferential order; the order
 * below is therefore significant and must be kept.
 */
static {
    // US-ASCII and ISO-8859-1 are mandatory: every single-byte "full set"
    // registered below starts with the US-ASCII left-half sequence, and
    // ISO-8859-1 is registered unconditionally.
    if (!(isEncodingSupported("US-ASCII") &&
          isEncodingSupported("ISO-8859-1")))
    {
        throw new ExceptionInInitializerError
            ("US-ASCII and ISO-8859-1 unsupported");
    }

    TableBuilder tables = new TableBuilder();

    // US-ASCII: ESC 0x28 0x42 (high bit off, leave off) and
    //           ESC 0x29 0x42 (high bit on, turn off).
    tables.designate(tables.leftAscii, "US-ASCII", Boolean.FALSE);
    tables.designate(new ControlSequence(new byte[] { 0x1B, 0x29, 0x42 }),
                     "US-ASCII", Boolean.FALSE);

    // Right-half sets (ESC 0x2D F): high bit on, leave on.
    tables.addRightHalf("ISO-8859-1", 0x41);    // support verified above
    tables.addRightHalfIfSupported("ISO-8859-2", 0x42);
    tables.addRightHalfIfSupported("ISO-8859-3", 0x43);
    tables.addRightHalfIfSupported("ISO-8859-4", 0x44);
    tables.addRightHalfIfSupported("ISO-8859-5", 0x4C);
    tables.addRightHalfIfSupported("ISO-8859-6", 0x47);
    tables.addRightHalfIfSupported("ISO-8859-7", 0x46);
    tables.addRightHalfIfSupported("ISO-8859-8", 0x48);
    tables.addRightHalfIfSupported("ISO-8859-9", 0x4D);

    // JIS_X0201 has both a left and a right half, each of which may be
    // designated through either of two intermediate bytes (0x28 / 0x29).
    if (isEncodingSupported("JIS_X0201")) {
        ControlSequence glLeft =    // high bit off, leave off
            new ControlSequence(new byte[] { 0x1B, 0x28, 0x4A });
        ControlSequence glRight =   // high bit off, turn on
            new ControlSequence(new byte[] { 0x1B, 0x28, 0x49 });
        ControlSequence grLeft =    // high bit on, turn off
            new ControlSequence(new byte[] { 0x1B, 0x29, 0x4A });
        ControlSequence grRight =   // high bit on, leave on
            new ControlSequence(new byte[] { 0x1B, 0x29, 0x49 });
        tables.designate(glLeft, "JIS_X0201", Boolean.FALSE);
        tables.designate(glRight, "JIS_X0201", Boolean.TRUE);
        tables.designate(grLeft, "JIS_X0201", Boolean.FALSE);
        tables.designate(grRight, "JIS_X0201", Boolean.TRUE);
        tables.offer("JIS_X0201", glLeft.concatenate(grRight));
    }

    // Sets designated with ESC 0x24 0x28 F / ESC 0x24 0x29 F.
    tables.addMultiByteIfSupported("X11GB2312", 0x41);
    tables.addMultiByteIfSupported("x-JIS0208", 0x42);
    tables.addMultiByteIfSupported("X11KSC5601", 0x43);

    // Encodings not listed in Compound Text Encoding spec
    tables.addRightHalfIfSupported("ISO-8859-15", 0x62);    // Esc seq: -b
    tables.addRightHalfIfSupported("TIS-620", 0x54);        // Esc seq: -T
    tables.addMultiByteIfSupported("JIS_X0212-1990", 0x44);
    tables.addMultiByteIfSupported("X11CNS11643P1", 0x47);
    tables.addMultiByteIfSupported("X11CNS11643P2", 0x48);
    tables.addMultiByteIfSupported("X11CNS11643P3", 0x49);

    // Esc seq: %/2??SUN-KSC5601.1992-3
    if (isEncodingSupported("x-Johab")) {
        // 0x32 looks wrong. It's copied from the Sun X11 Compound Text
        // support code. It implies that all Johab characters comprise two
        // octets, which isn't true. Johab supports the ASCII/KS-Roman
        // characters from 0x21-0x7E with single-byte representations.
        tables.addNamed("x-Johab",
            new byte[] { 0x53, 0x55, 0x4e, 0x2d, 0x4b, 0x53, 0x43, 0x35,
                         0x36, 0x30, 0x31, 0x2e, 0x31, 0x39, 0x39, 0x32,
                         0x2d, 0x33 });
    }

    // Esc seq: %/2??SUN-BIG5-1
    if (isEncodingSupported("Big5")) {
        // 0x32 looks wrong. It's copied from the Sun X11 Compound Text
        // support code. It implies that all Big5 characters comprise two
        // octets, which isn't true. Big5 supports the ASCII/CNS-Roman
        // characters from 0x21-0x7E with single-byte representations.
        tables.addNamed("Big5",
            new byte[] { 0x53, 0x55, 0x4e, 0x2d, 0x42, 0x49, 0x47, 0x35,
                         0x2d, 0x31 });
    }

    sequenceToEncodingMap =
        Collections.unmodifiableMap(tables.sequenceToEncoding);
    highBitsMap = Collections.unmodifiableMap(tables.highBits);
    encodingToSequenceMap =
        Collections.unmodifiableMap(tables.encodingToSequence);
    encodings = Collections.unmodifiableList(tables.encodings);
}

/**
 * Mutable scratch tables used only while the static initializer runs.
 * Each add* method captures one of the registration patterns that the
 * initializer needs, so that every charset is described by a single line.
 */
private static final class TableBuilder {
    final Map sequenceToEncoding = new HashMap(33, 1.0f);
    final Map highBits = new HashMap(31, 1.0f);
    final Map encodingToSequence = new HashMap(21, 1.0f);
    final List encodings = new ArrayList(21);

    /** ESC 0x28 0x42: US-ASCII, high bit off, leave off. */
    final ControlSequence leftAscii =
        new ControlSequence(new byte[] { 0x1B, 0x28, 0x42 });

    /**
     * Records, for decoding, which charset an escape sequence selects and
     * which high-bit setting it asks for.
     */
    void designate(ControlSequence seq, String charsetName, Boolean highBit) {
        sequenceToEncoding.put(seq, charsetName);
        highBits.put(seq, highBit);
    }

    /**
     * Records, for encoding, the sequence to emit for a charset and appends
     * the charset to the preference-ordered list.
     */
    void offer(String charsetName, ControlSequence seq) {
        encodingToSequence.put(charsetName, seq);
        encodings.add(charsetName);
    }

    /**
     * Registers a charset selected by ESC 0x2D finalByte (high bit on,
     * leave on). The sequence offered for encoding is the US-ASCII left
     * half followed by this right half.
     */
    void addRightHalf(String charsetName, int finalByte) {
        ControlSequence rightHalf = new ControlSequence(
            new byte[] { 0x1B, 0x2D, (byte)finalByte });
        designate(rightHalf, charsetName, Boolean.TRUE);
        offer(charsetName, leftAscii.concatenate(rightHalf));
    }

    /** Same as addRightHalf, but does nothing for an unsupported charset. */
    void addRightHalfIfSupported(String charsetName, int finalByte) {
        if (isEncodingSupported(charsetName)) {
            addRightHalf(charsetName, finalByte);
        }
    }

    /**
     * Registers a charset selected by ESC 0x24 0x28 finalByte (high bit
     * off, leave off) or ESC 0x24 0x29 finalByte (high bit on, turn off),
     * if the charset is supported. The first form is the one offered for
     * encoding.
     */
    void addMultiByteIfSupported(String charsetName, int finalByte) {
        if (!isEncodingSupported(charsetName)) {
            return;
        }
        ControlSequence leftHalf = new ControlSequence(
            new byte[] { 0x1B, 0x24, 0x28, (byte)finalByte });
        ControlSequence rightHalf = new ControlSequence(
            new byte[] { 0x1B, 0x24, 0x29, (byte)finalByte });
        designate(leftHalf, charsetName, Boolean.FALSE);
        designate(rightHalf, charsetName, Boolean.FALSE);
        offer(charsetName, leftHalf);
    }

    /**
     * Registers a charset identified by ESC 0x25 0x2F 0x32 plus an encoding
     * name. No high-bit entry is recorded for such sequences.
     */
    void addNamed(String charsetName, byte[] encodingName) {
        ControlSequence seq = new ControlSequence(
            new byte[] { 0x1b, 0x25, 0x2f, 0x32 }, encodingName);
        sequenceToEncoding.put(seq, charsetName);
        offer(charsetName, seq);
    }
}
/**
 * Tells whether a charset can be used by this class.
 *
 * A charset qualifies if Charset.isSupported accepts its name, or failing
 * that, if both getDecoder and getEncoder can produce a coder for it.
 *
 * @param encoding the charset name to test
 * @return true if the charset is usable, false otherwise
 */
private static boolean isEncodingSupported(String encoding) {
    boolean registered;
    try {
        registered = Charset.isSupported(encoding);
    } catch (IllegalArgumentException ignored) {
        // Name rejected by Charset; fall through to the coder lookup.
        registered = false;
    }
    if (registered) {
        return true;
    }
    if (getDecoder(encoding) == null) {
        return false;
    }
    return getEncoder(encoding) != null;
}
// For Decoder
/**
 * Looks up the decoder for an escape sequence that carries no encoding
 * name.
 *
 * @param escSequence the escape sequence bytes
 * @return a new decoder, or null if none is available for the sequence
 */
static CharsetDecoder getStandardDecoder(byte[] escSequence) {
    final byte[] noEncodingName = null;
    return getNonStandardDecoder(escSequence, noEncodingName);
}
/**
 * Reports the high-bit setting registered for an escape sequence.
 *
 * @param escSequence the escape sequence bytes; must not be null
 * @return true if the sequence is registered in 'highBitsMap' with a true
 *         value; false if it is registered with a false value or is not
 *         registered at all
 */
static boolean getHighBit(byte[] escSequence) {
    Boolean registered = (Boolean)highBitsMap.get
        (new ControlSequence(escSequence));
    // Compare by value, not by reference: '==' would only work for the
    // canonical Boolean.TRUE instance. equals() also handles a missing
    // (null) entry.
    return Boolean.TRUE.equals(registered);
}
/**
 * Looks up the decoder for an escape sequence together with an encoding
 * name.
 *
 * @param escSequence the escape sequence bytes
 * @param encoding the encoding name bytes, or null if there is none
 * @return a new decoder, or null if the pair is not registered or no
 *         decoder can be created for the registered charset
 */
static CharsetDecoder getNonStandardDecoder(byte[] escSequence,
                                            byte[] encoding) {
    ControlSequence key = new ControlSequence(escSequence, encoding);
    String charsetName = (String)sequenceToEncodingMap.get(key);
    return getDecoder(charsetName);
}
/**
 * Creates a decoder for the named charset.
 *
 * The name is first resolved through Charset.forName. If that fails, a
 * class named "sun.awt.motif." + enc is loaded and instantiated as the
 * charset instead.
 *
 * @param enc the charset name, may be null
 * @return a new decoder, or null if enc is null, the charset cannot be
 *         found or instantiated, or it does not support decoding
 */
static CharsetDecoder getDecoder(String enc) {
    if (enc == null) {
        return null;
    }

    Charset charset;
    try {
        charset = Charset.forName(enc);
    } catch (IllegalArgumentException notRegistered) {
        // Not known to the charset registry: try the fallback class.
        Class fallback;
        try {
            fallback = Class.forName("sun.awt.motif." + enc);
        } catch (ClassNotFoundException missing) {
            return null;
        }
        try {
            charset = (Charset)fallback.newInstance();
        } catch (InstantiationException failed) {
            return null;
        } catch (IllegalAccessException failed) {
            return null;
        }
    }

    try {
        return charset.newDecoder();
    } catch (UnsupportedOperationException unsupported) {
        return null;
    }
}
// For Encoder
/**
 * Returns the escape sequence registered for a charset.
 *
 * @param encoding the charset name
 * @return the escape sequence bytes held by the registered control
 *         sequence (not a copy), or null if the charset is not registered
 */
static byte[] getEscapeSequence(String encoding) {
    Object entry = encodingToSequenceMap.get(encoding);
    return (entry == null) ? null : ((ControlSequence)entry).escSequence;
}
/**
 * Returns the encoding name bytes registered for a charset.
 *
 * @param encoding the charset name
 * @return the encoding name bytes held by the registered control sequence
 *         (not a copy); null if the charset is not registered or its
 *         control sequence carries no encoding name
 */
static byte[] getEncoding(String encoding) {
    Object entry = encodingToSequenceMap.get(encoding);
    return (entry == null) ? null : ((ControlSequence)entry).encoding;
}
/**
* Returns the charset names that have an escape sequence registered for
* encoding, in the order in which the static initializer registered them.
* The returned list is unmodifiable.
*/
static List getEncodings() {
return encodings;
}
/**
 * Creates an encoder for the named charset.
 *
 * The name is first resolved through Charset.forName. If that fails, a
 * class named "sun.awt.motif." + enc is loaded and instantiated as the
 * charset instead.
 *
 * @param enc the charset name, may be null
 * @return a new encoder, or null if enc is null, the charset cannot be
 *         found or instantiated, or creating the encoder fails for any
 *         reason
 */
static CharsetEncoder getEncoder(String enc) {
    if (enc == null) {
        return null;
    }

    Charset charset;
    try {
        charset = Charset.forName(enc);
    } catch (IllegalArgumentException notRegistered) {
        // Not known to the charset registry: try the fallback class.
        Class fallback;
        try {
            fallback = Class.forName("sun.awt.motif." + enc);
        } catch (ClassNotFoundException missing) {
            return null;
        }
        try {
            charset = (Charset)fallback.newInstance();
        } catch (InstantiationException failed) {
            return null;
        } catch (IllegalAccessException failed) {
            return null;
        }
    }

    try {
        return charset.newEncoder();
    } catch (Throwable anyFailure) {
        // Best effort: any failure while creating the encoder is reported
        // to the caller as "no encoder available".
        return null;
    }
}
// Not an instantiable class: all members are static, so the only
// constructor is private.
private CompoundTextSupport() {}
}