blob: 15e0f4670c69d75839f440198e6431fd3cc50bff [file] [log] [blame]
/*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package sun.nio.cs.ext;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.*;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
public class COMPOUND_TEXT_Encoder extends CharsetEncoder {
/**
* NOTE: The following four static variables should be used *only* for
* testing whether a encoder can encode a specific character. They
* cannot be used for actual encoding because they are shared across all
* COMPOUND_TEXT encoders and may be stateful.
*/
private static final Map<String,CharsetEncoder> encodingToEncoderMap =
Collections.synchronizedMap(new HashMap<String,CharsetEncoder>(21, 1.0f));
private static final CharsetEncoder latin1Encoder;
private static final CharsetEncoder defaultEncoder;
private static final boolean defaultEncodingSupported;
static {
CharsetEncoder encoder = Charset.defaultCharset().newEncoder();
String encoding = encoder.charset().name();
if ("ISO8859_1".equals(encoding)) {
latin1Encoder = encoder;
defaultEncoder = encoder;
defaultEncodingSupported = true;
} else {
try {
latin1Encoder =
Charset.forName("ISO8859_1").newEncoder();
} catch (IllegalArgumentException e) {
throw new ExceptionInInitializerError
("ISO8859_1 unsupported");
}
defaultEncoder = encoder;
defaultEncodingSupported = CompoundTextSupport.getEncodings().
contains(defaultEncoder.charset().name());
}
}
private CharsetEncoder encoder;
private char[] charBuf = new char[1];
private CharBuffer charbuf = CharBuffer.wrap(charBuf);
private ByteArrayOutputStream nonStandardCharsetBuffer;
private byte[] byteBuf;
private ByteBuffer bytebuf;
private int numNonStandardChars, nonStandardEncodingLen;
public COMPOUND_TEXT_Encoder(Charset cs) {
super(cs,
(float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2),
(float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2));
try {
encoder = Charset.forName("ISO8859_1").newEncoder();
} catch (IllegalArgumentException cannotHappen) {}
initEncoder(encoder);
}
protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) {
CoderResult cr = CoderResult.UNDERFLOW;
char[] input = src.array();
int inOff = src.arrayOffset() + src.position();
int inEnd = src.arrayOffset() + src.limit();
try {
while (inOff < inEnd && cr.isUnderflow()) {
charBuf[0] = input[inOff];
if (charBuf[0] <= '\u0008' ||
(charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') ||
(charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) {
// The compound text specification only permits the octets
// 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and
// 9B must also be removed because they initiate control
// sequences.
charBuf[0] = '?';
}
CharsetEncoder enc = getEncoder(charBuf[0]);
//System.out.println("char=" + charBuf[0] + ", enc=" + enc);
if (enc == null) {
if (unmappableCharacterAction()
== CodingErrorAction.REPORT) {
charBuf[0] = '?';
enc = latin1Encoder;
} else {
return CoderResult.unmappableForLength(1);
}
}
if (enc != encoder) {
if (nonStandardCharsetBuffer != null) {
cr = flushNonStandardCharsetBuffer(des);
} else {
//cr= encoder.flush(des);
flushEncoder(encoder, des);
}
if (!cr.isUnderflow())
return cr;
byte[] escSequence = CompoundTextSupport.
getEscapeSequence(enc.charset().name());
if (escSequence == null) {
throw new InternalError("Unknown encoding: " +
enc.charset().name());
} else if (escSequence[1] == (byte)0x25 &&
escSequence[2] == (byte)0x2F) {
initNonStandardCharsetBuffer(enc, escSequence);
} else if (des.remaining() >= escSequence.length) {
des.put(escSequence, 0, escSequence.length);
} else {
return CoderResult.OVERFLOW;
}
encoder = enc;
continue;
}
charbuf.rewind();
if (nonStandardCharsetBuffer == null) {
cr = encoder.encode(charbuf, des, false);
} else {
bytebuf.clear();
cr = encoder.encode(charbuf, bytebuf, false);
bytebuf.flip();
nonStandardCharsetBuffer.write(byteBuf,
0, bytebuf.limit());
numNonStandardChars++;
}
inOff++;
}
return cr;
} finally {
src.position(inOff - src.arrayOffset());
}
}
protected CoderResult implFlush(ByteBuffer out) {
CoderResult cr = (nonStandardCharsetBuffer != null)
? flushNonStandardCharsetBuffer(out)
//: encoder.flush(out);
: flushEncoder(encoder, out);
reset();
return cr;
}
private void initNonStandardCharsetBuffer(CharsetEncoder c,
byte[] escSequence)
{
nonStandardCharsetBuffer = new ByteArrayOutputStream();
byteBuf = new byte[(int)c.maxBytesPerChar()];
bytebuf = ByteBuffer.wrap(byteBuf);
nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length);
nonStandardCharsetBuffer.write(0); // M placeholder
nonStandardCharsetBuffer.write(0); // L placeholder
byte[] encoding = CompoundTextSupport.
getEncoding(c.charset().name());
if (encoding == null) {
throw new InternalError
("Unknown encoding: " + encoder.charset().name());
}
nonStandardCharsetBuffer.write(encoding, 0, encoding.length);
nonStandardCharsetBuffer.write(0x02); // divider
nonStandardEncodingLen = encoding.length + 1;
}
private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) {
if (numNonStandardChars > 0) {
byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() *
numNonStandardChars];
ByteBuffer bb = ByteBuffer.wrap(flushBuf);
flushEncoder(encoder, bb);
bb.flip();
nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit());
numNonStandardChars = 0;
}
int numBytes = nonStandardCharsetBuffer.size();
int nonStandardBytesOff = 6 + nonStandardEncodingLen;
if (out.remaining() < (numBytes - nonStandardBytesOff) +
nonStandardBytesOff * (((numBytes - nonStandardBytesOff) /
((1 << 14) - 1)) + 1))
{
return CoderResult.OVERFLOW;
}
byte[] nonStandardBytes =
nonStandardCharsetBuffer.toByteArray();
// The non-standard charset header only supports 2^14-1 bytes of data.
// If we have more than that, we have to repeat the header.
do {
out.put((byte)0x1B);
out.put((byte)0x25);
out.put((byte)0x2F);
out.put(nonStandardBytes[3]);
int toWrite = Math.min(numBytes - nonStandardBytesOff,
(1 << 14) - 1 - nonStandardEncodingLen);
out.put((byte)
(((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M
out.put((byte)
(((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L
out.put(nonStandardBytes, 6, nonStandardEncodingLen);
out.put(nonStandardBytes, nonStandardBytesOff, toWrite);
nonStandardBytesOff += toWrite;
} while (nonStandardBytesOff < numBytes);
nonStandardCharsetBuffer = null;
byteBuf = null;
nonStandardEncodingLen = 0;
return CoderResult.UNDERFLOW;
}
/**
* Resets the encoder.
* Call this method to reset the encoder to its initial state
*/
protected void implReset() {
numNonStandardChars = nonStandardEncodingLen = 0;
nonStandardCharsetBuffer = null;
byteBuf = null;
try {
encoder = Charset.forName("ISO8859_1").newEncoder();
} catch (IllegalArgumentException cannotHappen) {
}
initEncoder(encoder);
}
/**
* Return whether a character is mappable or not
* @return true if a character is mappable
*/
public boolean canEncode(char ch) {
return getEncoder(ch) != null;
}
protected void implOnMalformedInput(CodingErrorAction newAction) {
encoder.onUnmappableCharacter(newAction);
}
protected void implOnUnmappableCharacter(CodingErrorAction newAction) {
encoder.onUnmappableCharacter(newAction);
}
protected void implReplaceWith(byte[] newReplacement) {
if (encoder != null)
encoder.replaceWith(newReplacement);
}
/**
* Try to figure out which CharsetEncoder to use for conversion
* of the specified Unicode character. The target character encoding
* of the returned encoder is approved to be used with Compound Text.
*
* @param ch Unicode character
* @return CharsetEncoder to convert the given character
*/
private CharsetEncoder getEncoder(char ch) {
// 1. Try the current encoder.
if (encoder.canEncode(ch)) {
return encoder;
}
// 2. Try the default encoder.
if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) {
CharsetEncoder retval = null;
try {
retval = defaultEncoder.charset().newEncoder();
} catch (UnsupportedOperationException cannotHappen) {
}
initEncoder(retval);
return retval;
}
// 3. Try ISO8859-1.
if (latin1Encoder.canEncode(ch)) {
CharsetEncoder retval = null;
try {
retval = latin1Encoder.charset().newEncoder();
} catch (UnsupportedOperationException cannotHappen) {}
initEncoder(retval);
return retval;
}
// 4. Brute force search of all supported encodings.
for (String encoding : CompoundTextSupport.getEncodings())
{
CharsetEncoder enc = encodingToEncoderMap.get(encoding);
if (enc == null) {
enc = CompoundTextSupport.getEncoder(encoding);
if (enc == null) {
throw new InternalError("Unsupported encoding: " +
encoding);
}
encodingToEncoderMap.put(encoding, enc);
}
if (enc.canEncode(ch)) {
CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding);
initEncoder(retval);
return retval;
}
}
return null;
}
private void initEncoder(CharsetEncoder enc) {
try {
enc.onUnmappableCharacter(CodingErrorAction.REPLACE)
.replaceWith(replacement());
} catch (IllegalArgumentException x) {}
}
private CharBuffer fcb= CharBuffer.allocate(0);
private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) {
enc.encode(fcb, bb, true);
return enc.flush(bb);
}
}