| /* |
| * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| package sun.nio.cs.ext; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.*; |
| |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| |
| public class COMPOUND_TEXT_Encoder extends CharsetEncoder { |
| |
| /** |
| * NOTE: The following four static variables should be used *only* for |
| * testing whether a encoder can encode a specific character. They |
| * cannot be used for actual encoding because they are shared across all |
| * COMPOUND_TEXT encoders and may be stateful. |
| */ |
| private static final Map<String,CharsetEncoder> encodingToEncoderMap = |
| Collections.synchronizedMap(new HashMap<String,CharsetEncoder>(21, 1.0f)); |
| private static final CharsetEncoder latin1Encoder; |
| private static final CharsetEncoder defaultEncoder; |
| private static final boolean defaultEncodingSupported; |
| |
| static { |
| CharsetEncoder encoder = Charset.defaultCharset().newEncoder(); |
| String encoding = encoder.charset().name(); |
| if ("ISO8859_1".equals(encoding)) { |
| latin1Encoder = encoder; |
| defaultEncoder = encoder; |
| defaultEncodingSupported = true; |
| } else { |
| try { |
| latin1Encoder = |
| Charset.forName("ISO8859_1").newEncoder(); |
| } catch (IllegalArgumentException e) { |
| throw new ExceptionInInitializerError |
| ("ISO8859_1 unsupported"); |
| } |
| defaultEncoder = encoder; |
| defaultEncodingSupported = CompoundTextSupport.getEncodings(). |
| contains(defaultEncoder.charset().name()); |
| } |
| } |
| |
| private CharsetEncoder encoder; |
| private char[] charBuf = new char[1]; |
| private CharBuffer charbuf = CharBuffer.wrap(charBuf); |
| private ByteArrayOutputStream nonStandardCharsetBuffer; |
| private byte[] byteBuf; |
| private ByteBuffer bytebuf; |
| private int numNonStandardChars, nonStandardEncodingLen; |
| |
| public COMPOUND_TEXT_Encoder(Charset cs) { |
| super(cs, |
| (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2), |
| (float)(CompoundTextSupport.MAX_CONTROL_SEQUENCE_LEN + 2)); |
| try { |
| encoder = Charset.forName("ISO8859_1").newEncoder(); |
| } catch (IllegalArgumentException cannotHappen) {} |
| initEncoder(encoder); |
| } |
| |
| protected CoderResult encodeLoop(CharBuffer src, ByteBuffer des) { |
| CoderResult cr = CoderResult.UNDERFLOW; |
| char[] input = src.array(); |
| int inOff = src.arrayOffset() + src.position(); |
| int inEnd = src.arrayOffset() + src.limit(); |
| |
| try { |
| while (inOff < inEnd && cr.isUnderflow()) { |
| charBuf[0] = input[inOff]; |
| if (charBuf[0] <= '\u0008' || |
| (charBuf[0] >= '\u000B' && charBuf[0] <= '\u001F') || |
| (charBuf[0] >= '\u0080' && charBuf[0] <= '\u009F')) { |
| // The compound text specification only permits the octets |
| // 0x09, 0x0A, 0x1B, and 0x9B in C0 and C1. Of these, 1B and |
| // 9B must also be removed because they initiate control |
| // sequences. |
| charBuf[0] = '?'; |
| } |
| |
| CharsetEncoder enc = getEncoder(charBuf[0]); |
| //System.out.println("char=" + charBuf[0] + ", enc=" + enc); |
| if (enc == null) { |
| if (unmappableCharacterAction() |
| == CodingErrorAction.REPORT) { |
| charBuf[0] = '?'; |
| enc = latin1Encoder; |
| } else { |
| return CoderResult.unmappableForLength(1); |
| } |
| } |
| if (enc != encoder) { |
| if (nonStandardCharsetBuffer != null) { |
| cr = flushNonStandardCharsetBuffer(des); |
| } else { |
| //cr= encoder.flush(des); |
| flushEncoder(encoder, des); |
| } |
| if (!cr.isUnderflow()) |
| return cr; |
| byte[] escSequence = CompoundTextSupport. |
| getEscapeSequence(enc.charset().name()); |
| if (escSequence == null) { |
| throw new InternalError("Unknown encoding: " + |
| enc.charset().name()); |
| } else if (escSequence[1] == (byte)0x25 && |
| escSequence[2] == (byte)0x2F) { |
| initNonStandardCharsetBuffer(enc, escSequence); |
| } else if (des.remaining() >= escSequence.length) { |
| des.put(escSequence, 0, escSequence.length); |
| } else { |
| return CoderResult.OVERFLOW; |
| } |
| encoder = enc; |
| continue; |
| } |
| charbuf.rewind(); |
| if (nonStandardCharsetBuffer == null) { |
| cr = encoder.encode(charbuf, des, false); |
| } else { |
| bytebuf.clear(); |
| cr = encoder.encode(charbuf, bytebuf, false); |
| bytebuf.flip(); |
| nonStandardCharsetBuffer.write(byteBuf, |
| 0, bytebuf.limit()); |
| numNonStandardChars++; |
| } |
| inOff++; |
| } |
| return cr; |
| } finally { |
| src.position(inOff - src.arrayOffset()); |
| } |
| } |
| |
| protected CoderResult implFlush(ByteBuffer out) { |
| CoderResult cr = (nonStandardCharsetBuffer != null) |
| ? flushNonStandardCharsetBuffer(out) |
| //: encoder.flush(out); |
| : flushEncoder(encoder, out); |
| reset(); |
| return cr; |
| } |
| |
| private void initNonStandardCharsetBuffer(CharsetEncoder c, |
| byte[] escSequence) |
| { |
| nonStandardCharsetBuffer = new ByteArrayOutputStream(); |
| byteBuf = new byte[(int)c.maxBytesPerChar()]; |
| bytebuf = ByteBuffer.wrap(byteBuf); |
| nonStandardCharsetBuffer.write(escSequence, 0, escSequence.length); |
| nonStandardCharsetBuffer.write(0); // M placeholder |
| nonStandardCharsetBuffer.write(0); // L placeholder |
| byte[] encoding = CompoundTextSupport. |
| getEncoding(c.charset().name()); |
| if (encoding == null) { |
| throw new InternalError |
| ("Unknown encoding: " + encoder.charset().name()); |
| } |
| nonStandardCharsetBuffer.write(encoding, 0, encoding.length); |
| nonStandardCharsetBuffer.write(0x02); // divider |
| nonStandardEncodingLen = encoding.length + 1; |
| } |
| |
| private CoderResult flushNonStandardCharsetBuffer(ByteBuffer out) { |
| if (numNonStandardChars > 0) { |
| byte[] flushBuf = new byte[(int)encoder.maxBytesPerChar() * |
| numNonStandardChars]; |
| ByteBuffer bb = ByteBuffer.wrap(flushBuf); |
| flushEncoder(encoder, bb); |
| bb.flip(); |
| nonStandardCharsetBuffer.write(flushBuf, 0, bb.limit()); |
| numNonStandardChars = 0; |
| } |
| |
| int numBytes = nonStandardCharsetBuffer.size(); |
| int nonStandardBytesOff = 6 + nonStandardEncodingLen; |
| |
| if (out.remaining() < (numBytes - nonStandardBytesOff) + |
| nonStandardBytesOff * (((numBytes - nonStandardBytesOff) / |
| ((1 << 14) - 1)) + 1)) |
| { |
| return CoderResult.OVERFLOW; |
| } |
| |
| byte[] nonStandardBytes = |
| nonStandardCharsetBuffer.toByteArray(); |
| |
| // The non-standard charset header only supports 2^14-1 bytes of data. |
| // If we have more than that, we have to repeat the header. |
| do { |
| out.put((byte)0x1B); |
| out.put((byte)0x25); |
| out.put((byte)0x2F); |
| out.put(nonStandardBytes[3]); |
| |
| int toWrite = Math.min(numBytes - nonStandardBytesOff, |
| (1 << 14) - 1 - nonStandardEncodingLen); |
| |
| out.put((byte) |
| (((toWrite + nonStandardEncodingLen) / 0x80) | 0x80)); // M |
| out.put((byte) |
| (((toWrite + nonStandardEncodingLen) % 0x80) | 0x80)); // L |
| out.put(nonStandardBytes, 6, nonStandardEncodingLen); |
| out.put(nonStandardBytes, nonStandardBytesOff, toWrite); |
| nonStandardBytesOff += toWrite; |
| } while (nonStandardBytesOff < numBytes); |
| |
| nonStandardCharsetBuffer = null; |
| byteBuf = null; |
| nonStandardEncodingLen = 0; |
| return CoderResult.UNDERFLOW; |
| } |
| |
| /** |
| * Resets the encoder. |
| * Call this method to reset the encoder to its initial state |
| */ |
| protected void implReset() { |
| numNonStandardChars = nonStandardEncodingLen = 0; |
| nonStandardCharsetBuffer = null; |
| byteBuf = null; |
| try { |
| encoder = Charset.forName("ISO8859_1").newEncoder(); |
| } catch (IllegalArgumentException cannotHappen) { |
| } |
| initEncoder(encoder); |
| } |
| |
| /** |
| * Return whether a character is mappable or not |
| * @return true if a character is mappable |
| */ |
| public boolean canEncode(char ch) { |
| return getEncoder(ch) != null; |
| } |
| |
| protected void implOnMalformedInput(CodingErrorAction newAction) { |
| encoder.onUnmappableCharacter(newAction); |
| } |
| |
| protected void implOnUnmappableCharacter(CodingErrorAction newAction) { |
| encoder.onUnmappableCharacter(newAction); |
| } |
| |
| protected void implReplaceWith(byte[] newReplacement) { |
| if (encoder != null) |
| encoder.replaceWith(newReplacement); |
| } |
| |
| /** |
| * Try to figure out which CharsetEncoder to use for conversion |
| * of the specified Unicode character. The target character encoding |
| * of the returned encoder is approved to be used with Compound Text. |
| * |
| * @param ch Unicode character |
| * @return CharsetEncoder to convert the given character |
| */ |
| private CharsetEncoder getEncoder(char ch) { |
| // 1. Try the current encoder. |
| if (encoder.canEncode(ch)) { |
| return encoder; |
| } |
| |
| // 2. Try the default encoder. |
| if (defaultEncodingSupported && defaultEncoder.canEncode(ch)) { |
| CharsetEncoder retval = null; |
| try { |
| retval = defaultEncoder.charset().newEncoder(); |
| } catch (UnsupportedOperationException cannotHappen) { |
| } |
| initEncoder(retval); |
| return retval; |
| } |
| |
| // 3. Try ISO8859-1. |
| if (latin1Encoder.canEncode(ch)) { |
| CharsetEncoder retval = null; |
| try { |
| retval = latin1Encoder.charset().newEncoder(); |
| } catch (UnsupportedOperationException cannotHappen) {} |
| initEncoder(retval); |
| return retval; |
| } |
| |
| // 4. Brute force search of all supported encodings. |
| for (String encoding : CompoundTextSupport.getEncodings()) |
| { |
| CharsetEncoder enc = encodingToEncoderMap.get(encoding); |
| if (enc == null) { |
| enc = CompoundTextSupport.getEncoder(encoding); |
| if (enc == null) { |
| throw new InternalError("Unsupported encoding: " + |
| encoding); |
| } |
| encodingToEncoderMap.put(encoding, enc); |
| } |
| if (enc.canEncode(ch)) { |
| CharsetEncoder retval = CompoundTextSupport.getEncoder(encoding); |
| initEncoder(retval); |
| return retval; |
| } |
| } |
| |
| return null; |
| } |
| |
| private void initEncoder(CharsetEncoder enc) { |
| try { |
| enc.onUnmappableCharacter(CodingErrorAction.REPLACE) |
| .replaceWith(replacement()); |
| } catch (IllegalArgumentException x) {} |
| } |
| |
| private CharBuffer fcb= CharBuffer.allocate(0); |
| private CoderResult flushEncoder(CharsetEncoder enc, ByteBuffer bb) { |
| enc.encode(fcb, bb, true); |
| return enc.flush(bb); |
| } |
| } |