| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package java.lang; |
| |
| import java.io.UnsupportedEncodingException; |
| import java.lang.ref.SoftReference; |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.charset.Charset; |
| import java.nio.charset.CharsetDecoder; |
| import java.nio.charset.CharsetEncoder; |
| import java.nio.charset.CharacterCodingException; |
| import java.nio.charset.CoderResult; |
| import java.nio.charset.CodingErrorAction; |
| import java.nio.charset.IllegalCharsetNameException; |
| import java.nio.charset.UnsupportedCharsetException; |
| import java.util.Arrays; |
| import sun.misc.MessageUtils; |
| import sun.nio.cs.HistoricallyNamedCharset; |
| import sun.nio.cs.ArrayDecoder; |
| import sun.nio.cs.ArrayEncoder; |
| |
| /** |
| * Utility class for string encoding and decoding. |
| */ |
| |
| class StringCoding { |
| |
| private StringCoding() { } |
| |
| /** The cached coders for each thread */ |
| private final static ThreadLocal<SoftReference<StringDecoder>> decoder = |
| new ThreadLocal<>(); |
| private final static ThreadLocal<SoftReference<StringEncoder>> encoder = |
| new ThreadLocal<>(); |
| |
| private static boolean warnUnsupportedCharset = true; |
| |
| private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { |
| SoftReference<T> sr = tl.get(); |
| if (sr == null) |
| return null; |
| return sr.get(); |
| } |
| |
| private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { |
| tl.set(new SoftReference<T>(ob)); |
| } |
| |
| // Trim the given byte array to the given length |
| // |
| private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { |
| |
| // Android-changed: System.getSecurityManager() == null is always true on Android. |
| // Libcore tests expect a defensive copy in pretty much all cases. |
| // if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) |
| if (len == ba.length && (isTrusted)) |
| return ba; |
| else |
| return Arrays.copyOf(ba, len); |
| } |
| |
| // Trim the given char array to the given length |
| // |
| private static char[] safeTrim(char[] ca, int len, |
| Charset cs, boolean isTrusted) { |
| // Android-changed: System.getSecurityManager() == null is always true on Android. |
| // Libcore tests expect a defensive copy in pretty much all cases. |
| // if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) |
| if (len == ca.length && (isTrusted)) |
| return ca; |
| else |
| return Arrays.copyOf(ca, len); |
| } |
| |
| private static int scale(int len, float expansionFactor) { |
| // We need to perform double, not float, arithmetic; otherwise |
| // we lose low order bits when len is larger than 2**24. |
| return (int)(len * (double)expansionFactor); |
| } |
| |
| private static Charset lookupCharset(String csn) { |
| if (Charset.isSupported(csn)) { |
| try { |
| return Charset.forName(csn); |
| } catch (UnsupportedCharsetException x) { |
| throw new Error(x); |
| } |
| } |
| return null; |
| } |
| |
| private static void warnUnsupportedCharset(String csn) { |
| if (warnUnsupportedCharset) { |
| // Use sun.misc.MessageUtils rather than the Logging API or |
| // System.err since this method may be called during VM |
| // initialization before either is available. |
| MessageUtils.err("WARNING: Default charset " + csn + |
| " not supported, using ISO-8859-1 instead"); |
| warnUnsupportedCharset = false; |
| } |
| } |
| |
| |
| // -- Decoding -- |
| private static class StringDecoder { |
| private final String requestedCharsetName; |
| private final Charset cs; |
| private final CharsetDecoder cd; |
| private final boolean isTrusted; |
| |
| private StringDecoder(Charset cs, String rcn) { |
| this.requestedCharsetName = rcn; |
| this.cs = cs; |
| this.cd = cs.newDecoder() |
| .onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE); |
| this.isTrusted = (cs.getClass().getClassLoader() == null); |
| } |
| |
| String charsetName() { |
| if (cs instanceof HistoricallyNamedCharset) |
| return ((HistoricallyNamedCharset)cs).historicalName(); |
| return cs.name(); |
| } |
| |
| final String requestedCharsetName() { |
| return requestedCharsetName; |
| } |
| |
| char[] decode(byte[] ba, int off, int len) { |
| int en = scale(len, cd.maxCharsPerByte()); |
| char[] ca = new char[en]; |
| if (len == 0) |
| return ca; |
| if (cd instanceof ArrayDecoder) { |
| int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); |
| return safeTrim(ca, clen, cs, isTrusted); |
| } else { |
| cd.reset(); |
| ByteBuffer bb = ByteBuffer.wrap(ba, off, len); |
| CharBuffer cb = CharBuffer.wrap(ca); |
| try { |
| CoderResult cr = cd.decode(bb, cb, true); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| cr = cd.flush(cb); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| } catch (CharacterCodingException x) { |
| // Substitution is always enabled, |
| // so this shouldn't happen |
| throw new Error(x); |
| } |
| return safeTrim(ca, cb.position(), cs, isTrusted); |
| } |
| } |
| } |
| |
| static char[] decode(String charsetName, byte[] ba, int off, int len) |
| throws UnsupportedEncodingException |
| { |
| StringDecoder sd = deref(decoder); |
| String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; |
| if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) |
| || csn.equals(sd.charsetName()))) { |
| sd = null; |
| try { |
| Charset cs = lookupCharset(csn); |
| if (cs != null) |
| sd = new StringDecoder(cs, csn); |
| } catch (IllegalCharsetNameException x) {} |
| if (sd == null) |
| throw new UnsupportedEncodingException(csn); |
| set(decoder, sd); |
| } |
| return sd.decode(ba, off, len); |
| } |
| |
| static char[] decode(Charset cs, byte[] ba, int off, int len) { |
| // (1)We never cache the "external" cs, the only benefit of creating |
| // an additional StringDe/Encoder object to wrap it is to share the |
| // de/encode() method. These SD/E objects are short-lifed, the young-gen |
| // gc should be able to take care of them well. But the best approash |
| // is still not to generate them if not really necessary. |
| // (2)The defensive copy of the input byte/char[] has a big performance |
| // impact, as well as the outgoing result byte/char[]. Need to do the |
| // optimization check of (sm==null && classLoader0==null) for both. |
| // (3)getClass().getClassLoader0() is expensive |
| // (4)There might be a timing gap in isTrusted setting. getClassLoader0() |
| // is only chcked (and then isTrusted gets set) when (SM==null). It is |
| // possible that the SM==null for now but then SM is NOT null later |
| // when safeTrim() is invoked...the "safe" way to do is to redundant |
| // check (... && (isTrusted || SM == null || getClassLoader0())) in trim |
| // but it then can be argued that the SM is null when the opertaion |
| // is started... |
| CharsetDecoder cd = cs.newDecoder(); |
| int en = scale(len, cd.maxCharsPerByte()); |
| char[] ca = new char[en]; |
| if (len == 0) |
| return ca; |
| boolean isTrusted = false; |
| if (System.getSecurityManager() != null) { |
| if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { |
| ba = Arrays.copyOfRange(ba, off, off + len); |
| off = 0; |
| } |
| } |
| cd.onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE) |
| .reset(); |
| if (cd instanceof ArrayDecoder) { |
| int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); |
| return safeTrim(ca, clen, cs, isTrusted); |
| } else { |
| ByteBuffer bb = ByteBuffer.wrap(ba, off, len); |
| CharBuffer cb = CharBuffer.wrap(ca); |
| try { |
| CoderResult cr = cd.decode(bb, cb, true); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| cr = cd.flush(cb); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| } catch (CharacterCodingException x) { |
| // Substitution is always enabled, |
| // so this shouldn't happen |
| throw new Error(x); |
| } |
| return safeTrim(ca, cb.position(), cs, isTrusted); |
| } |
| } |
| |
| static char[] decode(byte[] ba, int off, int len) { |
| String csn = Charset.defaultCharset().name(); |
| try { |
| // use charset name decode() variant which provides caching. |
| return decode(csn, ba, off, len); |
| } catch (UnsupportedEncodingException x) { |
| warnUnsupportedCharset(csn); |
| } |
| try { |
| return decode("ISO-8859-1", ba, off, len); |
| } catch (UnsupportedEncodingException x) { |
| // If this code is hit during VM initialization, MessageUtils is |
| // the only way we will be able to get any kind of error message. |
| MessageUtils.err("ISO-8859-1 charset not available: " |
| + x.toString()); |
| // If we can not find ISO-8859-1 (a required encoding) then things |
| // are seriously wrong with the installation. |
| System.exit(1); |
| return null; |
| } |
| } |
| |
| // -- Encoding -- |
| private static class StringEncoder { |
| private Charset cs; |
| private CharsetEncoder ce; |
| private final String requestedCharsetName; |
| private final boolean isTrusted; |
| |
| private StringEncoder(Charset cs, String rcn) { |
| this.requestedCharsetName = rcn; |
| this.cs = cs; |
| this.ce = cs.newEncoder() |
| .onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE); |
| this.isTrusted = (cs.getClass().getClassLoader() == null); |
| } |
| |
| String charsetName() { |
| if (cs instanceof HistoricallyNamedCharset) |
| return ((HistoricallyNamedCharset)cs).historicalName(); |
| return cs.name(); |
| } |
| |
| final String requestedCharsetName() { |
| return requestedCharsetName; |
| } |
| |
| byte[] encode(char[] ca, int off, int len) { |
| int en = scale(len, ce.maxBytesPerChar()); |
| byte[] ba = new byte[en]; |
| if (len == 0) |
| return ba; |
| if (ce instanceof ArrayEncoder) { |
| int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); |
| return safeTrim(ba, blen, cs, isTrusted); |
| } else { |
| ce.reset(); |
| ByteBuffer bb = ByteBuffer.wrap(ba); |
| CharBuffer cb = CharBuffer.wrap(ca, off, len); |
| try { |
| // Android-changed: Pass read-only buffer, so the encoder can't alter it |
| CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| cr = ce.flush(bb); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| } catch (CharacterCodingException x) { |
| // Substitution is always enabled, |
| // so this shouldn't happen |
| throw new Error(x); |
| } |
| return safeTrim(ba, bb.position(), cs, isTrusted); |
| } |
| } |
| } |
| |
| static byte[] encode(String charsetName, char[] ca, int off, int len) |
| throws UnsupportedEncodingException |
| { |
| StringEncoder se = deref(encoder); |
| String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; |
| if ((se == null) || !(csn.equals(se.requestedCharsetName()) |
| || csn.equals(se.charsetName()))) { |
| se = null; |
| try { |
| Charset cs = lookupCharset(csn); |
| if (cs != null) |
| se = new StringEncoder(cs, csn); |
| } catch (IllegalCharsetNameException x) {} |
| if (se == null) |
| throw new UnsupportedEncodingException (csn); |
| set(encoder, se); |
| } |
| return se.encode(ca, off, len); |
| } |
| |
| static byte[] encode(Charset cs, char[] ca, int off, int len) { |
| CharsetEncoder ce = cs.newEncoder(); |
| int en = scale(len, ce.maxBytesPerChar()); |
| byte[] ba = new byte[en]; |
| if (len == 0) |
| return ba; |
| boolean isTrusted = false; |
| if (System.getSecurityManager() != null) { |
| if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { |
| ca = Arrays.copyOfRange(ca, off, off + len); |
| off = 0; |
| } |
| } |
| ce.onMalformedInput(CodingErrorAction.REPLACE) |
| .onUnmappableCharacter(CodingErrorAction.REPLACE) |
| .reset(); |
| if (ce instanceof ArrayEncoder) { |
| int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); |
| return safeTrim(ba, blen, cs, isTrusted); |
| } else { |
| ByteBuffer bb = ByteBuffer.wrap(ba); |
| CharBuffer cb = CharBuffer.wrap(ca, off, len); |
| try { |
| // Android-changed: Pass read-only buffer, so the encoder can't alter it |
| CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| cr = ce.flush(bb); |
| if (!cr.isUnderflow()) |
| cr.throwException(); |
| } catch (CharacterCodingException x) { |
| throw new Error(x); |
| } |
| return safeTrim(ba, bb.position(), cs, isTrusted); |
| } |
| } |
| |
| static byte[] encode(char[] ca, int off, int len) { |
| String csn = Charset.defaultCharset().name(); |
| try { |
| // use charset name encode() variant which provides caching. |
| return encode(csn, ca, off, len); |
| } catch (UnsupportedEncodingException x) { |
| warnUnsupportedCharset(csn); |
| } |
| try { |
| return encode("ISO-8859-1", ca, off, len); |
| } catch (UnsupportedEncodingException x) { |
| // If this code is hit during VM initialization, MessageUtils is |
| // the only way we will be able to get any kind of error message. |
| MessageUtils.err("ISO-8859-1 charset not available: " |
| + x.toString()); |
| // If we can not find ISO-8859-1 (a required encoding) then things |
| // are seriously wrong with the installation. |
| System.exit(1); |
| return null; |
| } |
| } |
| } |