libcore/nio_char/src/main/java/java/nio/charset/Charset.java - platform/dalvik - Git at Google

 /*
  *  Licensed to the Apache Software Foundation (ASF) under one or more
  *  contributor license agreements.  See the NOTICE file distributed with
  *  this work for additional information regarding copyright ownership.
  *  The ASF licenses this file to You under the Apache License, Version 2.0
  *  (the "License"); you may not use this file except in compliance with
  *  the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  */

 package java.nio.charset;

 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.spi.CharsetProvider;
 import java.security.AccessController;
 import java.security.PrivilegedAction;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;

 // BEGIN android-changed
 import com.ibm.icu4jni.charset.CharsetProviderICU;
 // END android-changed

 /**
  * A charset defines a mapping between a Unicode character sequence and a byte
  * sequence. It facilitates the encoding from a Unicode character sequence into
  * a byte sequence, and the decoding from a byte sequence into a Unicode
  * character sequence.
  * <p>
  * A charset has a canonical name, which is usually in uppercase. Typically it
  * also has one or more aliases. The name string can only consist of the
  * following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':', '-' and '_'.
  * The first character of the name must be a digit or a letter.
  * <p>
  * The following charsets should be supported by any java platform: US-ASCII,
  * ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.
  * <p>
  * Additional charsets can be made available by configuring one or more charset
  * providers through provider configuration files. Such files are always named
  * as "java.nio.charset.spi.CharsetProvider" and located in the
  * "META-INF/services" sub folder of one or more classpaths. The files should be
  * encoded in "UTF-8". Each line of their content specifies the class name of a
  * charset provider which extends
  * <code>java.nio.charset.spi.CharsetProvider</code>. A line should end with
  * '\r', '\n' or '\r\n'. Leading and trailing whitespaces are trimmed. Blank
  * lines, and lines (after trimming) starting with "#" which are regarded as
  * comments, are both ignored. Duplicates of names already found are also
  * ignored. Both the configuration files and the provider classes will be loaded
  * using the thread context class loader.
  * <p>
  * This class is thread-safe.
  *
  * @see java.nio.charset.spi.CharsetProvider
  */
 public abstract class Charset implements Comparable<Charset> {

     /*
      * The name of configuration files where charset provider class names can be
      * specified.
      */
     private static final String PROVIDER_CONFIGURATION_FILE_NAME = "META-INF/services/java.nio.charset.spi.CharsetProvider"; //$NON-NLS-1$

     /*
      * The encoding used to read provider configuration files.
      */
     private static final String PROVIDER_CONFIGURATION_FILE_ENCODING = "UTF-8"; //$NON-NLS-1$

     /*
      * The string that starts a comment in a configuration file; everything
      * from its first occurrence to the end of the line is discarded by
      * trimClassName().
      */
     private static final String PROVIDER_CONFIGURATION_FILE_COMMENT = "#"; //$NON-NLS-1$

     // Resolved lazily by getSystemClassLoader(); null until that has run.
     private static ClassLoader systemClassLoader;

     // built in provider instance, assuming thread-safe
     // BEGIN android-changed
     private static final CharsetProviderICU _builtInProvider = new CharsetProviderICU();
     // END android-changed

     // Cached built-in charsets; filled in on the first call to
     // availableCharsets() from _builtInProvider.initAvailableCharsets().
     private static SortedMap<String, Charset> _builtInCharsets = null;

     // The canonical name, validated by the constructor.
     private final String canonicalName;

     // The aliases of this charset; duplicates supplied to the constructor
     // collapse because this is a set.
     private final HashSet<String> aliasesSet;

     // Cache of resolved charsets, keyed by canonical name and by every alias
     // (see cacheCharset()). Only touched from synchronized static methods.
     private final static HashMap<String, Charset> cachedCharsetTable = new HashMap<String, Charset>();

     // True while forNameInternal() is consulting configured providers. A
     // nested forNameInternal() call made during that time returns right after
     // the cache/built-in lookup instead of consulting the providers again.
     private static boolean inForNameInternal = false;

     /**
      * Constructs a <code>Charset</code> object. Duplicated aliases are
      * ignored.
      *
      * @param canonicalName
      *            the canonical name of the charset.
      * @param aliases
      *            an array containing all aliases of the charset. May be null.
      * @throws IllegalCharsetNameException
      *             on an illegal value being supplied for either
      *             <code>canonicalName</code> or for any element of
      *             <code>aliases</code>.
      */
     protected Charset(String canonicalName, String[] aliases) {
         if (null == canonicalName) {
             throw new NullPointerException();
         }
         // check whether the given canonical name is legal
         checkCharsetName(canonicalName);
         this.canonicalName = canonicalName;
         // check each alias and put into a set
         this.aliasesSet = new HashSet<String>();
         if (aliases != null) {
             for (int i = 0; i < aliases.length; i++) {
                 checkCharsetName(aliases[i]);
                 this.aliasesSet.add(aliases[i]);
             }
         }
     }

     /*
      * Checks whether a character is a special character that can be used in
      * charset names, other than letters and digits.
      */
     private static boolean isSpecial(char c) {
         // The only non-alphanumeric characters permitted in a charset name.
         switch (c) {
             case '-':
             case '.':
             case ':':
             case '_':
                 return true;
             default:
                 return false;
         }
     }

     /*
      * Checks whether a character is a letter (ascii) which are defined in the
      * spec.
      */
     private static boolean isLetter(char c) {
         // ASCII ranges only; non-ASCII letters are deliberately excluded.
         boolean lowerCase = c >= 'a' && c <= 'z';
         boolean upperCase = c >= 'A' && c <= 'Z';
         return lowerCase || upperCase;
     }

     /*
      * Checks whether a character is a digit (ascii) which are defined in the
      * spec.
      */
     private static boolean isDigit(char c) {
         // ASCII digits only.
         return c >= '0' && c <= '9';
     }

     /*
      * Checks whether a given string is a legal charset name. The argument name
      * should not be null.
      */
     private static void checkCharsetName(String name) {
         // The empty string is never a legal charset name.
         if (name.length() == 0) {
             throw new IllegalCharsetNameException(name);
         }
         // Every character, the first one included, must be an ASCII letter,
         // an ASCII digit or one of the permitted punctuation characters. The
         // stricter "first character must be a letter or a digit" rule is
         // intentionally not enforced here (HARMONY-68, won't fix).
         for (char c : name.toCharArray()) {
             boolean legal = isLetter(c) || isDigit(c) || isSpecial(c);
             if (!legal) {
                 throw new IllegalCharsetNameException(name);
             }
         }
     }

     /*
      * Use privileged code to get the context class loader.
      */
     private static ClassLoader getContextClassLoader() {
         final Thread current = Thread.currentThread();
         // Read the loader inside a privileged action.
         PrivilegedAction<ClassLoader> lookup = new PrivilegedAction<ClassLoader>() {
             public ClassLoader run() {
                 return current.getContextClassLoader();
             }
         };
         return AccessController.doPrivileged(lookup);
     }

     /*
      * Use privileged code to get the system class loader.
      */
     private static void getSystemClassLoader() {
         // Already resolved by an earlier call: nothing to do.
         if (systemClassLoader != null) {
             return;
         }
         PrivilegedAction<ClassLoader> lookup = new PrivilegedAction<ClassLoader>() {
             public ClassLoader run() {
                 return ClassLoader.getSystemClassLoader();
             }
         };
         // Stores the result in the static field rather than returning it.
         systemClassLoader = AccessController.doPrivileged(lookup);
     }

     /*
      * Add the charsets supported by the given provider to the map.
      */
     private static void addCharsets(CharsetProvider cp, Map<String, Charset> charsets) {
         for (Iterator<Charset> supported = cp.charsets(); supported.hasNext();) {
             Charset candidate = supported.next();
             String key = candidate.name();
             // An entry that is already present wins over a later charset
             // with the same canonical name.
             if (charsets.containsKey(key)) {
                 continue;
             }
             charsets.put(key, candidate);
         }
     }

     /*
      * Trim comment string, and then trim white spaces.
      */
     private static String trimClassName(String name) {
         // Drop everything from the first comment marker onwards, if any...
         int commentStart = name.indexOf(PROVIDER_CONFIGURATION_FILE_COMMENT);
         String withoutComment = (commentStart == -1)
                 ? name
                 : name.substring(0, commentStart);
         // ...then strip the surrounding white space.
         return withoutComment.trim();
     }

     /*
      * Read a configuration file and add the charsets supported by the providers
      * specified by this configuration file to the map.
      */
     private static void loadConfiguredCharsets(URL configFile,
             ClassLoader contextClassLoader, Map<String, Charset> charsets) {
         // The raw stream is tracked separately from the reader so that it can
         // still be closed if wrapping it in a reader fails.
         InputStream is = null;
         BufferedReader reader = null;
         try {
             is = configFile.openStream();
             // Read each line for charset provider class names
             reader = new BufferedReader(new InputStreamReader(is,
                     PROVIDER_CONFIGURATION_FILE_ENCODING));
             String providerClassName = reader.readLine();
             while (null != providerClassName) {
                 providerClassName = trimClassName(providerClassName);
                 // Skip comments and blank lines
                 if (providerClassName.length() > 0) { // Non empty string
                     // Load the charset provider
                     Object cp = null;
                     try {
                         Class<?> c = Class.forName(providerClassName, true,
                                 contextClassLoader);
                         cp = c.newInstance();
                     } catch (Exception ex) {
                         // The context class loader could not supply the
                         // provider; retry with the system class loader.
                         try {
                             getSystemClassLoader();
                             Class<?> c = Class.forName(providerClassName, true,
                                     systemClassLoader);
                             cp = c.newInstance();
                         } catch (SecurityException e) {
                             // Not permitted to load this provider: skip it,
                             // the same way searchConfiguredCharsets() does,
                             // rather than failing the whole enumeration.
                         } catch (Exception e) {
                             throw new Error(e.getMessage(), e);
                         }
                     }
                     // Put the charsets supported by this provider into the map
                     if (cp != null) {
                         addCharsets((CharsetProvider) cp, charsets);
                     }
                 }
                 // Read the next line of the config file
                 providerClassName = reader.readLine();
             }
         } catch (IOException ex) {
             // Can't read this configuration file, ignore
         } finally {
             try {
                 if (null != reader) {
                     // Closing the reader also closes the wrapped stream.
                     reader.close();
                 } else if (null != is) {
                     // The reader was never created; release the raw stream.
                     is.close();
                 }
             } catch (IOException ex) {
                 // Ignore closing exception
             }
         }
     }

     /**
      * Gets a map of all available charsets supported by the runtime.
      * <p>
      * The returned map contains mappings from canonical names to corresponding
      * instances of <code>Charset</code>. The canonical names can be considered
      * as case-insensitive.
      *
      * @return an unmodifiable map of all available charsets supported by the
      *         runtime
      */
     @SuppressWarnings("unchecked")
     public static SortedMap<String, Charset> availableCharsets() {
         // Initialize the built-in charsets map cache if necessary. The check
         // and the read both happen under the lock: the field is not volatile,
         // so an unsynchronized "is it null?" fast path could observe a
         // reference to a map that is not yet fully initialized.
         SortedMap<String, Charset> builtIn;
         synchronized (Charset.class) {
             if (_builtInCharsets == null) {
                 _builtInCharsets = _builtInProvider.initAvailableCharsets();
             }
             builtIn = _builtInCharsets;
         }

         // Start with a private copy of the built-in charsets...
         SortedMap<String, Charset> charsets = new TreeMap<String, Charset>(builtIn);

         // Add all charsets provided by charset providers...
         ClassLoader contextClassLoader = getContextClassLoader();
         try {
             Enumeration<URL> e = null;
             if (contextClassLoader != null) {
                 e = contextClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
             } else {
                 getSystemClassLoader();
                 // The system class loader may be unavailable (forNameInternal()
                 // guards against the same case); then only the built-in
                 // charsets are reported instead of failing with a
                 // NullPointerException.
                 if (systemClassLoader != null) {
                     e = systemClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
                 }
             }
             // Examine each configuration file
             while (e != null && e.hasMoreElements()) {
                 loadConfiguredCharsets(e.nextElement(), contextClassLoader, charsets);
             }
         } catch (IOException ex) {
             // Unexpected ClassLoader exception, ignore
         }
         return Collections.unmodifiableSortedMap(charsets);
     }

     /*
      * Read a configuration file and try to find the desired charset among those
      * which are supported by the providers specified in this configuration
      * file.
      */
     private static Charset searchConfiguredCharsets(String charsetName,
             ClassLoader contextClassLoader, URL configFile) {
         // The raw stream is tracked separately from the reader so that it can
         // still be closed if wrapping it in a reader fails.
         InputStream is = null;
         BufferedReader reader = null;
         try {
             is = configFile.openStream();
             // Read each line for charset provider class names
             reader = new BufferedReader(new InputStreamReader(is,
                     PROVIDER_CONFIGURATION_FILE_ENCODING));
             String providerClassName = reader.readLine();
             while (null != providerClassName) {
                 providerClassName = trimClassName(providerClassName);
                 if (providerClassName.length() > 0) { // Non empty string
                     // Load the charset provider
                     Object cp = null;
                     try {
                         Class<?> c = Class.forName(providerClassName, true,
                                 contextClassLoader);
                         cp = c.newInstance();
                     } catch (Exception ex) {
                         // The context class loader could not supply the
                         // provider; retry with the system class loader.
                         try {
                             getSystemClassLoader();
                             Class<?> c = Class.forName(providerClassName, true,
                                     systemClassLoader);
                             cp = c.newInstance();
                         } catch (SecurityException e) {
                             // BEGIN android-changed
                             // ignore
                             // END android-changed
                         } catch (Exception e) {
                             throw new Error(e.getMessage(), e);
                         }
                     }
                     // BEGIN android-changed
                     if (cp != null) {
                         // Try to get the desired charset from this provider
                         Charset cs = ((CharsetProvider) cp)
                                 .charsetForName(charsetName);
                         if (null != cs) {
                             return cs;
                         }
                     }
                     // END android-changed
                 }
                 // Read the next line of the config file
                 providerClassName = reader.readLine();
             }
             // No provider listed in this file knows the charset.
             return null;
         } catch (IOException ex) {
             // Can't read this configuration file
             return null;
         } finally {
             try {
                 if (null != reader) {
                     // Closing the reader also closes the wrapped stream.
                     reader.close();
                 } else if (null != is) {
                     // The reader was never created; release the raw stream.
                     is.close();
                 }
             } catch (IOException ex) {
                 // Ignore closing exception
             }
         }
     }

     /*
      * Gets a <code>Charset</code> instance for the specified charset name. If
      * the charset is not supported, returns null instead of throwing an
      * exception.
      */
     private synchronized static Charset forNameInternal(String charsetName)
             throws IllegalCharsetNameException {

         // Fast path: cache or built-in provider.
         Charset found = lookupCachedOrBuiltInCharset(charsetName);
         if (found != null) {
             return found;
         }
         // A nested call made while providers are being consulted stops here
         // instead of consulting the providers again.
         if (inForNameInternal) {
             return null;
         }

         // Slow path: ask every configured charset provider.
         try {
             ClassLoader contextClassLoader = getContextClassLoader();
             Enumeration<URL> configFiles;
             if (contextClassLoader == null) {
                 getSystemClassLoader();
                 if (systemClassLoader == null) {
                     // Not available during class library start-up phase
                     return null;
                 }
                 configFiles = systemClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
             } else {
                 configFiles = contextClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
             }

             // Examine each configuration file; the flag is raised only for
             // the duration of the provider search.
             while (configFiles.hasMoreElements()) {
                 inForNameInternal = true;
                 found = searchConfiguredCharsets(charsetName, contextClassLoader,
                         configFiles.nextElement());
                 inForNameInternal = false;
                 if (found != null) {
                     cacheCharset(found);
                     return found;
                 }
             }
         } catch (IOException ex) {
             // Unexpected ClassLoader exception, ignore
         } finally {
             // Make sure the flag is lowered even if a provider threw.
             inForNameInternal = false;
         }
         return null;
     }

     private synchronized static Charset lookupCachedOrBuiltInCharset(String charsetName) {
         // The cache is consulted first, before the name is validated.
         Charset cached = cachedCharsetTable.get(charsetName);
         if (cached != null) {
             return cached;
         }
         if (charsetName == null) {
             throw new IllegalArgumentException();
         }
         // Throws IllegalCharsetNameException for an illegal name.
         checkCharsetName(charsetName);

         Charset builtIn = _builtInProvider.charsetForName(charsetName);
         if (builtIn == null) {
             return null;
         }
         // Remember the hit so the next lookup is served from the cache.
         cacheCharset(builtIn);
         return builtIn;
     }

     /*
      * save charset into cachedCharsetTable
      */
     private synchronized static void cacheCharset(Charset cs) {
         // Register under the canonical name first; an existing entry is
         // never replaced.
         final String primary = cs.name();
         if (!cachedCharsetTable.containsKey(primary)) {
             cachedCharsetTable.put(primary, cs);
         }
         // Then under each alias, again without replacing existing entries.
         Iterator<String> aliasNames = cs.aliasesSet.iterator();
         while (aliasNames.hasNext()) {
             String alias = aliasNames.next();
             if (!cachedCharsetTable.containsKey(alias)) {
                 cachedCharsetTable.put(alias, cs);
             }
         }
     }

     /**
      * Gets a <code>Charset</code> instance for the specified charset name.
      *
      * @param charsetName
      *            the canonical name of the charset or an alias.
      * @return a <code>Charset</code> instance for the specified charset name.
      * @throws IllegalCharsetNameException
      *             if the specified charset name is illegal.
      * @throws UnsupportedCharsetException
      *             if the desired charset is not supported by this runtime.
      */
     public static Charset forName(String charsetName) {
         Charset resolved = forNameInternal(charsetName);
         if (resolved != null) {
             return resolved;
         }
         // The internal lookup reports "unknown" as null; callers of this
         // method get an exception instead.
         throw new UnsupportedCharsetException(charsetName);
     }

     /**
      * Determines whether the specified charset is supported by this runtime.
      *
      * @param charsetName
      *            the name of the charset.
      * @return true if the specified charset is supported, otherwise false.
      * @throws IllegalCharsetNameException
      *             if the specified charset name is illegal.
      */
     public static synchronized boolean isSupported(String charsetName) {
         // Same lookup as forName(), but "unknown" is reported as false.
         Charset resolved = forNameInternal(charsetName);
         return resolved != null;
     }

     /**
      * Determines whether this charset is a super set of the given charset.
      *
      * @param charset
      *            a given charset.
      * @return true if this charset is a super set of the given charset,
      *         false if it's unknown or this charset is not a superset of
      *         the given charset.
      */
     public abstract boolean contains(Charset charset);

     /**
      * Gets a new instance of an encoder for this charset.
      * <p>
      * {@link #encode(CharBuffer)} obtains its encoder from this method and
      * configures it to replace malformed input and unmappable characters.
      *
      * @return a new instance of an encoder for this charset.
      */
     public abstract CharsetEncoder newEncoder();

     /**
      * Gets a new instance of a decoder for this charset.
      * <p>
      * {@link #decode(ByteBuffer)} obtains its decoder from this method and
      * configures it to replace malformed input and unmappable characters.
      *
      * @return a new instance of a decoder for this charset.
      */
     public abstract CharsetDecoder newDecoder();

     /**
      * Gets the canonical name of this charset.
      *
      * @return this charset's name in canonical form.
      */
     public final String name() {
         // The canonical name is fixed at construction time.
         return canonicalName;
     }

     /**
      * Gets the set of this charset's aliases.
      *
      * @return an unmodifiable set of this charset's aliases.
      */
     public final Set<String> aliases() {
         // Hand out a read-only view so callers cannot alter the alias set.
         Set<String> readOnlyView = Collections.unmodifiableSet(aliasesSet);
         return readOnlyView;
     }

     /**
      * Gets the name of this charset for the default locale.
      *
      * <p>The default implementation returns the canonical name of this charset.
      * Subclasses may return a localized display name.
      *
      * @return the name of this charset for the default locale.
      */
     public String displayName() {
         // No localization in the default implementation.
         return name();
     }

     /**
      * Gets the name of this charset for the specified locale.
      *
      * <p>The default implementation returns the canonical name of this charset.
      * Subclasses may return a localized display name.
      *
      * @param l
      *            a certain locale
      * @return the name of this charset for the specified locale
      */
     public String displayName(Locale l) {
         // The locale is ignored by the default implementation.
         return name();
     }

     /**
      * Indicates whether this charset is known to be registered in the IANA
      * Charset Registry.
      *
      * @return true if the charset is known to be registered, otherwise returns
      *         false.
      */
     public final boolean isRegistered() {
         // Names carrying the "x-" / "X-" prefix are reported as unregistered.
         boolean experimental =
                 canonicalName.startsWith("x-") //$NON-NLS-1$
                 || canonicalName.startsWith("X-"); //$NON-NLS-1$
         return !experimental;
     }

     /**
      * Returns true if this charset supports encoding, false otherwise.
      *
      * @return true if this charset supports encoding, false otherwise.
      */
     public boolean canEncode() {
         // Default answer; the method is not final, so a subclass can
         // override it.
         return true;
     }

     /**
      * Encodes the content of the given character buffer and outputs to a byte
      * buffer that is to be returned.
      * <p>
      * The default action in case of encoding errors is
      * <code>CodingErrorAction.REPLACE</code>.
      *
      * @param buffer
      *            the character buffer containing the content to be encoded.
      * @return the result of the encoding.
      */
     public final ByteBuffer encode(CharBuffer buffer) {
         try {
             // Fresh encoder per call, set up to substitute rather than fail
             // on both malformed input and unmappable characters.
             CharsetEncoder encoder = newEncoder();
             encoder = encoder.onMalformedInput(CodingErrorAction.REPLACE);
             encoder = encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
             return encoder.encode(buffer);
         } catch (CharacterCodingException ex) {
             // Surfaced as an Error that keeps the original cause.
             throw new Error(ex.getMessage(), ex);
         }
     }

     /**
      * Encodes a string and outputs to a byte buffer that is to be returned.
      * <p>
      * The default action in case of encoding errors is
      * <code>CodingErrorAction.REPLACE</code>.
      *
      * @param s
      *            the string to be encoded.
      * @return the result of the encoding.
      */
     public final ByteBuffer encode(String s) {
         // Wrap the string and delegate to the CharBuffer overload.
         CharBuffer chars = CharBuffer.wrap(s);
         return encode(chars);
     }

     /**
      * Decodes the content of the specified byte buffer and writes it to a
      * character buffer that is to be returned.
      * <p>
      * The default action in case of decoding errors is
      * <code>CodingErrorAction.REPLACE</code>.
      *
      * @param buffer
      *            the byte buffer containing the content to be decoded.
      * @return a character buffer containing the output of the decoding.
      */
     public final CharBuffer decode(ByteBuffer buffer) {
         try {
             // Fresh decoder per call, set up to substitute rather than fail
             // on both malformed input and unmappable characters.
             CharsetDecoder decoder = newDecoder();
             decoder = decoder.onMalformedInput(CodingErrorAction.REPLACE);
             decoder = decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
             return decoder.decode(buffer);
         } catch (CharacterCodingException ex) {
             // Surfaced as an Error that keeps the original cause.
             throw new Error(ex.getMessage(), ex);
         }
     }

     /*
      * -------------------------------------------------------------------
      * Methods implementing parent interface Comparable
      * -------------------------------------------------------------------
      */

     /**
      * Compares this charset with the given charset. This comparison is
      * based on the case insensitive canonical names of the charsets.
      *
      * @param charset
      *            the given object to be compared with.
      * @return a negative integer if less than the given object, a positive
      *         integer if larger than it, or 0 if equal to it.
      */
     public final int compareTo(Charset charset) {
         // Ordering is by canonical name, ignoring case.
         String otherName = charset.canonicalName;
         return canonicalName.compareToIgnoreCase(otherName);
     }

     /*
      * -------------------------------------------------------------------
      * Methods overriding parent class Object
      * -------------------------------------------------------------------
      */

     /**
      * Determines whether this charset is equal to the given object. They are
      * considered to be equal if they have the same canonical name.
      *
      * @param obj
      *            the given object to be compared with.
      * @return true if they have the same canonical name, otherwise false.
      */
     @Override
     public final boolean equals(Object obj) {
         // Anything that is not a Charset (null included) is never equal.
         if (!(obj instanceof Charset)) {
             return false;
         }
         // Equality is decided by the canonical name, compared case-sensitively.
         Charset other = (Charset) obj;
         return canonicalName.equals(other.canonicalName);
     }

     /**
      * Gets the hash code of this charset.
      *
      * @return the hash code of this charset.
      */
     @Override
     public final int hashCode() {
         // Derived from the same field that equals() compares.
         return name().hashCode();
     }

     /**
      * Gets a string representation of this charset. Usually this contains the
      * canonical name of the charset.
      *
      * @return a string representation of this charset.
      */
     @Override
     public final String toString() {
         // Produces "Charset[<canonical name>]".
         StringBuilder text = new StringBuilder("Charset["); //$NON-NLS-1$
         text.append(canonicalName);
         text.append("]"); //$NON-NLS-1$
         return text.toString();
     }

     /**
      * Gets the system default charset from the virtual machine.
      *
      * @return the default charset.
      */
     public static Charset defaultCharset() {
         // Read the configured encoding inside a privileged action.
         String encoding = AccessController
                 .doPrivileged(new PrivilegedAction<String>() {
                     public String run() {
                         return System.getProperty("file.encoding"); //$NON-NLS-1$
                     }
                 });
         // An unset property goes straight to the fallback below.
         if (encoding != null) {
             try {
                 return Charset.forName(encoding);
             } catch (IllegalArgumentException e) {
                 // Covers both UnsupportedCharsetException (unknown charset)
                 // and IllegalCharsetNameException (syntactically illegal
                 // name): a bad "file.encoding" value must not make this
                 // method throw, so fall through to UTF-8.
             }
         }
         return Charset.forName("UTF-8"); //$NON-NLS-1$
     }
 }