blob: 4b8849fb1ab4c6847581132c1e0aee130f551c73 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package java.nio.charset;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.spi.CharsetProvider;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Collections;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
// BEGIN android-changed
import com.ibm.icu4jni.charset.CharsetProviderICU;
// END android-changed
/**
* A charset defines a mapping between a Unicode character sequence and a byte
* sequence. It facilitates the encoding from a Unicode character sequence into
* a byte sequence, and the decoding from a byte sequence into a Unicode
* character sequence.
* <p>
* A charset has a canonical name, which is usually in uppercase. Typically it
* also has one or more aliases. The name string can only consist of the
* following characters: '0' - '9', 'A' - 'Z', 'a' - 'z', '.', ':'. '-' and '_'.
* The first character of the name must be a digit or a letter.
* <p>
* The following charsets should be supported by any java platform: US-ASCII,
* ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE, UTF-16.
* <p>
* Additional charsets can be made available by configuring one or more charset
* providers through provider configuration files. Such files are always named
* as "java.nio.charset.spi.CharsetProvider" and located in the
* "META-INF/services" sub folder of one or more classpaths. The files should be
* encoded in "UTF-8". Each line of their content specifies the class name of a
* charset provider which extends
* <code>java.nio.charset.spi.CharsetProvider</code>. A line should end with
* '\r', '\n' or '\r\n'. Leading and trailing whitespaces are trimmed. Blank
* lines, and lines (after trimming) starting with "#" which are regarded as
* comments, are both ignored. Duplicates of names already found are also
* ignored. Both the configuration files and the provider classes will be loaded
* using the thread context class loader.
* <p>
* This class is thread-safe.
*
* @see java.nio.charset.spi.CharsetProvider
*/
public abstract class Charset implements Comparable<Charset> {
/*
* The name of configuration files where charset provider class names can be
* specified.
*/
private static final String PROVIDER_CONFIGURATION_FILE_NAME = "META-INF/services/java.nio.charset.spi.CharsetProvider"; //$NON-NLS-1$
/*
* The encoding of configuration files
*/
private static final String PROVIDER_CONFIGURATION_FILE_ENCODING = "UTF-8"; //$NON-NLS-1$
/*
* The comment string used in configuration files
*/
private static final String PROVIDER_CONFIGURATION_FILE_COMMENT = "#"; //$NON-NLS-1$
private static ClassLoader systemClassLoader;
// built in provider instance, assuming thread-safe
// BEGIN android-changed
private static final CharsetProviderICU _builtInProvider = new CharsetProviderICU();
// END android-changed
// cached built in charsets
private static SortedMap<String, Charset> _builtInCharsets = null;
private final String canonicalName;
// the aliases set
private final HashSet<String> aliasesSet;
// cached Charset table
private final static HashMap<String, Charset> cachedCharsetTable = new HashMap<String, Charset>();
private static boolean inForNameInternal = false;
/**
* Constructs a <code>Charset</code> object. Duplicated aliases are
* ignored.
*
* @param canonicalName
* the canonical name of the charset.
* @param aliases
* an array containing all aliases of the charset. May be null.
* @throws IllegalCharsetNameException
* on an illegal value being supplied for either
* <code>canonicalName</code> or for any element of
* <code>aliases</code>.
*/
protected Charset(String canonicalName, String[] aliases) {
if (null == canonicalName) {
throw new NullPointerException();
}
// check whether the given canonical name is legal
checkCharsetName(canonicalName);
this.canonicalName = canonicalName;
// check each alias and put into a set
this.aliasesSet = new HashSet<String>();
if (aliases != null) {
for (int i = 0; i < aliases.length; i++) {
checkCharsetName(aliases[i]);
this.aliasesSet.add(aliases[i]);
}
}
}
/*
* Checks whether a character is a special character that can be used in
* charset names, other than letters and digits.
*/
private static boolean isSpecial(char c) {
return ('-' == c || '.' == c || ':' == c || '_' == c);
}
/*
* Checks whether a character is a letter (ascii) which are defined in the
* spec.
*/
private static boolean isLetter(char c) {
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z');
}
/*
* Checks whether a character is a digit (ascii) which are defined in the
* spec.
*/
private static boolean isDigit(char c) {
return ('0' <= c && c <= '9');
}
/*
* Checks whether a given string is a legal charset name. The argument name
* should not be null.
*/
private static void checkCharsetName(String name) {
// An empty string is illegal charset name
if (name.length() == 0) {
throw new IllegalCharsetNameException(name);
}
// The first character must be a letter or a digit
// This is related to HARMONY-68 (won't fix)
// char first = name.charAt(0);
// if (!isLetter(first) && !isDigit(first)) {
// throw new IllegalCharsetNameException(name);
// }
// Check the remaining characters
int length = name.length();
for (int i = 0; i < length; i++) {
char c = name.charAt(i);
if (!isLetter(c) && !isDigit(c) && !isSpecial(c)) {
throw new IllegalCharsetNameException(name);
}
}
}
/*
* Use privileged code to get the context class loader.
*/
private static ClassLoader getContextClassLoader() {
final Thread t = Thread.currentThread();
return AccessController
.doPrivileged(new PrivilegedAction<ClassLoader>() {
public ClassLoader run() {
return t.getContextClassLoader();
}
});
}
/*
* Use privileged code to get the system class loader.
*/
private static void getSystemClassLoader() {
if (null == systemClassLoader) {
systemClassLoader = AccessController
.doPrivileged(new PrivilegedAction<ClassLoader>() {
public ClassLoader run() {
return ClassLoader.getSystemClassLoader();
}
});
}
}
/*
* Add the charsets supported by the given provider to the map.
*/
private static void addCharsets(CharsetProvider cp, Map<String, Charset> charsets) {
Iterator<Charset> it = cp.charsets();
while (it.hasNext()) {
Charset cs = it.next();
// Only new charsets will be added
if (!charsets.containsKey(cs.name())) {
charsets.put(cs.name(), cs);
}
}
}
/*
* Trim comment string, and then trim white spaces.
*/
private static String trimClassName(String name) {
String trimedName = name;
int index = name.indexOf(PROVIDER_CONFIGURATION_FILE_COMMENT);
// Trim comments
if (index != -1) {
trimedName = name.substring(0, index);
}
return trimedName.trim();
}
/*
* Read a configuration file and add the charsets supported by the providers
* specified by this configuration file to the map.
*/
private static void loadConfiguredCharsets(URL configFile,
ClassLoader contextClassLoader, Map<String, Charset> charsets) {
BufferedReader reader = null;
try {
InputStream is = configFile.openStream();
// Read each line for charset provider class names
reader = new BufferedReader(new InputStreamReader(is,
PROVIDER_CONFIGURATION_FILE_ENCODING));
String providerClassName = reader.readLine();
while (null != providerClassName) {
providerClassName = trimClassName(providerClassName);
// Skip comments and blank lines
if (providerClassName.length() > 0) { // Non empty string
// Load the charset provider
Object cp = null;
try {
Class<?> c = Class.forName(providerClassName, true,
contextClassLoader);
cp = c.newInstance();
} catch (Exception ex) {
// try to use system classloader when context
// classloader failed to load config file.
try {
getSystemClassLoader();
Class<?> c = Class.forName(providerClassName, true,
systemClassLoader);
cp = c.newInstance();
} catch (Exception e) {
throw new Error(e.getMessage(), e);
}
}
// Put the charsets supported by this provider into the map
addCharsets((CharsetProvider) cp, charsets);
}
// Read the next line of the config file
providerClassName = reader.readLine();
}
} catch (IOException ex) {
// Can't read this configuration file, ignore
} finally {
try {
if (null != reader) {
reader.close();
}
} catch (IOException ex) {
// Ignore closing exception
}
}
}
/**
* Gets a map of all available charsets supported by the runtime.
* <p>
* The returned map contains mappings from canonical names to corresponding
* instances of <code>Charset</code>. The canonical names can be considered
* as case-insensitive.
*
* @return an unmodifiable map of all available charsets supported by the
* runtime
*/
@SuppressWarnings("unchecked")
public static SortedMap<String, Charset> availableCharsets() {
// Initialize the built-in charsets map cache if necessary
if (_builtInCharsets == null) {
synchronized (Charset.class) {
if (_builtInCharsets == null) {
_builtInCharsets = _builtInProvider.initAvailableCharsets();
}
}
}
// Start with the built-in charsets...
SortedMap<String, Charset> charsets = new TreeMap<String, Charset>(_builtInCharsets);
// Add all charsets provided by charset providers...
ClassLoader contextClassLoader = getContextClassLoader();
Enumeration<URL> e = null;
try {
if (contextClassLoader != null) {
e = contextClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
} else {
getSystemClassLoader();
e = systemClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
}
// Examine each configuration file
while (e.hasMoreElements()) {
loadConfiguredCharsets(e.nextElement(), contextClassLoader, charsets);
}
} catch (IOException ex) {
// Unexpected ClassLoader exception, ignore
}
return Collections.unmodifiableSortedMap(charsets);
}
/*
* Read a configuration file and try to find the desired charset among those
* which are supported by the providers specified in this configuration
* file.
*/
private static Charset searchConfiguredCharsets(String charsetName,
ClassLoader contextClassLoader, URL configFile) {
BufferedReader reader = null;
try {
InputStream is = configFile.openStream();
// Read each line for charset provider class names
reader = new BufferedReader(new InputStreamReader(is,
PROVIDER_CONFIGURATION_FILE_ENCODING));
String providerClassName = reader.readLine();
while (null != providerClassName) {
providerClassName = trimClassName(providerClassName);
if (providerClassName.length() > 0) { // Non empty string
// Load the charset provider
Object cp = null;
try {
Class<?> c = Class.forName(providerClassName, true,
contextClassLoader);
cp = c.newInstance();
} catch (Exception ex) {
// try to use system classloader when context
// classloader failed to load config file.
try {
getSystemClassLoader();
Class<?> c = Class.forName(providerClassName, true,
systemClassLoader);
cp = c.newInstance();
} catch (SecurityException e) {
// BEGIN android-changed
// ignore
// END android-changed
} catch (Exception e) {
throw new Error(e.getMessage(), e);
}
}
// BEGIN android-changed
if (cp != null) {
// Try to get the desired charset from this provider
Charset cs = ((CharsetProvider) cp)
.charsetForName(charsetName);
if (null != cs) {
return cs;
}
}
// END android-changed
}
// Read the next line of the config file
providerClassName = reader.readLine();
}
return null;
} catch (IOException ex) {
// Can't read this configuration file
return null;
} finally {
try {
if (null != reader) {
reader.close();
}
} catch (IOException ex) {
// Ignore closing exception
}
}
}
/*
* Gets a <code>Charset</code> instance for the specified charset name. If
* the charset is not supported, returns null instead of throwing an
* exception.
*/
private synchronized static Charset forNameInternal(String charsetName)
throws IllegalCharsetNameException {
Charset cs = lookupCachedOrBuiltInCharset(charsetName);
if (cs != null || inForNameInternal) {
return cs;
}
// collect all charsets provided by charset providers
try {
Enumeration<URL> e = null;
ClassLoader contextClassLoader = getContextClassLoader();
if (contextClassLoader != null) {
e = contextClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
} else {
getSystemClassLoader();
if (systemClassLoader == null) {
// Non available during class library start-up phase
return null;
} else {
e = systemClassLoader.getResources(PROVIDER_CONFIGURATION_FILE_NAME);
}
}
// examine each configuration file
while (e.hasMoreElements()) {
inForNameInternal = true;
cs = searchConfiguredCharsets(charsetName, contextClassLoader, e.nextElement());
inForNameInternal = false;
if (cs != null) {
cacheCharset(cs);
return cs;
}
}
} catch (IOException ex) {
// Unexpected ClassLoader exception, ignore
} finally {
inForNameInternal = false;
}
return null;
}
private synchronized static Charset lookupCachedOrBuiltInCharset(String charsetName) {
Charset cs = cachedCharsetTable.get(charsetName);
if (cs != null) {
return cs;
}
if (charsetName == null) {
throw new IllegalArgumentException();
}
checkCharsetName(charsetName);
cs = _builtInProvider.charsetForName(charsetName);
if (cs != null) {
cacheCharset(cs);
}
return cs;
}
/*
* save charset into cachedCharsetTable
*/
private synchronized static void cacheCharset(Charset cs) {
// Cache the Charset by its canonical name...
String canonicalName = cs.name();
if (!cachedCharsetTable.containsKey(canonicalName)) {
cachedCharsetTable.put(canonicalName, cs);
}
// And all its aliases...
for (String alias : cs.aliasesSet) {
if (!cachedCharsetTable.containsKey(alias)) {
cachedCharsetTable.put(alias, cs);
}
}
}
/**
* Gets a <code>Charset</code> instance for the specified charset name.
*
* @param charsetName
* the canonical name of the charset or an alias.
* @return a <code>Charset</code> instance for the specified charset name.
* @throws IllegalCharsetNameException
* if the specified charset name is illegal.
* @throws UnsupportedCharsetException
* if the desired charset is not supported by this runtime.
*/
public static Charset forName(String charsetName) {
Charset c = forNameInternal(charsetName);
if (c == null) {
throw new UnsupportedCharsetException(charsetName);
}
return c;
}
/**
* Determines whether the specified charset is supported by this runtime.
*
* @param charsetName
* the name of the charset.
* @return true if the specified charset is supported, otherwise false.
* @throws IllegalCharsetNameException
* if the specified charset name is illegal.
*/
public static synchronized boolean isSupported(String charsetName) {
return forNameInternal(charsetName) != null;
}
/**
* Determines whether this charset is a super set of the given charset.
*
* @param charset
* a given charset.
* @return true if this charset is a super set of the given charset,
* false if it's unknown or this charset is not a superset of
* the given charset.
*/
public abstract boolean contains(Charset charset);
/**
* Gets a new instance of an encoder for this charset.
*
* @return a new instance of an encoder for this charset.
*/
public abstract CharsetEncoder newEncoder();
/**
* Gets a new instance of a decoder for this charset.
*
* @return a new instance of a decoder for this charset.
*/
public abstract CharsetDecoder newDecoder();
/**
* Gets the canonical name of this charset.
*
* @return this charset's name in canonical form.
*/
public final String name() {
return this.canonicalName;
}
/**
* Gets the set of this charset's aliases.
*
* @return an unmodifiable set of this charset's aliases.
*/
public final Set<String> aliases() {
return Collections.unmodifiableSet(this.aliasesSet);
}
/**
* Gets the name of this charset for the default locale.
*
* <p>The default implementation returns the canonical name of this charset.
* Subclasses may return a localized display name.
*
* @return the name of this charset for the default locale.
*/
public String displayName() {
return this.canonicalName;
}
/**
* Gets the name of this charset for the specified locale.
*
* <p>The default implementation returns the canonical name of this charset.
* Subclasses may return a localized display name.
*
* @param l
* a certain locale
* @return the name of this charset for the specified locale
*/
public String displayName(Locale l) {
return this.canonicalName;
}
/**
* Indicates whether this charset is known to be registered in the IANA
* Charset Registry.
*
* @return true if the charset is known to be registered, otherwise returns
* false.
*/
public final boolean isRegistered() {
return !canonicalName.startsWith("x-") //$NON-NLS-1$
&& !canonicalName.startsWith("X-"); //$NON-NLS-1$
}
/**
* Returns true if this charset supports encoding, false otherwise.
*
* @return true if this charset supports encoding, false otherwise.
*/
public boolean canEncode() {
return true;
}
/**
* Encodes the content of the give character buffer and outputs to a byte
* buffer that is to be returned.
* <p>
* The default action in case of encoding errors is
* <code>CodingErrorAction.REPLACE</code>.
*
* @param buffer
* the character buffer containing the content to be encoded.
* @return the result of the encoding.
*/
public final ByteBuffer encode(CharBuffer buffer) {
try {
return this.newEncoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE).encode(
buffer);
} catch (CharacterCodingException ex) {
throw new Error(ex.getMessage(), ex);
}
}
/**
* Encodes a string and outputs to a byte buffer that is to be returned.
* <p>
* The default action in case of encoding errors is
* <code>CodingErrorAction.REPLACE</code>.
*
* @param s
* the string to be encoded.
* @return the result of the encoding.
*/
public final ByteBuffer encode(String s) {
return encode(CharBuffer.wrap(s));
}
/**
* Decodes the content of the specified byte buffer and writes it to a
* character buffer that is to be returned.
* <p>
* The default action in case of decoding errors is
* <code>CodingErrorAction.REPLACE</code>.
*
* @param buffer
* the byte buffer containing the content to be decoded.
* @return a character buffer containing the output of the decoding.
*/
public final CharBuffer decode(ByteBuffer buffer) {
try {
return this.newDecoder()
.onMalformedInput(CodingErrorAction.REPLACE)
.onUnmappableCharacter(CodingErrorAction.REPLACE).decode(
buffer);
} catch (CharacterCodingException ex) {
throw new Error(ex.getMessage(), ex);
}
}
/*
* -------------------------------------------------------------------
* Methods implementing parent interface Comparable
* -------------------------------------------------------------------
*/
/**
* Compares this charset with the given charset. This comparation is
* based on the case insensitive canonical names of the charsets.
*
* @param charset
* the given object to be compared with.
* @return a negative integer if less than the given object, a positive
* integer if larger than it, or 0 if equal to it.
*/
public final int compareTo(Charset charset) {
return this.canonicalName.compareToIgnoreCase(charset.canonicalName);
}
/*
* -------------------------------------------------------------------
* Methods overriding parent class Object
* -------------------------------------------------------------------
*/
/**
* Determines whether this charset equals to the given object. They are
* considered to be equal if they have the same canonical name.
*
* @param obj
* the given object to be compared with.
* @return true if they have the same canonical name, otherwise false.
*/
@Override
public final boolean equals(Object obj) {
if (obj instanceof Charset) {
Charset that = (Charset) obj;
return this.canonicalName.equals(that.canonicalName);
}
return false;
}
/**
* Gets the hash code of this charset.
*
* @return the hash code of this charset.
*/
@Override
public final int hashCode() {
return this.canonicalName.hashCode();
}
/**
* Gets a string representation of this charset. Usually this contains the
* canonical name of the charset.
*
* @return a string representation of this charset.
*/
@Override
public final String toString() {
return "Charset[" + this.canonicalName + "]"; //$NON-NLS-1$//$NON-NLS-2$
}
/**
* Gets the system default charset from the virtual machine.
*
* @return the default charset.
*/
public static Charset defaultCharset() {
Charset defaultCharset = null;
String encoding = AccessController
.doPrivileged(new PrivilegedAction<String>() {
public String run() {
return System.getProperty("file.encoding"); //$NON-NLS-1$
}
});
try {
defaultCharset = Charset.forName(encoding);
} catch (UnsupportedCharsetException e) {
defaultCharset = Charset.forName("UTF-8"); //$NON-NLS-1$
}
return defaultCharset;
}
}