| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package java.util; |
| |
| import java.io.IOException; |
| import java.io.ObjectInputStream; |
| import java.io.ObjectOutputStream; |
| import java.io.ObjectStreamField; |
| import java.io.Serializable; |
| import java.nio.charset.StandardCharsets; |
| import libcore.icu.ICU; |
| |
| /** |
| * {@code Locale} represents a language/country/variant combination. Locales are used to |
| * alter the presentation of information such as numbers or dates to suit the conventions |
| * in the region they describe. |
| * |
| * <p>The language codes are two-letter lowercase ISO language codes (such as "en") as defined by |
| * <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>. |
| * The country codes are two-letter uppercase ISO country codes (such as "US") as defined by |
| * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">ISO 3166-1</a>. |
| * The variant codes are unspecified. |
| * |
| * <p>Note that Java uses several deprecated two-letter codes. The Hebrew ("he") language |
| * code is rewritten as "iw", Indonesian ("id") as "in", and Yiddish ("yi") as "ji". This |
| * rewriting happens even if you construct your own {@code Locale} object, not just for |
| * instances returned by the various lookup methods. |
| * |
| * <a name="available_locales"><h3>Available locales</h3></a> |
| * <p>This class' constructors do no error checking. You can create a {@code Locale} for languages |
| * and countries that don't exist, and you can create instances for combinations that don't |
| * exist (such as "de_US" for "German as spoken in the US"). |
| * |
| * <p>Note that locale data is not necessarily available for any of the locales pre-defined as |
| * constants in this class except for en_US, which is the only locale Java guarantees is always |
| * available. |
| * |
| * <p>It is also a mistake to assume that all devices have the same locales available. |
| * A device sold in the US will almost certainly support en_US and es_US, but not necessarily |
| * any locales with the same language but different countries (such as en_GB or es_ES), |
| * nor any locales for other languages (such as de_DE). The opposite may well be true for a device |
| * sold in Europe. |
| * |
| * <p>You can use {@link Locale#getDefault} to get an appropriate locale for the <i>user</i> of the |
| * device you're running on, or {@link Locale#getAvailableLocales} to get a list of all the locales |
| * available on the device you're running on. |
| * |
| * <a name="locale_data"><h3>Locale data</h3></a> |
| * <p>Note that locale data comes solely from ICU. User-supplied locale service providers (using |
| * the {@code java.text.spi} or {@code java.util.spi} mechanisms) are not supported. |
| * |
| * <p>Here are the versions of ICU (and the corresponding CLDR and Unicode versions) used in |
| * various Android releases: |
| * <table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY=""> |
| * <tr><td>Android 1.5 (Cupcake)/Android 1.6 (Donut)/Android 2.0 (Eclair)</td> |
| * <td>ICU 3.8</td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-5">CLDR 1.5</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode5.0.0/">Unicode 5.0</a></td></tr> |
| * <tr><td>Android 2.2 (Froyo)</td> |
| * <td>ICU 4.2</td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-7">CLDR 1.7</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode5.1.0/">Unicode 5.1</a></td></tr> |
| * <tr><td>Android 2.3 (Gingerbread)/Android 3.0 (Honeycomb)</td> |
| * <td>ICU 4.4</td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-8">CLDR 1.8</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode5.2.0/">Unicode 5.2</a></td></tr> |
| * <tr><td>Android 4.0 (Ice Cream Sandwich)</td> |
| * <td><a href="http://site.icu-project.org/download/46">ICU 4.6</a></td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-9">CLDR 1.9</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr> |
| * <tr><td>Android 4.1 (Jelly Bean)</td> |
| * <td><a href="http://site.icu-project.org/download/48">ICU 4.8</a></td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-2-0">CLDR 2.0</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr> |
| * <tr><td>Android 4.3 (Jelly Bean MR2)</td> |
| * <td><a href="http://site.icu-project.org/download/50">ICU 50</a></td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-22-1">CLDR 22.1</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr> |
| * <tr><td>Android 4.4 (KitKat)</td> |
| * <td><a href="http://site.icu-project.org/download/51">ICU 51</a></td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-23">CLDR 23</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr> |
| * <tr><td>Android 5.0 (Lollipop)</td> |
| * <td><a href="http://site.icu-project.org/download/53">ICU 53</a></td> |
| * <td><a href="http://cldr.unicode.org/index/downloads/cldr-25">CLDR 25</a></td> |
| * <td><a href="http://www.unicode.org/versions/Unicode6.3.0/">Unicode 6.3</a></td></tr> |
| * </table> |
| * |
| * <a name="default_locale"><h3>Be wary of the default locale</h3></a> |
| * <p>Note that there are many convenience methods that automatically use the default locale, but |
| * using them may lead to subtle bugs. |
| * |
| * <p>The default locale is appropriate for tasks that involve presenting data to the user. In |
| * this case, you want to use the user's date/time formats, number |
| * formats, rules for conversion to lowercase, and so on. In this case, it's safe to use the |
| * convenience methods. |
| * |
| * <p>The default locale is <i>not</i> appropriate for machine-readable output. The best choice |
| * there is usually {@code Locale.US} – this locale is guaranteed to be available on all |
| * devices, and the fact that it has no surprising special cases and is frequently used (especially |
| * for computer-computer communication) means that it tends to be the most efficient choice too. |
| * |
| * <p>A common mistake is to implicitly use the default locale when producing output meant to be |
| * machine-readable. This tends to work on the developer's test devices (especially because so many |
| * developers use en_US), but fails when run on a device whose user is in a more complex locale. |
| * |
| * <p>For example, if you're formatting integers some locales will use non-ASCII decimal |
| * digits. As another example, if you're formatting floating-point numbers some locales will use |
| * {@code ','} as the decimal point and {@code '.'} for digit grouping. That's correct for |
| * human-readable output, but likely to cause problems if presented to another |
| * computer ({@link Double#parseDouble} can't parse such a number, for example). |
| * You should also be wary of the {@link String#toLowerCase} and |
| * {@link String#toUpperCase} overloads that don't take a {@code Locale}: in Turkey, for example, |
| * the characters {@code 'i'} and {@code 'I'} won't be converted to {@code 'I'} and {@code 'i'}. |
| * This is the correct behavior for Turkish text (such as user input), but inappropriate for, say, |
| * HTTP headers. |
| */ |
| public final class Locale implements Cloneable, Serializable { |
| |
/** Serialization version identifier for this class. */
private static final long serialVersionUID = 9149081749638150636L;

// NOTE: every predefined locale below is created through the private
// three-argument constructor, passing true for hasValidatedFields together
// with an already lower-cased language and an already upper-cased country.

/**
 * Locale constant for en_CA (language "en", country "CA").
 */
public static final Locale CANADA = new Locale(true, "en", "CA");

/**
 * Locale constant for fr_CA (language "fr", country "CA").
 */
public static final Locale CANADA_FRENCH = new Locale(true, "fr", "CA");

/**
 * Locale constant for zh_CN (language "zh", country "CN").
 */
public static final Locale CHINA = new Locale(true, "zh", "CN");

/**
 * Locale constant for zh (language "zh", no country).
 */
public static final Locale CHINESE = new Locale(true, "zh", "");

/**
 * Locale constant for en (language "en", no country).
 */
public static final Locale ENGLISH = new Locale(true, "en", "");

/**
 * Locale constant for fr_FR (language "fr", country "FR").
 */
public static final Locale FRANCE = new Locale(true, "fr", "FR");

/**
 * Locale constant for fr (language "fr", no country).
 */
public static final Locale FRENCH = new Locale(true, "fr", "");

/**
 * Locale constant for de (language "de", no country).
 */
public static final Locale GERMAN = new Locale(true, "de", "");

/**
 * Locale constant for de_DE (language "de", country "DE").
 */
public static final Locale GERMANY = new Locale(true, "de", "DE");

/**
 * Locale constant for it (language "it", no country).
 */
public static final Locale ITALIAN = new Locale(true, "it", "");

/**
 * Locale constant for it_IT (language "it", country "IT").
 */
public static final Locale ITALY = new Locale(true, "it", "IT");

/**
 * Locale constant for ja_JP (language "ja", country "JP").
 */
public static final Locale JAPAN = new Locale(true, "ja", "JP");

/**
 * Locale constant for ja (language "ja", no country).
 */
public static final Locale JAPANESE = new Locale(true, "ja", "");

/**
 * Locale constant for ko_KR (language "ko", country "KR").
 */
public static final Locale KOREA = new Locale(true, "ko", "KR");

/**
 * Locale constant for ko (language "ko", no country).
 */
public static final Locale KOREAN = new Locale(true, "ko", "");

/**
 * Locale constant for zh_CN. Uses the same language and country as
 * {@link #CHINA} and {@link #SIMPLIFIED_CHINESE}, but is a distinct instance.
 */
public static final Locale PRC = new Locale(true, "zh", "CN");

/**
 * Locale constant for the root locale. The root locale has an empty language,
 * country, and variant.
 *
 * @since 1.6
 */
public static final Locale ROOT = new Locale(true, "", "");

/**
 * Locale constant for zh_CN. Uses the same language and country as
 * {@link #CHINA} and {@link #PRC}, but is a distinct instance.
 */
public static final Locale SIMPLIFIED_CHINESE = new Locale(true, "zh", "CN");

/**
 * Locale constant for zh_TW (language "zh", country "TW").
 */
public static final Locale TAIWAN = new Locale(true, "zh", "TW");

/**
 * Locale constant for zh_TW. Uses the same language and country as
 * {@link #TAIWAN}, but is a distinct instance.
 */
public static final Locale TRADITIONAL_CHINESE = new Locale(true, "zh", "TW");

/**
 * Locale constant for en_GB (language "en", country "GB").
 */
public static final Locale UK = new Locale(true, "en", "GB");

/**
 * Locale constant for en_US (language "en", country "US").
 */
public static final Locale US = new Locale(true, "en", "US");

/**
 * BCP-47 extension identifier (or "singleton") for the private
 * use extension.
 *
 * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
 *
 * @since 1.7
 */
public static final char PRIVATE_USE_EXTENSION = 'x';

/**
 * BCP-47 extension identifier (or "singleton") for the unicode locale extension.
 *
 * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
 *
 * @since 1.7
 */
public static final char UNICODE_LOCALE_EXTENSION = 'u';

/**
 * ISO 639-3 generic code for undetermined languages. The builder's
 * non-strict language normalization returns this value when it is given a
 * language that is not well formed.
 */
private static final String UNDETERMINED_LANGUAGE = "und";

/**
 * The current default locale. It is temporarily assigned to US because we
 * need a default locale to lookup the real default locale; the static
 * initializer that follows replaces it with a locale built from the
 * {@code user.language}, {@code user.region} and {@code user.variant}
 * system properties.
 */
private static Locale defaultLocale = US;
| |
static {
    // Swap the temporary default for the locale described by the "user.*"
    // system properties; a missing property falls back to "en", "US" and an
    // empty variant respectively.
    defaultLocale = new Locale(
            System.getProperty("user.language", "en"),
            System.getProperty("user.region", "US"),
            System.getProperty("user.variant", ""));
}
| |
| /** |
| * A class that helps construct {@link Locale} instances. |
| * |
| * Unlike the public {@code Locale} constructors, the methods of this class |
| * perform much stricter checks on their input. |
| * |
| * Validity checks on the {@code language}, {@code country}, {@code variant} |
| * and {@code extension} values are carried out as per the |
| * <a href="https://tools.ietf.org/html/bcp47">BCP-47</a> specification. |
| * |
| * In addition, we treat the <a href="http://www.unicode.org/reports/tr35/"> |
| * Unicode locale extension</a> specially and provide methods to manipulate |
| * the structured state (keywords and attributes) specified therein. |
| * |
| * @since 1.7 |
| */ |
| public static final class Builder { |
// Basic locale fields; each one holds "" while it is unset.
private String language;
private String region;
private String variant;
private String script;

// Structured state of the unicode locale extension (attributes and
// keywords), plus every other extension keyed by its singleton character.
private final Set<String> attributes;
private final Map<String, String> keywords;
private final Map<Character, String> extensions;

/**
 * Creates a builder with empty language, region, variant and script, and
 * with no attributes, keywords or extensions.
 */
public Builder() {
    // Sorted collections are used here (and in Locale itself) because the
    // serialized forms of the unicode locale extension, and of the
    // extension map, are specified to list their keys alphabetically.
    attributes = new TreeSet<String>();
    keywords = new TreeMap<String, String>();
    extensions = new TreeMap<Character, String>();

    language = "";
    region = "";
    variant = "";
    script = "";
}
| |
| /** |
| * Sets the locale language. If {@code language} is {@code null} or empty, the |
| * previous value is cleared. |
| * |
| * As per BCP-47, the language must be between 2 and 3 ASCII characters |
| * in length and must only contain characters in the range {@code [a-zA-Z]}. |
| * |
| * This value is usually an <a href="http://www.loc.gov/standards/iso639-2/"> |
| * ISO-639-2</a> alpha-2 or alpha-3 code, though no explicit checks are |
| * carried out that it's a valid code in that namespace. |
| * |
| * Values are normalized to lower case. |
| * |
| * Note that we don't support BCP-47 "extlang" languages because they were |
| * only ever used to substitute for a lack of 3 letter language codes. |
| * |
| * @throws IllformedLocaleException if the language was invalid. |
| */ |
| public Builder setLanguage(String language) { |
| this.language = normalizeAndValidateLanguage(language, true /* strict */); |
| return this; |
| } |
| |
| private static String normalizeAndValidateLanguage(String language, boolean strict) { |
| if (language == null || language.isEmpty()) { |
| return ""; |
| } |
| |
| final String lowercaseLanguage = language.toLowerCase(Locale.ROOT); |
| if (!isValidBcp47Alpha(lowercaseLanguage, 2, 3)) { |
| if (strict) { |
| throw new IllformedLocaleException("Invalid language: " + language); |
| } else { |
| return UNDETERMINED_LANGUAGE; |
| } |
| } |
| |
| return lowercaseLanguage; |
| } |
| |
| /** |
| * Set the state of this builder to the parsed contents of the BCP-47 language |
| * tag {@code languageTag}. |
| * |
| * This method is equivalent to a call to {@link #clear} if {@code languageTag} |
| * is {@code null} or empty. |
| * |
| * <b>NOTE:</b> In contrast to {@link Locale#forLanguageTag(String)}, which |
| * simply ignores malformed input, this method will throw an exception if |
| * its input is malformed. |
| * |
| * @throws IllformedLocaleException if {@code languageTag} is not a well formed |
| * BCP-47 tag. |
| */ |
| public Builder setLanguageTag(String languageTag) { |
| if (languageTag == null || languageTag.isEmpty()) { |
| clear(); |
| return this; |
| } |
| |
| final Locale fromIcu = forLanguageTag(languageTag, true /* strict */); |
| // When we ask ICU for strict parsing, it might return a null locale |
| // if the language tag is malformed. |
| if (fromIcu == null) { |
| throw new IllformedLocaleException("Invalid languageTag: " + languageTag); |
| } |
| |
| setLocale(fromIcu); |
| return this; |
| } |
| |
| /** |
| * Sets the locale region. If {@code region} is {@code null} or empty, the |
| * previous value is cleared. |
| * |
| * As per BCP-47, the region must either be a 2 character ISO-3166-1 code |
| * (each character in the range [a-zA-Z]) OR a 3 digit UN M.49 code. |
| * |
| * Values are normalized to upper case. |
| * |
| * @throws IllformedLocaleException if {@code} region is invalid. |
| */ |
| public Builder setRegion(String region) { |
| this.region = normalizeAndValidateRegion(region, true /* strict */); |
| return this; |
| } |
| |
| private static String normalizeAndValidateRegion(String region, boolean strict) { |
| if (region == null || region.isEmpty()) { |
| return ""; |
| } |
| |
| final String uppercaseRegion = region.toUpperCase(Locale.ROOT); |
| if (!isValidBcp47Alpha(uppercaseRegion, 2, 2) && |
| !isUnM49AreaCode(uppercaseRegion)) { |
| if (strict) { |
| throw new IllformedLocaleException("Invalid region: " + region); |
| } else { |
| return ""; |
| } |
| } |
| |
| return uppercaseRegion; |
| } |
| |
| /** |
| * Sets the locale variant. If {@code variant} is {@code null} or empty, |
| * the previous value is cleared. |
| * |
| * The input string my consist of one or more variants separated by |
| * valid separators ('-' or '_'). |
| * |
| * As per BCP-47, each variant must be between 5 and 8 alphanumeric characters |
| * in length (each character in the range {@code [a-zA-Z0-9]}) but |
| * can be exactly 4 characters in length if the first character is a digit. |
| * |
| * Note that this is a much stricter interpretation of {@code variant} |
| * than the public {@code Locale} constructors. The latter allowed free form |
| * variants. |
| * |
| * Variants are case sensitive and all separators are normalized to {@code '_'}. |
| * |
| * @throws IllformedLocaleException if {@code} variant is invalid. |
| */ |
| public Builder setVariant(String variant) { |
| this.variant = normalizeAndValidateVariant(variant); |
| return this; |
| } |
| |
| private static String normalizeAndValidateVariant(String variant) { |
| if (variant == null || variant.isEmpty()) { |
| return ""; |
| } |
| |
| // Note that unlike extensions, we canonicalize to lower case alphabets |
| // and underscores instead of hyphens. |
| final String normalizedVariant = variant.replace('-', '_'); |
| String[] subTags = normalizedVariant.split("_"); |
| |
| for (String subTag : subTags) { |
| if (!isValidVariantSubtag(subTag)) { |
| throw new IllformedLocaleException("Invalid variant: " + variant); |
| } |
| } |
| |
| return normalizedVariant; |
| } |
| |
| private static boolean isValidVariantSubtag(String subTag) { |
| // The BCP-47 spec states that : |
| // - Subtags can be between [5, 8] alphanumeric chars in length. |
| // - Subtags that start with a number are allowed to be 4 chars in length. |
| if (subTag.length() >= 5 && subTag.length() <= 8) { |
| if (isAsciiAlphaNum(subTag)) { |
| return true; |
| } |
| } else if (subTag.length() == 4) { |
| final char firstChar = subTag.charAt(0); |
| if ((firstChar >= '0' && firstChar <= '9') && isAsciiAlphaNum(subTag)) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /** |
| * Sets the locale script. If {@code script} is {@code null} or empty, |
| * the previous value is cleared. |
| * |
| * As per BCP-47, the script must be 4 characters in length, and |
| * each character in the range {@code [a-zA-Z]}. |
| * |
| * A script usually represents a valid ISO 15924 script code, though no |
| * other registry or validity checks are performed. |
| * |
| * Scripts are normalized to title cased values. |
| * |
| * @throws IllformedLocaleException if {@code script} is invalid. |
| */ |
| public Builder setScript(String script) { |
| this.script = normalizeAndValidateScript(script, true /* strict */); |
| return this; |
| } |
| |
| private static String normalizeAndValidateScript(String script, boolean strict) { |
| if (script == null || script.isEmpty()) { |
| return ""; |
| } |
| |
| if (!isValidBcp47Alpha(script, 4, 4)) { |
| if (strict) { |
| throw new IllformedLocaleException("Invalid script: " + script); |
| } else { |
| return ""; |
| } |
| } |
| |
| return titleCaseAsciiWord(script); |
| } |
| |
| /** |
| * Sets the state of the builder to the {@link Locale} represented by |
| * {@code locale}. |
| * |
| * Note that the locale's language, region and variant are validated as per |
| * the rules specified in {@link #setLanguage}, {@link #setRegion} and |
| * {@link #setVariant}. |
| * |
| * All existing builder state is discarded. |
| * |
| * @throws IllformedLocaleException if {@code locale} is invalid. |
| * @throws NullPointerException if {@code locale} is null. |
| */ |
| public Builder setLocale(Locale locale) { |
| if (locale == null) { |
| throw new NullPointerException("locale == null"); |
| } |
| |
| // Make copies of the existing values so that we don't partially |
| // update the state if we encounter an error. |
| final String backupLanguage = language; |
| final String backupRegion = region; |
| final String backupVariant = variant; |
| |
| try { |
| setLanguage(locale.getLanguage()); |
| setRegion(locale.getCountry()); |
| setVariant(locale.getVariant()); |
| } catch (IllformedLocaleException ifle) { |
| language = backupLanguage; |
| region = backupRegion; |
| variant = backupVariant; |
| |
| throw ifle; |
| } |
| |
| // The following values can be set only via the builder class, so |
| // there's no need to normalize them or check their validity. |
| |
| this.script = locale.getScript(); |
| |
| extensions.clear(); |
| extensions.putAll(locale.extensions); |
| |
| keywords.clear(); |
| keywords.putAll(locale.unicodeKeywords); |
| |
| attributes.clear(); |
| attributes.addAll(locale.unicodeAttributes); |
| |
| return this; |
| } |
| |
| /** |
| * Adds the specified attribute to the list of attributes in the unicode |
| * locale extension. |
| * |
| * Attributes must be between 3 and 8 characters in length, and each character |
| * must be in the range {@code [a-zA-Z0-9]}. |
| * |
| * Attributes are normalized to lower case values. All added attributes and |
| * keywords are combined to form a complete unicode locale extension on |
| * {@link Locale} objects built by this builder, and accessible via |
| * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION} |
| * key. |
| * |
| * @throws IllformedLocaleException if {@code attribute} is invalid. |
| * @throws NullPointerException if {@code attribute} is null. |
| */ |
| public Builder addUnicodeLocaleAttribute(String attribute) { |
| if (attribute == null) { |
| throw new NullPointerException("attribute == null"); |
| } |
| |
| final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT); |
| if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) { |
| throw new IllformedLocaleException("Invalid locale attribute: " + attribute); |
| } |
| |
| attributes.add(lowercaseAttribute); |
| |
| return this; |
| } |
| |
| /** |
| * Removes an attribute from the list of attributes in the unicode locale |
| * extension. |
| * |
| * {@code attribute} must be valid as per the rules specified in |
| * {@link #addUnicodeLocaleAttribute}. |
| * |
| * This method has no effect if {@code attribute} hasn't already been |
| * added. |
| * |
| * @throws IllformedLocaleException if {@code attribute} is invalid. |
| * @throws NullPointerException if {@code attribute} is null. |
| */ |
| public Builder removeUnicodeLocaleAttribute(String attribute) { |
| if (attribute == null) { |
| throw new NullPointerException("attribute == null"); |
| } |
| |
| // Weirdly, remove is specified to check whether the attribute |
| // is valid, so we have to perform the full alphanumeric check here. |
| final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT); |
| if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) { |
| throw new IllformedLocaleException("Invalid locale attribute: " + attribute); |
| } |
| |
| attributes.remove(attribute); |
| return this; |
| } |
| |
| /** |
| * Sets the extension identified by {@code key} to {@code value}. |
| * |
| * {@code key} must be in the range {@code [a-zA-Z0-9]}. |
| * |
| * If {@code value} is {@code null} or empty, the extension is removed. |
| * |
| * In the general case, {@code value} must be a series of subtags separated |
| * by ({@code "-"} or {@code "_"}). Each subtag must be between |
| * 2 and 8 characters in length, and each character in the subtag must be in |
| * the range {@code [a-zA-Z0-9]}. |
| * |
| * <p> |
| * There are two special cases : |
| * <li> |
| * <ul> |
| * The unicode locale extension |
| * ({@code key == 'u'}, {@link Locale#UNICODE_LOCALE_EXTENSION}) : Setting |
| * the unicode locale extension results in all existing keyword and attribute |
| * state being replaced by the parsed result of {@code value}. For example, |
| * {@code builder.setExtension('u', "baaaz-baaar-fo-baar-ba-baaz")} |
| * is equivalent to: |
| * <pre> |
| * builder.addUnicodeLocaleAttribute("baaaz"); |
| * builder.addUnicodeLocaleAttribute("baaar"); |
| * builder.setUnicodeLocaleKeyword("fo", "baar"); |
| * builder.setUnicodeLocaleKeyword("ba", "baaa"); |
| * </pre> |
| * </ul> |
| * <ul> |
| * The private use extension |
| * ({@code key == 'x'}, {@link Locale#PRIVATE_USE_EXTENSION}) : Each subtag in a |
| * private use extension can be between 1 and 8 characters in length (in contrast |
| * to a minimum length of 2 for all other extensions). |
| * </ul> |
| * </li> |
| * |
| * @throws IllformedLocaleException if {@code value} is invalid. |
| */ |
| public Builder setExtension(char key, String value) { |
| if (value == null || value.isEmpty()) { |
| extensions.remove(key); |
| return this; |
| } |
| |
| final String normalizedValue = value.toLowerCase(Locale.ROOT).replace('_', '-'); |
| final String[] subtags = normalizedValue.split("-"); |
| |
| // Lengths for subtags in the private use extension should be [1, 8] chars. |
| // For all other extensions, they should be [2, 8] chars. |
| // |
| // http://www.rfc-editor.org/rfc/bcp/bcp47.txt |
| final int minimumLength = (key == PRIVATE_USE_EXTENSION) ? 1 : 2; |
| for (String subtag : subtags) { |
| if (!isValidBcp47Alphanum(subtag, minimumLength, 8)) { |
| throw new IllformedLocaleException( |
| "Invalid private use extension : " + value); |
| } |
| } |
| |
| // We need to take special action in the case of unicode extensions, |
| // since we claim to understand their keywords and attributes. |
| if (key == UNICODE_LOCALE_EXTENSION) { |
| // First clear existing attributes and keywords. |
| extensions.clear(); |
| attributes.clear(); |
| |
| parseUnicodeExtension(subtags, keywords, attributes); |
| } else { |
| extensions.put(key, normalizedValue); |
| } |
| |
| return this; |
| } |
| |
| /** |
| * Clears all extensions from this builder. Note that this also implicitly |
| * clears all state related to the unicode locale extension; all attributes |
| * and keywords set by {@link #addUnicodeLocaleAttribute} and |
| * {@link #setUnicodeLocaleKeyword} are cleared. |
| */ |
| public Builder clearExtensions() { |
| extensions.clear(); |
| attributes.clear(); |
| keywords.clear(); |
| return this; |
| } |
| |
| /** |
| * Adds a key / type pair to the list of unicode locale extension keys. |
| * |
| * {@code key} must be 2 characters in length, and each character must be |
| * in the range {@code [a-zA-Z0-9]}. |
| * |
| * {#code type} can either be empty, or a series of one or more subtags |
| * separated by a separator ({@code "-"} or {@code "_"}). Each subtag must |
| * be between 3 and 8 characters in length and each character in the subtag |
| * must be in the range {@code [a-zA-Z0-9]}. |
| * |
| * Note that the type is normalized to lower case, and all separators |
| * are normalized to {@code "-"}. All added attributes and |
| * keywords are combined to form a complete unicode locale extension on |
| * {@link Locale} objects built by this builder, and accessible via |
| * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION} |
| * key. |
| * |
| * @throws IllformedLocaleException if {@code key} or {@code value} are |
| * invalid. |
| */ |
| public Builder setUnicodeLocaleKeyword(String key, String type) { |
| if (key == null) { |
| throw new NullPointerException("key == null"); |
| } |
| |
| if (type == null && keywords != null) { |
| keywords.remove(key); |
| return this; |
| } |
| |
| final String lowerCaseKey = key.toLowerCase(Locale.ROOT); |
| // The key must be exactly two alphanumeric characters. |
| if (lowerCaseKey.length() != 2 || !isAsciiAlphaNum(lowerCaseKey)) { |
| throw new IllformedLocaleException("Invalid unicode locale keyword: " + key); |
| } |
| |
| // The type can be one or more alphanumeric strings of length [3, 8] characters, |
| // separated by a separator char, which is one of "_" or "-". Though the spec |
| // doesn't require it, we normalize all "_" to "-" to make the rest of our |
| // processing easier. |
| final String lowerCaseType = type.toLowerCase(Locale.ROOT).replace("_", "-"); |
| if (!isValidTypeList(lowerCaseType)) { |
| throw new IllformedLocaleException("Invalid unicode locale type: " + type); |
| } |
| |
| // Everything checks out fine, add the <key, type> mapping to the list. |
| keywords.put(lowerCaseKey, lowerCaseType); |
| |
| return this; |
| } |
| |
| /** |
| * Clears all existing state from this builder. |
| */ |
| public Builder clear() { |
| clearExtensions(); |
| language = region = variant = script = ""; |
| |
| return this; |
| } |
| |
| /** |
| * Constructs a locale from the existing state of the builder. Note that this |
| * method is guaranteed to succeed since field validity checks are performed |
| * at the point of setting them. |
| */ |
| public Locale build() { |
| // NOTE: We need to make a copy of attributes, keywords and extensions |
| // because the RI allows this builder to reused. |
| return new Locale(language, region, variant, script, |
| attributes, keywords, extensions, |
| true /* has validated fields */); |
| } |
| } |
| |
| /** |
| * Returns a locale for a given BCP-47 language tag. This method is more |
| * lenient than {@link Builder#setLanguageTag}. For a given language tag, parsing |
| * will proceed up to the first malformed subtag. All subsequent tags are discarded. |
| * Note that language tags use {@code -} rather than {@code _}, for example {@code en-US}. |
| * |
| * @throws NullPointerException if {@code languageTag} is {@code null}. |
| * |
| * @since 1.7 |
| */ |
| public static Locale forLanguageTag(String languageTag) { |
| if (languageTag == null) { |
| throw new NullPointerException("languageTag == null"); |
| } |
| |
| return forLanguageTag(languageTag, false /* strict */); |
| } |
| |
// Core locale fields. They are all declared transient, so they are not part
// of the default serialized form of this class.
private transient String countryCode;
private transient String languageCode;
private transient String variantCode;
private transient String scriptCode;

/* Attributes of the unicode locale extension. Sorted, Unmodifiable */
private transient Set<String> unicodeAttributes;
/* Keyword to type mappings of the unicode locale extension. Sorted, Unmodifiable */
private transient Map<String, String> unicodeKeywords;
/* Extension values keyed by their singleton character. Sorted, Unmodifiable */
private transient Map<Character, String> extensions;

/**
 * Whether this instance was constructed from a builder. We can make
 * stronger assumptions about the validity of Locale fields if this was
 * constructed by a builder.
 */
private transient final boolean hasValidatedFields;

// NOTE(review): judging by their names these cache the results of
// toString(), the BCP-47 language tag and the ICU locale id; the code that
// fills them is outside this part of the file — confirm before relying on it.
private transient String cachedToStringResult;
private transient String cachedLanguageTag;
private transient String cachedIcuLocaleId;
| |
/**
 * Creates one of the built-in locales from a language and country that are
 * already in their canonical case. The variant and script are empty and
 * there are no extensions.
 *
 * There's a circular dependency between toLowerCase/toUpperCase and
 * Locale.US, which this constructor works around by never calling those
 * methods.
 */
private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode,
        String upperCaseCountryCode) {
    this.hasValidatedFields = hasValidatedFields;

    languageCode = lowerCaseLanguageCode;
    countryCode = upperCaseCountryCode;
    scriptCode = "";
    variantCode = "";

    // The shared immutable empty collections, obtained through the typed
    // factory methods rather than the raw EMPTY_* constants.
    extensions = Collections.emptyMap();
    unicodeKeywords = Collections.emptyMap();
    unicodeAttributes = Collections.emptySet();
}
| |
/**
 * Constructs a new {@code Locale} using the specified language. The country,
 * variant and script are empty and the locale has no extensions.
 *
 * @param language the language code; must not be {@code null}.
 */
public Locale(String language) {
    // Typed empty collections avoid the unchecked conversions that the raw
    // EMPTY_SET / EMPTY_MAP constants would require.
    this(language, "", "", "",
            Collections.<String>emptySet(),
            Collections.<String, String>emptyMap(),
            Collections.<Character, String>emptyMap(),
            false /* has validated fields */);
}
| |
/**
 * Constructs a new {@code Locale} using the specified language and country codes.
 * The variant and script are empty and the locale has no extensions.
 *
 * @param language the language code; must not be {@code null}.
 * @param country the country code; must not be {@code null}.
 */
public Locale(String language, String country) {
    // Typed empty collections avoid the unchecked conversions that the raw
    // EMPTY_SET / EMPTY_MAP constants would require.
    this(language, country, "", "",
            Collections.<String>emptySet(),
            Collections.<String, String>emptyMap(),
            Collections.<Character, String>emptyMap(),
            false /* has validated fields */);
}
| |
| /** |
| * Required by libcore.icu.ICU. |
| * |
| * @hide |
| */ |
| public Locale(String language, String country, String variant, String scriptCode, |
| /* nonnull */ Set<String> unicodeAttributes, |
| /* nonnull */ Map<String, String> unicodeKeywords, |
| /* nonnull */ Map<Character, String> extensions, |
| boolean hasValidatedFields) { |
| if (language == null || country == null || variant == null) { |
| throw new NullPointerException("language=" + language + |
| ",country=" + country + |
| ",variant=" + variant); |
| } |
| |
| if (hasValidatedFields) { |
| this.languageCode = adjustLanguageCode(language); |
| this.countryCode = country; |
| this.variantCode = variant; |
| } else { |
| if (language.isEmpty() && country.isEmpty()) { |
| languageCode = ""; |
| countryCode = ""; |
| variantCode = variant; |
| } else { |
| languageCode = adjustLanguageCode(language); |
| countryCode = country.toUpperCase(Locale.US); |
| variantCode = variant; |
| } |
| } |
| |
| this.scriptCode = scriptCode; |
| |
| if (hasValidatedFields) { |
| Set<String> attribsCopy = new TreeSet<String>(unicodeAttributes); |
| Map<String, String> keywordsCopy = new TreeMap<String, String>(unicodeKeywords); |
| Map<Character, String> extensionsCopy = new TreeMap<Character, String>(extensions); |
| |
| // We need to transform the list of attributes & keywords set on the |
| // builder to a unicode locale extension. i.e, if we have any keywords |
| // or attributes set, Locale#getExtension('u') should return a well |
| // formed extension. |
| addUnicodeExtensionToExtensionsMap(attribsCopy, keywordsCopy, extensionsCopy); |
| |
| this.unicodeAttributes = Collections.unmodifiableSet(attribsCopy); |
| this.unicodeKeywords = Collections.unmodifiableMap(keywordsCopy); |
| this.extensions = Collections.unmodifiableMap(extensionsCopy); |
| } else { |
| this.unicodeAttributes = unicodeAttributes; |
| this.unicodeKeywords = unicodeKeywords; |
| this.extensions = extensions; |
| } |
| |
| this.hasValidatedFields = hasValidatedFields; |
| } |
| |
/**
 * Constructs a new {@code Locale} using the specified language, country,
 * and variant codes. The script is empty and the locale has no extensions.
 *
 * @param language the language code; must not be {@code null}.
 * @param country the country code; must not be {@code null}.
 * @param variant the variant code; must not be {@code null}.
 */
public Locale(String language, String country, String variant) {
    // Typed empty collections avoid the unchecked conversions that the raw
    // EMPTY_SET / EMPTY_MAP constants would require.
    this(language, country, variant, "",
            Collections.<String>emptySet(),
            Collections.<String, String>emptyMap(),
            Collections.<Character, String>emptyMap(),
            false /* has validated fields */);
}
| |
| @Override public Object clone() { |
| try { |
| return super.clone(); |
| } catch (CloneNotSupportedException e) { |
| throw new AssertionError(e); |
| } |
| } |
| |
| /** |
| * Returns true if {@code object} is a locale with the same language, |
| * country and variant. |
| */ |
| @Override public boolean equals(Object object) { |
| if (object == this) { |
| return true; |
| } |
| if (object instanceof Locale) { |
| Locale o = (Locale) object; |
| return languageCode.equals(o.languageCode) |
| && countryCode.equals(o.countryCode) |
| && variantCode.equals(o.variantCode) |
| && scriptCode.equals(o.scriptCode) |
| && extensions.equals(o.extensions); |
| |
| } |
| return false; |
| } |
| |
| /** |
| * Returns the system's installed locales. This array always includes {@code |
| * Locale.US}, and usually several others. Most locale-sensitive classes |
| * offer their own {@code getAvailableLocales} method, which should be |
| * preferred over this general purpose method. |
| * |
| * @see java.text.BreakIterator#getAvailableLocales() |
| * @see java.text.Collator#getAvailableLocales() |
| * @see java.text.DateFormat#getAvailableLocales() |
| * @see java.text.DateFormatSymbols#getAvailableLocales() |
| * @see java.text.DecimalFormatSymbols#getAvailableLocales() |
| * @see java.text.NumberFormat#getAvailableLocales() |
| * @see java.util.Calendar#getAvailableLocales() |
| */ |
| public static Locale[] getAvailableLocales() { |
| return ICU.getAvailableLocales(); |
| } |
| |
| /** |
| * Returns the country code for this locale, or {@code ""} if this locale |
| * doesn't correspond to a specific country. |
| */ |
| public String getCountry() { |
| return countryCode; |
| } |
| |
| /** |
| * Returns the user's preferred locale. This may have been overridden for |
| * this process with {@link #setDefault}. |
| * |
| * <p>Since the user's locale changes dynamically, avoid caching this value. |
| * Instead, use this method to look it up for each use. |
| */ |
| public static Locale getDefault() { |
| return defaultLocale; |
| } |
| |
| /** |
| * Equivalent to {@code getDisplayCountry(Locale.getDefault())}. |
| */ |
| public final String getDisplayCountry() { |
| return getDisplayCountry(getDefault()); |
| } |
| |
| /** |
| * Returns the name of this locale's country, localized to {@code locale}. |
| * Returns the empty string if this locale does not correspond to a specific |
| * country. |
| */ |
| public String getDisplayCountry(Locale locale) { |
| if (countryCode.isEmpty()) { |
| return ""; |
| } |
| |
| final String normalizedRegion = Builder.normalizeAndValidateRegion( |
| countryCode, false /* strict */); |
| if (normalizedRegion.isEmpty()) { |
| return countryCode; |
| } |
| |
| String result = ICU.getDisplayCountry(this, locale); |
| if (result == null) { // TODO: do we need to do this, or does ICU do it for us? |
| result = ICU.getDisplayCountry(this, Locale.getDefault()); |
| } |
| return result; |
| } |
| |
| /** |
| * Equivalent to {@code getDisplayLanguage(Locale.getDefault())}. |
| */ |
| public final String getDisplayLanguage() { |
| return getDisplayLanguage(getDefault()); |
| } |
| |
| /** |
| * Returns the name of this locale's language, localized to {@code locale}. |
| * If the language name is unknown, the language code is returned. |
| */ |
| public String getDisplayLanguage(Locale locale) { |
| if (languageCode.isEmpty()) { |
| return ""; |
| } |
| |
| // Hacks for backward compatibility. |
| // |
| // Our language tag will contain "und" if the languageCode is invalid |
| // or missing. ICU will then return "langue indéterminée" or the equivalent |
| // display language for the indeterminate language code. |
| // |
| // Sigh... ugh... and what not. |
| final String normalizedLanguage = Builder.normalizeAndValidateLanguage( |
| languageCode, false /* strict */); |
| if (UNDETERMINED_LANGUAGE.equals(normalizedLanguage)) { |
| return languageCode; |
| } |
| |
| // TODO: We need a new hack or a complete fix for http://b/8049507 --- We would |
| // cover the frameworks' tracks when they were using "tl" instead of "fil". |
| String result = ICU.getDisplayLanguage(this, locale); |
| if (result == null) { // TODO: do we need to do this, or does ICU do it for us? |
| result = ICU.getDisplayLanguage(this, Locale.getDefault()); |
| } |
| return result; |
| } |
| |
| /** |
| * Equivalent to {@code getDisplayName(Locale.getDefault())}. |
| */ |
| public final String getDisplayName() { |
| return getDisplayName(getDefault()); |
| } |
| |
| /** |
| * Returns this locale's language name, country name, and variant, localized |
| * to {@code locale}. The exact output form depends on whether this locale |
| * corresponds to a specific language, script, country and variant. |
| * |
| * <p>For example: |
| * <ul> |
| * <li>{@code new Locale("en").getDisplayName(Locale.US)} -> {@code English} |
| * <li>{@code new Locale("en", "US").getDisplayName(Locale.US)} -> {@code English (United States)} |
| * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.US)} -> {@code English (United States,Computer)} |
| * <li>{@code Locale.fromLanguageTag("zh-Hant-CN").getDisplayName(Locale.US)} -> {@code Chinese (Traditional Han,China)} |
| * <li>{@code new Locale("en").getDisplayName(Locale.FRANCE)} -> {@code anglais} |
| * <li>{@code new Locale("en", "US").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis)} |
| * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis,informatique)}. |
| * </ul> |
| */ |
| public String getDisplayName(Locale locale) { |
| int count = 0; |
| StringBuilder buffer = new StringBuilder(); |
| if (!languageCode.isEmpty()) { |
| String displayLanguage = getDisplayLanguage(locale); |
| buffer.append(displayLanguage.isEmpty() ? languageCode : displayLanguage); |
| ++count; |
| } |
| if (!scriptCode.isEmpty()) { |
| if (count == 1) { |
| buffer.append(" ("); |
| } |
| String displayScript = getDisplayScript(locale); |
| buffer.append(displayScript.isEmpty() ? scriptCode : displayScript); |
| ++count; |
| } |
| if (!countryCode.isEmpty()) { |
| if (count == 1) { |
| buffer.append(" ("); |
| } else if (count == 2) { |
| buffer.append(","); |
| } |
| String displayCountry = getDisplayCountry(locale); |
| buffer.append(displayCountry.isEmpty() ? countryCode : displayCountry); |
| ++count; |
| } |
| if (!variantCode.isEmpty()) { |
| if (count == 1) { |
| buffer.append(" ("); |
| } else if (count == 2 || count == 3) { |
| buffer.append(","); |
| } |
| String displayVariant = getDisplayVariant(locale); |
| buffer.append(displayVariant.isEmpty() ? variantCode : displayVariant); |
| ++count; |
| } |
| if (count > 1) { |
| buffer.append(")"); |
| } |
| return buffer.toString(); |
| } |
| |
| /** |
| * Returns the full variant name in the default {@code Locale} for the variant code of |
| * this {@code Locale}. If there is no matching variant name, the variant code is |
| * returned. |
| * |
| * @since 1.7 |
| */ |
| public final String getDisplayVariant() { |
| return getDisplayVariant(getDefault()); |
| } |
| |
| /** |
| * Returns the full variant name in the specified {@code Locale} for the variant code |
| * of this {@code Locale}. If there is no matching variant name, the variant code is |
| * returned. |
| * |
| * @since 1.7 |
| */ |
| public String getDisplayVariant(Locale locale) { |
| if (variantCode.isEmpty()) { |
| return ""; |
| } |
| |
| try { |
| Builder.normalizeAndValidateVariant(variantCode); |
| } catch (IllformedLocaleException ilfe) { |
| return variantCode; |
| } |
| |
| String result = ICU.getDisplayVariant(this, locale); |
| if (result == null) { // TODO: do we need to do this, or does ICU do it for us? |
| result = ICU.getDisplayVariant(this, Locale.getDefault()); |
| } |
| |
| // The "old style" locale constructors allow us to pass in variants that aren't |
| // valid BCP-47 variant subtags. When that happens, toLanguageTag will not emit |
| // them. Note that we know variantCode.length() > 0 due to the isEmpty check at |
| // the beginning of this function. |
| if (result.isEmpty()) { |
| return variantCode; |
| } |
| return result; |
| } |
| |
| /** |
| * Returns the three-letter ISO 3166 country code which corresponds to the country |
| * code for this {@code Locale}. |
| * @throws MissingResourceException if there's no 3-letter country code for this locale. |
| */ |
| public String getISO3Country() { |
| // The results of getISO3Country do not depend on the languageCode, |
| // so we pass an arbitrarily selected language code here. This guards |
| // against errors caused by malformed or invalid language codes. |
| String code = ICU.getISO3Country("en-" + countryCode); |
| if (!countryCode.isEmpty() && code.isEmpty()) { |
| throw new MissingResourceException("No 3-letter country code for locale: " + this, "FormatData_" + this, "ShortCountry"); |
| } |
| return code; |
| } |
| |
| /** |
| * Returns the three-letter ISO 639-2/T language code which corresponds to the language |
| * code for this {@code Locale}. |
| * @throws MissingResourceException if there's no 3-letter language code for this locale. |
| */ |
| public String getISO3Language() { |
| // For backward compatibility, we must return "" for an empty language |
| // code and not "und" which is the accurate ISO-639-3 code for an |
| // undetermined language. |
| if (languageCode.isEmpty()) { |
| return ""; |
| } |
| |
| // The results of getISO3Language do not depend on the country code |
| // or any of the other locale fields, so we pass just the language here. |
| String code = ICU.getISO3Language(languageCode); |
| if (!languageCode.isEmpty() && code.isEmpty()) { |
| throw new MissingResourceException("No 3-letter language code for locale: " + this, "FormatData_" + this, "ShortLanguage"); |
| } |
| return code; |
| } |
| |
| /** |
| * Returns an array of strings containing all the two-letter ISO 3166 country codes that can be |
| * used as the country code when constructing a {@code Locale}. |
| */ |
| public static String[] getISOCountries() { |
| return ICU.getISOCountries(); |
| } |
| |
| /** |
| * Returns an array of strings containing all the two-letter ISO 639-1 language codes that can be |
| * used as the language code when constructing a {@code Locale}. |
| */ |
| public static String[] getISOLanguages() { |
| return ICU.getISOLanguages(); |
| } |
| |
| /** |
| * Returns the language code for this {@code Locale} or the empty string if no language |
| * was set. |
| */ |
| public String getLanguage() { |
| return languageCode; |
| } |
| |
| /** |
| * Returns the variant code for this {@code Locale} or an empty {@code String} if no variant |
| * was set. |
| */ |
| public String getVariant() { |
| return variantCode; |
| } |
| |
| /** |
| * Returns the script code for this {@code Locale} or an empty {@code String} if no script |
| * was set. |
| * |
| * If set, the script code will be a title cased string of length 4, as per the ISO 15924 |
| * specification. |
| * |
| * @since 1.7 |
| */ |
| public String getScript() { |
| return scriptCode; |
| } |
| |
| /** |
| * Equivalent to {@code getDisplayScript(Locale.getDefault()))} |
| * |
| * @since 1.7 |
| */ |
| public String getDisplayScript() { |
| return getDisplayScript(getDefault()); |
| } |
| |
| /** |
| * Returns the name of this locale's script code, localized to {@link Locale}. If the |
| * script code is unknown, the return value of this method is the same as that of |
| * {@link #getScript()}. |
| * |
| * @since 1.7 |
| */ |
| public String getDisplayScript(Locale locale) { |
| if (scriptCode.isEmpty()) { |
| return ""; |
| } |
| |
| String result = ICU.getDisplayScript(this, locale); |
| if (result == null) { // TODO: do we need to do this, or does ICU do it for us? |
| result = ICU.getDisplayScript(this, Locale.getDefault()); |
| } |
| |
| return result; |
| |
| } |
| |
| /** |
| * Returns a well formed BCP-47 language tag that identifies this locale. |
| * |
| * Note that this locale itself might consist of ill formed fields, since the |
| * public {@code Locale} constructors do not perform validity checks to maintain |
| * backwards compatibility. When this is the case, this method will either replace |
| * ill formed fields with standard BCP-47 subtags (For eg. "und" (undetermined) |
| * for invalid languages) or omit them altogether. |
| * |
| * Additionally, ill formed variants will result in the remainder of the tag |
| * (both variants and extensions) being moved to the private use extension, |
| * where they will appear after a subtag whose value is {@code "lvariant"}. |
| * |
| * It's also important to note that the BCP-47 tag is well formed in the sense |
| * that it is unambiguously parseable into its specified components. We do not |
| * require that any of the components are registered with the applicable registries. |
| * For example, we do not require scripts to be a registered ISO 15924 scripts or |
| * languages to appear in the ISO-639-2 code list. |
| * |
| * @since 1.7 |
| */ |
| public String toLanguageTag() { |
| if (cachedLanguageTag == null) { |
| cachedLanguageTag = makeLanguageTag(); |
| } |
| |
| return cachedLanguageTag; |
| } |
| |
| /** |
| * Constructs a valid BCP-47 language tag from locale fields. Additional validation |
| * is required when this Locale was not constructed using a Builder and variants |
| * set this way are treated specially. |
| * |
| * In both cases, we convert empty language tags to "und", omit invalid country tags |
| * and perform a special case conversion of "no-NO-NY" to "nn-NO". |
| */ |
| private String makeLanguageTag() { |
| // We only need to revalidate the language, country and variant because |
| // the rest of the fields can only be set via the builder which validates |
| // them anyway. |
| String language = ""; |
| String region = ""; |
| String variant = ""; |
| String illFormedVariantSubtags = ""; |
| |
| if (hasValidatedFields) { |
| language = languageCode; |
| region = countryCode; |
| // Note that we are required to normalize hyphens to underscores |
| // in the builder, but we must use hyphens in the BCP-47 language tag. |
| variant = variantCode.replace('_', '-'); |
| } else { |
| language = Builder.normalizeAndValidateLanguage(languageCode, false /* strict */); |
| region = Builder.normalizeAndValidateRegion(countryCode, false /* strict */); |
| |
| try { |
| variant = Builder.normalizeAndValidateVariant(variantCode); |
| } catch (IllformedLocaleException ilfe) { |
| // If our variant is ill formed, we must attempt to split it into |
| // its constituent subtags and preserve the well formed bits and |
| // move the rest to the private use extension (if they're well |
| // formed extension subtags). |
| String split[] = splitIllformedVariant(variantCode); |
| |
| variant = split[0]; |
| illFormedVariantSubtags = split[1]; |
| } |
| } |
| |
| if (language.isEmpty()) { |
| language = UNDETERMINED_LANGUAGE; |
| } |
| |
| if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) { |
| language = "nn"; |
| region = "NO"; |
| variant = ""; |
| } |
| |
| final StringBuilder sb = new StringBuilder(16); |
| sb.append(language); |
| |
| if (!scriptCode.isEmpty()) { |
| sb.append('-'); |
| sb.append(scriptCode); |
| } |
| |
| if (!region.isEmpty()) { |
| sb.append('-'); |
| sb.append(region); |
| } |
| |
| if (!variant.isEmpty()) { |
| sb.append('-'); |
| sb.append(variant); |
| } |
| |
| // Extensions (optional, omitted if empty). Note that we don't |
| // emit the private use extension here, but add it in the end. |
| for (Map.Entry<Character, String> extension : extensions.entrySet()) { |
| if (!extension.getKey().equals('x')) { |
| sb.append('-').append(extension.getKey()); |
| sb.append('-').append(extension.getValue()); |
| } |
| } |
| |
| // The private use extension comes right at the very end. |
| final String privateUse = extensions.get('x'); |
| if (privateUse != null) { |
| sb.append("-x-"); |
| sb.append(privateUse); |
| } |
| |
| // If we have any ill-formed variant subtags, we append them to the |
| // private use extension (or add a private use extension if one doesn't |
| // exist). |
| if (!illFormedVariantSubtags.isEmpty()) { |
| if (privateUse == null) { |
| sb.append("-x-lvariant-"); |
| } else { |
| sb.append('-'); |
| } |
| sb.append(illFormedVariantSubtags); |
| } |
| |
| return sb.toString(); |
| } |
| |
| /** |
| * Splits ill formed variants into a set of valid variant subtags (which |
| * can be used directly in language tag construction) and a set of invalid |
| * variant subtags (which can be appended to the private use extension), |
| * provided that each subtag is a valid private use extension subtag. |
| * |
| * This method returns a two element String array. The first element is a string |
| * containing the concatenation of valid variant subtags which can be appended |
| * to a BCP-47 tag directly and the second containing the concatenation of |
| * invalid variant subtags which can be appended to the private use extension |
| * directly. |
| * |
| * This method assumes that {@code variant} contains at least one ill formed |
| * variant subtag. |
| */ |
| private static String[] splitIllformedVariant(String variant) { |
| final String normalizedVariant = variant.replace('_', '-'); |
| final String[] subTags = normalizedVariant.split("-"); |
| |
| final String[] split = new String[] { "", "" }; |
| |
| // First go through the list of variant subtags and check if they're |
| // valid private use extension subtags. If they're not, we will omit |
| // the first such subtag and all subtags after. |
| // |
| // NOTE: |firstInvalidSubtag| is the index of the first variant |
| // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the |
| // index of the first subtag we decide to append to the private use extension. |
| // |
| // In other words: |
| // [0, firstIllformedSubtag) => expressed as variant subtags. |
| // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use |
| // extension subtags. |
| // [firstInvalidSubtag, subTags.length) => omitted. |
| int firstInvalidSubtag = subTags.length; |
| for (int i = 0; i < subTags.length; ++i) { |
| if (!isValidBcp47Alphanum(subTags[i], 1, 8)) { |
| firstInvalidSubtag = i; |
| break; |
| } |
| } |
| |
| if (firstInvalidSubtag == 0) { |
| return split; |
| } |
| |
| // We now consider each subtag that could potentially be appended to |
| // the private use extension and check if it's valid. |
| int firstIllformedSubtag = firstInvalidSubtag; |
| for (int i = 0; i < firstInvalidSubtag; ++i) { |
| final String subTag = subTags[i]; |
| // The BCP-47 spec states that : |
| // - Subtags can be between [5, 8] alphanumeric chars in length. |
| // - Subtags that start with a number are allowed to be 4 chars in length. |
| if (subTag.length() >= 5 && subTag.length() <= 8) { |
| if (!isAsciiAlphaNum(subTag)) { |
| firstIllformedSubtag = i; |
| } |
| } else if (subTag.length() == 4) { |
| final char firstChar = subTag.charAt(0); |
| if (!(firstChar >= '0' && firstChar <= '9') || !isAsciiAlphaNum(subTag)) { |
| firstIllformedSubtag = i; |
| } |
| } else { |
| firstIllformedSubtag = i; |
| } |
| } |
| |
| split[0] = concatenateRange(subTags, 0, firstIllformedSubtag); |
| split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag); |
| |
| return split; |
| } |
| |
| /** |
| * Builds a string by concatenating array elements within the range [start, end). |
| * The supplied range is assumed to be valid and no checks are performed. |
| */ |
| private static String concatenateRange(String[] array, int start, int end) { |
| StringBuilder builder = new StringBuilder(32); |
| for (int i = start; i < end; ++i) { |
| if (i != start) { |
| builder.append('-'); |
| } |
| builder.append(array[i]); |
| } |
| |
| return builder.toString(); |
| } |
| |
| /** |
| * Returns the set of BCP-47 extensions this locale contains. |
| * |
| * See <a href="https://tools.ietf.org/html/bcp47#section-2.1"> |
| * the IETF BCP-47 specification</a> (Section 2.2.6) for details. |
| * |
| * @since 1.7 |
| */ |
| public Set<Character> getExtensionKeys() { |
| return extensions.keySet(); |
| } |
| |
| /** |
| * Returns the BCP-47 extension whose key is {@code extensionKey}, or {@code null} |
| * if this locale does not contain the extension. |
| * |
| * Individual Keywords and attributes for the unicode |
| * locale extension can be fetched using {@link #getUnicodeLocaleAttributes()}, |
| * {@link #getUnicodeLocaleKeys()} and {@link #getUnicodeLocaleType}. |
| * |
| * @since 1.7 |
| */ |
| public String getExtension(char extensionKey) { |
| return extensions.get(extensionKey); |
| } |
| |
| /** |
| * Returns the {@code type} for the specified unicode locale extension {@code key}. |
| * |
| * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword} |
| * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> |
| * |
| * @since 1.7 |
| */ |
| public String getUnicodeLocaleType(String keyWord) { |
| return unicodeKeywords.get(keyWord); |
| } |
| |
| /** |
| * Returns the set of unicode locale extension attributes this locale contains. |
| * |
| * For more information about attributes, see {@link Builder#addUnicodeLocaleAttribute} |
| * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> |
| * |
| * @since 1.7 |
| */ |
| public Set<String> getUnicodeLocaleAttributes() { |
| return unicodeAttributes; |
| } |
| |
| /** |
| * Returns the set of unicode locale extension keywords this locale contains. |
| * |
| * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword} |
| * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a> |
| * |
| * @since 1.7 |
| */ |
| public Set<String> getUnicodeLocaleKeys() { |
| return unicodeKeywords.keySet(); |
| } |
| |
| @Override |
| public synchronized int hashCode() { |
| return countryCode.hashCode() |
| + languageCode.hashCode() + variantCode.hashCode() |
| + scriptCode.hashCode() + extensions.hashCode(); |
| } |
| |
| /** |
| * Overrides the default locale. This does not affect system configuration, |
| * and attempts to override the system-provided default locale may |
| * themselves be overridden by actual changes to the system configuration. |
| * Code that calls this method is usually incorrect, and should be fixed by |
| * passing the appropriate locale to each locale-sensitive method that's |
| * called. |
| */ |
| public synchronized static void setDefault(Locale locale) { |
| if (locale == null) { |
| throw new NullPointerException("locale == null"); |
| } |
| String languageTag = locale.toLanguageTag(); |
| defaultLocale = locale; |
| ICU.setDefaultLocale(languageTag); |
| } |
| |
| /** |
| * Returns the string representation of this {@code Locale}. It consists of the |
| * language code, country code and variant separated by underscores. |
| * If the language is missing the string begins |
| * with an underscore. If the country is missing there are 2 underscores |
| * between the language and the variant. The variant cannot stand alone |
| * without a language and/or country code: in this case this method would |
| * return the empty string. |
| * |
| * <p>Examples: "en", "en_US", "_US", "en__POSIX", "en_US_POSIX" |
| */ |
| @Override |
| public final String toString() { |
| String result = cachedToStringResult; |
| if (result == null) { |
| result = cachedToStringResult = toNewString(languageCode, countryCode, variantCode, |
| scriptCode, extensions); |
| } |
| return result; |
| } |
| |
| private static String toNewString(String languageCode, String countryCode, |
| String variantCode, String scriptCode, Map<Character, String> extensions) { |
| // The string form of a locale that only has a variant is the empty string. |
| if (languageCode.length() == 0 && countryCode.length() == 0) { |
| return ""; |
| } |
| |
| // Otherwise, the output format is "ll_cc_variant", where language and country are always |
| // two letters, but the variant is an arbitrary length. A size of 11 characters has room |
| // for "en_US_POSIX", the largest "common" value. (In practice, the string form is almost |
| // always 5 characters: "ll_cc".) |
| StringBuilder result = new StringBuilder(11); |
| result.append(languageCode); |
| |
| final boolean hasScriptOrExtensions = !scriptCode.isEmpty() || !extensions.isEmpty(); |
| |
| if (!countryCode.isEmpty() || !variantCode.isEmpty() || hasScriptOrExtensions) { |
| result.append('_'); |
| } |
| result.append(countryCode); |
| if (!variantCode.isEmpty() || hasScriptOrExtensions) { |
| result.append('_'); |
| } |
| result.append(variantCode); |
| |
| if (hasScriptOrExtensions) { |
| if (!variantCode.isEmpty()) { |
| result.append('_'); |
| } |
| |
| // Note that this is notably different from the BCP-47 spec (for |
| // backwards compatibility). We are forced to append a "#" before the script tag. |
| // and also put the script code right at the end. |
| result.append("#"); |
| if (!scriptCode.isEmpty() ) { |
| result.append(scriptCode); |
| } |
| |
| // Note the use of "-" instead of "_" before the extensions. |
| if (!extensions.isEmpty()) { |
| if (!scriptCode.isEmpty()) { |
| result.append('-'); |
| } |
| result.append(serializeExtensions(extensions)); |
| } |
| } |
| |
| return result.toString(); |
| } |
| |
| private static final ObjectStreamField[] serialPersistentFields = { |
| new ObjectStreamField("country", String.class), |
| new ObjectStreamField("hashcode", int.class), |
| new ObjectStreamField("language", String.class), |
| new ObjectStreamField("variant", String.class), |
| new ObjectStreamField("script", String.class), |
| new ObjectStreamField("extensions", String.class), |
| }; |
| |
| private void writeObject(ObjectOutputStream stream) throws IOException { |
| ObjectOutputStream.PutField fields = stream.putFields(); |
| fields.put("country", countryCode); |
| fields.put("hashcode", -1); |
| fields.put("language", languageCode); |
| fields.put("variant", variantCode); |
| fields.put("script", scriptCode); |
| |
| if (!extensions.isEmpty()) { |
| fields.put("extensions", serializeExtensions(extensions)); |
| } |
| |
| stream.writeFields(); |
| } |
| |
| private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { |
| ObjectInputStream.GetField fields = stream.readFields(); |
| countryCode = (String) fields.get("country", ""); |
| languageCode = (String) fields.get("language", ""); |
| variantCode = (String) fields.get("variant", ""); |
| scriptCode = (String) fields.get("script", ""); |
| |
| this.unicodeKeywords = Collections.EMPTY_MAP; |
| this.unicodeAttributes = Collections.EMPTY_SET; |
| this.extensions = Collections.EMPTY_MAP; |
| |
| String extensions = (String) fields.get("extensions", null); |
| if (extensions != null) { |
| readExtensions(extensions); |
| } |
| } |
| |
| private void readExtensions(String extensions) { |
| Map<Character, String> extensionsMap = new TreeMap<Character, String>(); |
| parseSerializedExtensions(extensions, extensionsMap); |
| this.extensions = Collections.unmodifiableMap(extensionsMap); |
| |
| if (extensionsMap.containsKey(UNICODE_LOCALE_EXTENSION)) { |
| String unicodeExtension = extensionsMap.get(UNICODE_LOCALE_EXTENSION); |
| String[] subTags = unicodeExtension.split("-"); |
| |
| Map<String, String> unicodeKeywords = new TreeMap<String, String>(); |
| Set<String> unicodeAttributes = new TreeSet<String>(); |
| parseUnicodeExtension(subTags, unicodeKeywords, unicodeAttributes); |
| |
| this.unicodeKeywords = Collections.unmodifiableMap(unicodeKeywords); |
| this.unicodeAttributes = Collections.unmodifiableSet(unicodeAttributes); |
| } |
| } |
| |
| /** |
| * The serialized form for extensions is straightforward. It's simply |
| * of the form key1-value1-key2-value2 where each value might in turn contain |
| * multiple subtags separated by hyphens. Each key is guaranteed to be a single |
| * character in length. |
| * |
| * This method assumes that {@code extensionsMap} is non-empty. |
| * |
| * Visible for testing. |
| * |
| * @hide |
| */ |
| public static String serializeExtensions(Map<Character, String> extensionsMap) { |
| Iterator<Map.Entry<Character, String>> entryIterator = extensionsMap.entrySet().iterator(); |
| StringBuilder sb = new StringBuilder(64); |
| |
| while (true) { |
| final Map.Entry<Character, String> entry = entryIterator.next(); |
| sb.append(entry.getKey()); |
| sb.append('-'); |
| sb.append(entry.getValue()); |
| |
| if (entryIterator.hasNext()) { |
| sb.append('-'); |
| } else { |
| break; |
| } |
| } |
| |
| return sb.toString(); |
| } |
| |
| /** |
| * Visible for testing. |
| * |
| * @hide |
| */ |
| public static void parseSerializedExtensions(String extString, Map<Character, String> outputMap) { |
| // This probably isn't the most efficient approach, but it's the |
| // most straightforward to code. |
| // |
| // Start by splitting the string on "-". We will then keep track of |
| // where each of the extension keys (single characters) appear in the |
| // original string and then use those indices to construct substrings |
| // representing the values. |
| final String[] subTags = extString.split("-"); |
| final int[] typeStartIndices = new int[subTags.length / 2]; |
| |
| int length = 0; |
| int count = 0; |
| for (String subTag : subTags) { |
| if (subTag.length() > 0) { |
| // Account for the length of the "-" at the end of each subtag. |
| length += (subTag.length() + 1); |
| } |
| |
| if (subTag.length() == 1) { |
| typeStartIndices[count++] = length; |
| } |
| } |
| |
| for (int i = 0; i < count; ++i) { |
| final int valueStart = typeStartIndices[i]; |
| // Since the start Index points to the beginning of the next type |
| // ....prev-k-next..... |
| // |_ here |
| // (idx - 2) is the index of the next key |
| // (idx - 3) is the (non inclusive) end of the previous type. |
| final int valueEnd = (i == (count - 1)) ? |
| extString.length() : (typeStartIndices[i + 1] - 3); |
| |
| outputMap.put(extString.charAt(typeStartIndices[i] - 2), |
| extString.substring(valueStart, valueEnd)); |
| } |
| } |
| |
| |
| /** |
| * A UN M.49 is a 3 digit numeric code. |
| */ |
| private static boolean isUnM49AreaCode(String code) { |
| if (code.length() != 3) { |
| return false; |
| } |
| |
| for (int i = 0; i < 3; ++i) { |
| final char character = code.charAt(i); |
| if (!(character >= '0' && character <= '9')) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* |
| * Checks whether a given string is an ASCII alphanumeric string. |
| */ |
| private static boolean isAsciiAlphaNum(String string) { |
| for (int i = 0; i < string.length(); i++) { |
| final char character = string.charAt(i); |
| if (!(character >= 'a' && character <= 'z' || |
| character >= 'A' && character <= 'Z' || |
| character >= '0' && character <= '9')) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private static boolean isValidBcp47Alpha(String string, int lowerBound, int upperBound) { |
| final int length = string.length(); |
| if (length < lowerBound || length > upperBound) { |
| return false; |
| } |
| |
| for (int i = 0; i < length; ++i) { |
| final char character = string.charAt(i); |
| if (!(character >= 'a' && character <= 'z' || |
| character >= 'A' && character <= 'Z')) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private static boolean isValidBcp47Alphanum(String attributeOrType, |
| int lowerBound, int upperBound) { |
| if (attributeOrType.length() < lowerBound || attributeOrType.length() > upperBound) { |
| return false; |
| } |
| |
| return isAsciiAlphaNum(attributeOrType); |
| } |
| |
| private static String titleCaseAsciiWord(String word) { |
| try { |
| byte[] chars = word.toLowerCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII); |
| chars[0] = (byte) ((int) chars[0] + 'A' - 'a'); |
| return new String(chars, StandardCharsets.US_ASCII); |
| } catch (UnsupportedOperationException uoe) { |
| throw new AssertionError(uoe); |
| } |
| } |
| |
| /** |
| * A type list must contain one or more alphanumeric subtags whose lengths |
| * are between 3 and 8. |
| */ |
| private static boolean isValidTypeList(String lowerCaseTypeList) { |
| final String[] splitList = lowerCaseTypeList.split("-"); |
| for (String type : splitList) { |
| if (!isValidBcp47Alphanum(type, 3, 8)) { |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| private static void addUnicodeExtensionToExtensionsMap( |
| Set<String> attributes, Map<String, String> keywords, |
| Map<Character, String> extensions) { |
| if (attributes.isEmpty() && keywords.isEmpty()) { |
| return; |
| } |
| |
| // Assume that the common case is a low number of keywords & attributes |
| // (usually one or two). |
| final StringBuilder sb = new StringBuilder(32); |
| |
| // All attributes must appear before keywords, in lexical order. |
| if (!attributes.isEmpty()) { |
| Iterator<String> attributesIterator = attributes.iterator(); |
| while (true) { |
| sb.append(attributesIterator.next()); |
| if (attributesIterator.hasNext()) { |
| sb.append('-'); |
| } else { |
| break; |
| } |
| } |
| } |
| |
| if (!keywords.isEmpty()) { |
| if (!attributes.isEmpty()) { |
| sb.append('-'); |
| } |
| |
| Iterator<Map.Entry<String, String>> keywordsIterator = keywords.entrySet().iterator(); |
| while (true) { |
| final Map.Entry<String, String> keyWord = keywordsIterator.next(); |
| sb.append(keyWord.getKey()); |
| if (!keyWord.getValue().isEmpty()) { |
| sb.append('-'); |
| sb.append(keyWord.getValue()); |
| } |
| if (keywordsIterator.hasNext()) { |
| sb.append('-'); |
| } else { |
| break; |
| } |
| } |
| } |
| |
| extensions.put(UNICODE_LOCALE_EXTENSION, sb.toString()); |
| } |
| |
| /** |
| * This extension is described by http://www.unicode.org/reports/tr35/#RFC5234 |
| * unicode_locale_extensions = sep "u" (1*(sep keyword) / 1*(sep attribute) *(sep keyword)). |
| * |
| * It must contain at least one keyword or attribute and attributes (if any) |
| * must appear before keywords. Attributes can't appear after keywords because |
| * they will be indistinguishable from a subtag of the keyword type. |
| * |
| * Visible for testing. |
| * |
| * @hide |
| */ |
| public static void parseUnicodeExtension(String[] subtags, |
| Map<String, String> keywords, Set<String> attributes) { |
| String lastKeyword = null; |
| List<String> subtagsForKeyword = new ArrayList<String>(); |
| for (String subtag : subtags) { |
| if (subtag.length() == 2) { |
| if (subtagsForKeyword.size() > 0) { |
| keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword)); |
| subtagsForKeyword.clear(); |
| } |
| |
| lastKeyword = subtag; |
| } else if (subtag.length() > 2) { |
| if (lastKeyword == null) { |
| attributes.add(subtag); |
| } else { |
| subtagsForKeyword.add(subtag); |
| } |
| } |
| } |
| |
| if (subtagsForKeyword.size() > 0) { |
| keywords.put(lastKeyword, joinBcp47Subtags(subtagsForKeyword)); |
| } else if (lastKeyword != null) { |
| keywords.put(lastKeyword, ""); |
| } |
| } |
| |
| /** |
| * Joins a list of subtags into a BCP-47 tag using the standard separator |
| * ("-"). |
| */ |
| private static String joinBcp47Subtags(List<String> strings) { |
| final int size = strings.size(); |
| |
| StringBuilder sb = new StringBuilder(strings.get(0).length()); |
| for (int i = 0; i < size; ++i) { |
| sb.append(strings.get(i)); |
| if (i != size - 1) { |
| sb.append('-'); |
| } |
| } |
| |
| return sb.toString(); |
| } |
| |
| /** |
| * @hide for internal use only. |
| */ |
| public static String adjustLanguageCode(String languageCode) { |
| String adjusted = languageCode.toLowerCase(Locale.US); |
| // Map new language codes to the obsolete language |
| // codes so the correct resource bundles will be used. |
| if (languageCode.equals("he")) { |
| adjusted = "iw"; |
| } else if (languageCode.equals("id")) { |
| adjusted = "in"; |
| } else if (languageCode.equals("yi")) { |
| adjusted = "ji"; |
| } |
| |
| return adjusted; |
| } |
| |
/**
 * Map of grandfathered language tags to their modern replacements. Lookups
 * are case insensitive.
 */
private static final TreeMap<String, String> GRANDFATHERED_LOCALES;

static {
    // From http://tools.ietf.org/html/bcp47
    //
    // grandfathered = irregular ; non-redundant tags registered
    //               / regular   ; during the RFC 3066 era
    //
    // Each row is { grandfathered tag, replacement }.
    final String[][] replacements = {
        // irregular =
        { "en-GB-oed", "en-GB-x-oed" },
        { "i-ami", "ami" },
        { "i-bnn", "bnn" },
        { "i-default", "en-x-i-default" },
        { "i-enochian", "und-x-i-enochian" },
        { "i-hak", "hak" },
        { "i-klingon", "tlh" },
        { "i-lux", "lb" },
        { "i-mingo", "see-x-i-mingo" },
        { "i-navajo", "nv" },
        { "i-pwn", "pwn" },
        { "i-tao", "tao" },
        { "i-tay", "tay" },
        { "i-tsu", "tsu" },
        { "sgn-BE-FR", "sfb" },
        { "sgn-BE-NL", "vgt" },
        { "sgn-CH-DE", "sgg" },

        // regular =
        { "art-lojban", "jbo" },
        { "cel-gaulish", "xtg-x-cel-gaulish" },
        { "no-bok", "nb" },
        { "no-nyn", "nn" },
        { "zh-guoyu", "cmn" },
        { "zh-hakka", "hak" },
        { "zh-min", "nan-x-zh-min" },
        { "zh-min-nan", "nan" },
        { "zh-xiang", "hsn" },
    };

    final TreeMap<String, String> table =
            new TreeMap<String, String>(String.CASE_INSENSITIVE_ORDER);
    for (String[] row : replacements) {
        table.put(row[0], row[1]);
    }

    GRANDFATHERED_LOCALES = table;
}

/**
 * Returns the modern replacement of {@code original} if it is a
 * grandfathered tag, and {@code original} itself otherwise.
 */
private static String convertGrandfatheredTag(String original) {
    final String replacement = GRANDFATHERED_LOCALES.get(original);
    if (replacement == null) {
        return original;
    }

    return replacement;
}
| |
| /** |
| * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)} |
| * and appends valid variant subtags upto the first invalid subtag (if any) to |
| * {@code normalizedVariants}. |
| */ |
| private static void extractVariantSubtags(String[] subtags, int startIndex, int endIndex, |
| List<String> normalizedVariants) { |
| for (int i = startIndex; i < endIndex; i++) { |
| final String subtag = subtags[i]; |
| |
| if (Builder.isValidVariantSubtag(subtag)) { |
| normalizedVariants.add(subtag); |
| } else { |
| break; |
| } |
| } |
| } |
| |
| /** |
| * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)} |
| * and inserts valid extensions into {@code extensions}. The scan is aborted |
| * when an invalid extension is encountered. Returns the index of the first |
| * unparsable element of {@code subtags}. |
| */ |
| private static int extractExtensions(String[] subtags, int startIndex, int endIndex, |
| Map<Character, String> extensions) { |
| int privateUseExtensionIndex = -1; |
| int extensionKeyIndex = -1; |
| |
| int i = startIndex; |
| for (; i < endIndex; i++) { |
| final String subtag = subtags[i]; |
| |
| final boolean parsingPrivateUse = (privateUseExtensionIndex != -1) && |
| (extensionKeyIndex == privateUseExtensionIndex); |
| |
| // Note that private use extensions allow subtags of length 1. |
| // Private use extensions *must* come last, so there's no ambiguity |
| // in that case. |
| if (subtag.length() == 1 && !parsingPrivateUse) { |
| // Emit the last extension we encountered if any. First check |
| // whether we encountered two keys in a row (which is an error). |
| // Also checks if we already have an extension with the same key, |
| // which is again an error. |
| if (extensionKeyIndex != -1) { |
| if ((i - 1) == extensionKeyIndex) { |
| return extensionKeyIndex; |
| } |
| |
| final String key = subtags[extensionKeyIndex]; |
| if (extensions.containsKey(key.charAt(0))) { |
| return extensionKeyIndex; |
| } |
| |
| final String value = concatenateRange(subtags, extensionKeyIndex + 1, i); |
| extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT)); |
| } |
| |
| // Mark the start of the next extension. Also keep track of whether this |
| // is a private use extension, and throw an error if it doesn't come last. |
| extensionKeyIndex = i; |
| if ("x".equals(subtag)) { |
| privateUseExtensionIndex = i; |
| } else if (privateUseExtensionIndex != -1) { |
| // The private use extension must come last. |
| return privateUseExtensionIndex; |
| } |
| } else if (extensionKeyIndex != -1) { |
| // We must have encountered a valid key in order to start parsing |
| // its subtags. |
| if (!isValidBcp47Alphanum(subtag, parsingPrivateUse ? 1 : 2, 8)) { |
| return i; |
| } |
| } else { |
| // Encountered a value without a preceding key. |
| return i; |
| } |
| } |
| |
| if (extensionKeyIndex != -1) { |
| if ((i - 1) == extensionKeyIndex) { |
| return extensionKeyIndex; |
| } |
| |
| final String key = subtags[extensionKeyIndex]; |
| if (extensions.containsKey(key.charAt(0))) { |
| return extensionKeyIndex; |
| } |
| |
| final String value = concatenateRange(subtags, extensionKeyIndex + 1, i); |
| extensions.put(key.charAt(0), value.toLowerCase(Locale.ROOT)); |
| } |
| |
| return i; |
| } |
| |
| private static Locale forLanguageTag(/* @Nonnull */ String tag, boolean strict) { |
| final String converted = convertGrandfatheredTag(tag); |
| final String[] subtags = converted.split("-"); |
| |
| int lastSubtag = subtags.length; |
| for (int i = 0; i < subtags.length; ++i) { |
| final String subtag = subtags[i]; |
| if (subtag.isEmpty() || subtag.length() > 8) { |
| if (strict) { |
| throw new IllformedLocaleException("Invalid subtag at index: " + i |
| + " in tag: " + tag); |
| } else { |
| lastSubtag = (i - 1); |
| } |
| |
| break; |
| } |
| } |
| |
| final String languageCode = Builder.normalizeAndValidateLanguage(subtags[0], strict); |
| String scriptCode = ""; |
| int nextSubtag = 1; |
| if (lastSubtag > nextSubtag) { |
| scriptCode = Builder.normalizeAndValidateScript(subtags[nextSubtag], false /* strict */); |
| if (!scriptCode.isEmpty()) { |
| nextSubtag++; |
| } |
| } |
| |
| String regionCode = ""; |
| if (lastSubtag > nextSubtag) { |
| regionCode = Builder.normalizeAndValidateRegion(subtags[nextSubtag], false /* strict */); |
| if (!regionCode.isEmpty()) { |
| nextSubtag++; |
| } |
| } |
| |
| List<String> variants = null; |
| if (lastSubtag > nextSubtag) { |
| variants = new ArrayList<String>(); |
| extractVariantSubtags(subtags, nextSubtag, lastSubtag, variants); |
| nextSubtag += variants.size(); |
| } |
| |
| Map<Character, String> extensions = Collections.EMPTY_MAP; |
| if (lastSubtag > nextSubtag) { |
| extensions = new TreeMap<Character, String>(); |
| nextSubtag = extractExtensions(subtags, nextSubtag, lastSubtag, extensions); |
| } |
| |
| if (nextSubtag != lastSubtag) { |
| if (strict) { |
| throw new IllformedLocaleException("Unparseable subtag: " + subtags[nextSubtag] |
| + " from language tag: " + tag); |
| } |
| } |
| |
| Set<String> unicodeKeywords = Collections.EMPTY_SET; |
| Map<String, String> unicodeAttributes = Collections.EMPTY_MAP; |
| if (extensions.containsKey(UNICODE_LOCALE_EXTENSION)) { |
| unicodeKeywords = new TreeSet<String>(); |
| unicodeAttributes = new TreeMap<String, String>(); |
| parseUnicodeExtension(extensions.get(UNICODE_LOCALE_EXTENSION).split("-"), |
| unicodeAttributes, unicodeKeywords); |
| } |
| |
| String variantCode = ""; |
| if (variants != null && !variants.isEmpty()) { |
| StringBuilder variantsBuilder = new StringBuilder(variants.size() * 8); |
| for (int i = 0; i < variants.size(); ++i) { |
| if (i != 0) { |
| variantsBuilder.append('_'); |
| } |
| variantsBuilder.append(variants.get(i)); |
| } |
| variantCode = variantsBuilder.toString(); |
| } |
| |
| return new Locale(languageCode, regionCode, variantCode, scriptCode, |
| unicodeKeywords, unicodeAttributes, extensions, true /* has validated fields */); |
| } |
| } |