luni/src/main/java/java/util/Locale.java - platform/libcore - Git at Google

 /*
  *  Licensed to the Apache Software Foundation (ASF) under one or more
  *  contributor license agreements.  See the NOTICE file distributed with
  *  this work for additional information regarding copyright ownership.
  *  The ASF licenses this file to You under the Apache License, Version 2.0
  *  (the "License"); you may not use this file except in compliance with
  *  the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  */

 package java.util;

 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.ObjectStreamField;
 import java.io.Serializable;
 import java.nio.charset.StandardCharsets;
 import libcore.icu.ICU;

 /**
  * {@code Locale} represents a language/country/variant combination. Locales are used to
  * alter the presentation of information such as numbers or dates to suit the conventions
  * in the region they describe.
  *
  * <p>The language codes are two-letter lowercase ISO language codes (such as "en") as defined by
  * <a href="http://en.wikipedia.org/wiki/ISO_639-1">ISO 639-1</a>.
  * The country codes are two-letter uppercase ISO country codes (such as "US") as defined by
  * <a href="http://en.wikipedia.org/wiki/ISO_3166-1_alpha-2">ISO 3166-1</a>.
  * The variant codes are unspecified.
  *
  * <p>Note that Java uses several deprecated two-letter codes. The Hebrew ("he") language
  * code is rewritten as "iw", Indonesian ("id") as "in", and Yiddish ("yi") as "ji". This
  * rewriting happens even if you construct your own {@code Locale} object, not just for
  * instances returned by the various lookup methods.
  *
  * <a name="available_locales"><h3>Available locales</h3></a>
  * <p>This class' constructors do no error checking. You can create a {@code Locale} for languages
  * and countries that don't exist, and you can create instances for combinations that don't
  * exist (such as "de_US" for "German as spoken in the US").
  *
  * <p>Note that locale data is not necessarily available for any of the locales pre-defined as
  * constants in this class except for en_US, which is the only locale Java guarantees is always
  * available.
  *
  * <p>It is also a mistake to assume that all devices have the same locales available.
  * A device sold in the US will almost certainly support en_US and es_US, but not necessarily
  * any locales with the same language but different countries (such as en_GB or es_ES),
  * nor any locales for other languages (such as de_DE). The opposite may well be true for a device
  * sold in Europe.
  *
  * <p>You can use {@link Locale#getDefault} to get an appropriate locale for the <i>user</i> of the
  * device you're running on, or {@link Locale#getAvailableLocales} to get a list of all the locales
  * available on the device you're running on.
  *
  * <a name="locale_data"><h3>Locale data</h3></a>
  * <p>Note that locale data comes solely from ICU. User-supplied locale service providers (using
  * the {@code java.text.spi} or {@code java.util.spi} mechanisms) are not supported.
  *
  * <p>Here are the versions of ICU (and the corresponding CLDR and Unicode versions) used in
  * various Android releases:
  * <table BORDER="1" WIDTH="100%" CELLPADDING="3" CELLSPACING="0" SUMMARY="">
  * <tr><td>Android 1.5 (Cupcake)/Android 1.6 (Donut)/Android 2.0 (Eclair)</td>
  *     <td>ICU 3.8</td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-5">CLDR 1.5</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode5.0.0/">Unicode 5.0</a></td></tr>
  * <tr><td>Android 2.2 (Froyo)</td>
  *     <td>ICU 4.2</td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-7">CLDR 1.7</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode5.1.0/">Unicode 5.1</a></td></tr>
  * <tr><td>Android 2.3 (Gingerbread)/Android 3.0 (Honeycomb)</td>
  *     <td>ICU 4.4</td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-8">CLDR 1.8</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode5.2.0/">Unicode 5.2</a></td></tr>
  * <tr><td>Android 4.0 (Ice Cream Sandwich)</td>
  *     <td><a href="http://site.icu-project.org/download/46">ICU 4.6</a></td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-1-9">CLDR 1.9</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr>
  * <tr><td>Android 4.1 (Jelly Bean)</td>
  *     <td><a href="http://site.icu-project.org/download/48">ICU 4.8</a></td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-2-0">CLDR 2.0</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode6.0.0/">Unicode 6.0</a></td></tr>
  * <tr><td>Android 4.3 (Jelly Bean MR2)</td>
  *     <td><a href="http://site.icu-project.org/download/50">ICU 50</a></td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-22-1">CLDR 22.1</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr>
  * <tr><td>Android 4.4 (KitKat)</td>
  *     <td><a href="http://site.icu-project.org/download/51">ICU 51</a></td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-23">CLDR 23</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode6.2.0/">Unicode 6.2</a></td></tr>
  * <tr><td>Android 4.? (STOPSHIP)</td>
  *     <td><a href="http://site.icu-project.org/download/53">ICU 53</a></td>
  *     <td><a href="http://cldr.unicode.org/index/downloads/cldr-25">CLDR 25</a></td>
  *     <td><a href="http://www.unicode.org/versions/Unicode6.3.0/">Unicode 6.3</a></td></tr>
  * </table>
  *
  * <a name="default_locale"><h3>Be wary of the default locale</h3></a>
  * <p>Note that there are many convenience methods that automatically use the default locale, but
  * using them may lead to subtle bugs.
  *
  * <p>The default locale is appropriate for tasks that involve presenting data to the user. In
  * this case, you want to use the user's date/time formats, number
  * formats, rules for conversion to lowercase, and so on. In this case, it's safe to use the
  * convenience methods.
  *
  * <p>The default locale is <i>not</i> appropriate for machine-readable output. The best choice
  * there is usually {@code Locale.US}&nbsp;&ndash; this locale is guaranteed to be available on all
  * devices, and the fact that it has no surprising special cases and is frequently used (especially
  * for computer-computer communication) means that it tends to be the most efficient choice too.
  *
  * <p>A common mistake is to implicitly use the default locale when producing output meant to be
  * machine-readable. This tends to work on the developer's test devices (especially because so many
  * developers use en_US), but fails when run on a device whose user is in a more complex locale.
  *
  * <p>For example, if you're formatting integers some locales will use non-ASCII decimal
  * digits. As another example, if you're formatting floating-point numbers some locales will use
  * {@code ','} as the decimal point and {@code '.'} for digit grouping. That's correct for
  * human-readable output, but likely to cause problems if presented to another
  * computer ({@link Double#parseDouble} can't parse such a number, for example).
  * You should also be wary of the {@link String#toLowerCase} and
  * {@link String#toUpperCase} overloads that don't take a {@code Locale}: in Turkey, for example,
  * the characters {@code 'i'} and {@code 'I'} won't be converted to {@code 'I'} and {@code 'i'}.
  * This is the correct behavior for Turkish text (such as user input), but inappropriate for, say,
  * HTTP headers.
  */
 public final class Locale implements Cloneable, Serializable {

     private static final long serialVersionUID = 9149081749638150636L;

     // NOTE(review): every constant below is created through a private constructor
     // whose first argument is the literal {@code true}; that constructor is not
     // visible in this section, so the exact meaning of the flag should be
     // confirmed there. Each declaration creates its own instance, so constants
     // that share a language/country pair (for example CHINA, PRC and
     // SIMPLIFIED_CHINESE) are distinct objects built from the same arguments.

     /**
      * Locale constant for en_CA (language "en", country "CA").
      */
     public static final Locale CANADA = new Locale(true, "en", "CA");

     /**
      * Locale constant for fr_CA (language "fr", country "CA").
      */
     public static final Locale CANADA_FRENCH = new Locale(true, "fr", "CA");

     /**
      * Locale constant for zh_CN (language "zh", country "CN").
      * Built from the same arguments as {@link #PRC} and {@link #SIMPLIFIED_CHINESE}.
      */
     public static final Locale CHINA = new Locale(true, "zh", "CN");

     /**
      * Locale constant for zh (language only, empty country).
      */
     public static final Locale CHINESE = new Locale(true, "zh", "");

     /**
      * Locale constant for en (language only, empty country).
      */
     public static final Locale ENGLISH = new Locale(true, "en", "");

     /**
      * Locale constant for fr_FR (language "fr", country "FR").
      */
     public static final Locale FRANCE = new Locale(true, "fr", "FR");

     /**
      * Locale constant for fr (language only, empty country).
      */
     public static final Locale FRENCH = new Locale(true, "fr", "");

     /**
      * Locale constant for de (language only, empty country).
      */
     public static final Locale GERMAN = new Locale(true, "de", "");

     /**
      * Locale constant for de_DE (language "de", country "DE").
      */
     public static final Locale GERMANY = new Locale(true, "de", "DE");

     /**
      * Locale constant for it (language only, empty country).
      */
     public static final Locale ITALIAN = new Locale(true, "it", "");

     /**
      * Locale constant for it_IT (language "it", country "IT").
      */
     public static final Locale ITALY = new Locale(true, "it", "IT");

     /**
      * Locale constant for ja_JP (language "ja", country "JP").
      */
     public static final Locale JAPAN = new Locale(true, "ja", "JP");

     /**
      * Locale constant for ja (language only, empty country).
      */
     public static final Locale JAPANESE = new Locale(true, "ja", "");

     /**
      * Locale constant for ko_KR (language "ko", country "KR").
      */
     public static final Locale KOREA = new Locale(true, "ko", "KR");

     /**
      * Locale constant for ko (language only, empty country).
      */
     public static final Locale KOREAN = new Locale(true, "ko", "");

     /**
      * Locale constant for zh_CN (language "zh", country "CN").
      * Built from the same arguments as {@link #CHINA} and {@link #SIMPLIFIED_CHINESE}.
      */
     public static final Locale PRC = new Locale(true, "zh", "CN");

     /**
      * Locale constant for the root locale. The root locale has an empty language,
      * country, and variant.
      *
      * @since 1.6
      */
     public static final Locale ROOT = new Locale(true, "", "");

     /**
      * Locale constant for zh_CN (language "zh", country "CN").
      * Built from the same arguments as {@link #CHINA} and {@link #PRC}.
      */
     public static final Locale SIMPLIFIED_CHINESE = new Locale(true, "zh", "CN");

     /**
      * Locale constant for zh_TW (language "zh", country "TW").
      * Built from the same arguments as {@link #TRADITIONAL_CHINESE}.
      */
     public static final Locale TAIWAN = new Locale(true, "zh", "TW");

     /**
      * Locale constant for zh_TW (language "zh", country "TW").
      * Built from the same arguments as {@link #TAIWAN}.
      */
     public static final Locale TRADITIONAL_CHINESE = new Locale(true, "zh", "TW");

     /**
      * Locale constant for en_GB (language "en", country "GB").
      */
     public static final Locale UK = new Locale(true, "en", "GB");

     /**
      * Locale constant for en_US (language "en", country "US").
      */
     public static final Locale US = new Locale(true, "en", "US");

     /**
      * BCP-47 extension identifier (or "singleton") for the private
      * use extension.
      *
      * {@link Builder#setExtension(char, String)} accepts subtags as short as one
      * character for this key, instead of the two-character minimum it applies to
      * every other extension key.
      *
      * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
      *
      * @since 1.7
      */
     public static final char PRIVATE_USE_EXTENSION = 'x';

     /**
      * BCP-47 extension identifier (or "singleton") for the unicode locale extension.
      *
      * {@link Builder#setExtension(char, String)} parses values set under this key
      * into unicode locale keywords and attributes.
      *
      * See {@link #getExtension(char)} and {@link Builder#setExtension(char, String)}.
      *
      * @since 1.7
      */
     public static final char UNICODE_LOCALE_EXTENSION = 'u';

     /**
      * ISO 639-3 generic code for undetermined languages. Returned by the
      * builder's non-strict language normalization when the supplied language
      * is not well formed.
      */
     private static final String UNDETERMINED_LANGUAGE = "und";

     /**
      * The current default locale. It is temporarily assigned to US because we
      * need a default locale to look up the real default locale; the static
      * initializer that follows replaces it with a locale built from the
      * {@code user.language}, {@code user.region} and {@code user.variant}
      * system properties.
      */
     private static Locale defaultLocale = US;

     static {
         // Build the real default locale from the system properties, falling
         // back to "en" / "US" / "" when a property is not set. Arguments are
         // evaluated left to right, so the properties are read in the order
         // language, region, variant.
         defaultLocale = new Locale(
                 System.getProperty("user.language", "en"),
                 System.getProperty("user.region", "US"),
                 System.getProperty("user.variant", ""));
     }

     /**
      * A class that helps construct {@link Locale} instances.
      *
      * Unlike the public {@code Locale} constructors, the methods of this class
      * perform much stricter checks on their input.
      *
      * Validity checks on the {@code language}, {@code country}, {@code variant}
      * and {@code extension} values are carried out as per the
      * <a href="https://tools.ietf.org/html/bcp47">BCP-47</a> specification.
      *
      * In addition, we treat the <a href="http://www.unicode.org/reports/tr35/">
      * Unicode locale extension</a> specially and provide methods to manipulate
      * the structured state (keywords and attributes) specified therein.
      *
      * @since 1.7
      */
     public static final class Builder {
         // Scalar locale fields; the empty string means "not set".
         private String language;
         private String region;
         private String variant;
         private String script;

         // Structured state of the unicode locale extension (attributes and
         // keywords) plus the map of all other extensions, keyed by singleton.
         private final Set<String> attributes;
         private final Map<String, String> keywords;
         private final Map<Character, String> extensions;

         /**
          * Creates an empty builder: every scalar field is the empty string and
          * no attributes, keywords or extensions are set.
          */
         public Builder() {
             language = "";
             region = "";
             variant = "";
             script = "";

             // Sorted collections are used here (and in the locale class itself)
             // because the serialized forms of the unicode locale extension and
             // of the extension map are specified to be in alphabetic order of
             // their keys.
             attributes = new TreeSet<String>();
             keywords = new TreeMap<String, String>();
             extensions = new TreeMap<Character, String>();
         }

         /**
          * Sets the locale language. If {@code language} is {@code null} or empty, the
          * previous value is cleared.
          *
          * As per BCP-47, the language must be between 2 and 3 ASCII characters
          * in length and must only contain characters in the range {@code [a-zA-Z]}.
          *
          * This value is usually an <a href="http://www.loc.gov/standards/iso639-2/">
          * ISO-639-2</a> alpha-2 or alpha-3 code, though no explicit checks are
          * carried out that it's a valid code in that namespace.
          *
          * Values are normalized to lower case.
          *
          * Note that we don't support BCP-47 "extlang" languages because they were
          * only ever used to substitute for a lack of 3 letter language codes.
          *
          * @throws IllformedLocaleException if the language was invalid.
          */
         public Builder setLanguage(String language) {
             // Strict mode: an ill-formed language raises IllformedLocaleException
             // instead of being replaced by the "undetermined" fallback.
             final String normalized = normalizeAndValidateLanguage(language, true /* strict */);
             this.language = normalized;
             return this;
         }

         /**
          * Lower-cases {@code language} and checks that the result is a 2 to 3
          * letter BCP-47 alpha subtag.
          *
          * @param language the language to normalize; {@code null} or empty yields
          *        the empty string.
          * @param strict if {@code true}, an ill-formed language raises an exception;
          *        otherwise {@code UNDETERMINED_LANGUAGE} is returned in its place.
          * @return the lower-cased language, the empty string, or the undetermined
          *         language code.
          * @throws IllformedLocaleException if {@code strict} is set and the language
          *         is ill-formed.
          */
         private static String normalizeAndValidateLanguage(String language, boolean strict) {
             final boolean absent = (language == null) || language.isEmpty();
             if (absent) {
                 return "";
             }

             final String candidate = language.toLowerCase(Locale.ROOT);
             if (isValidBcp47Alpha(candidate, 2, 3)) {
                 return candidate;
             }

             // Ill-formed input: lenient callers get the fallback code, strict
             // callers get an exception that quotes the original (un-normalized) input.
             if (!strict) {
                 return UNDETERMINED_LANGUAGE;
             }
             throw new IllformedLocaleException("Invalid language: " + language);
         }

         /**
          * Set the state of this builder to the parsed contents of the BCP-47 language
          * tag {@code languageTag}.
          *
          * This method is equivalent to a call to {@link #clear} if {@code languageTag}
          * is {@code null} or empty.
          *
          * <b>NOTE:</b> In contrast to {@link Locale#forLanguageTag(String)}, which
          * simply ignores malformed input, this method will throw an exception if
          * its input is malformed.
          *
          * @throws IllformedLocaleException if {@code languageTag} is not a well formed
          *         BCP-47 tag.
          */
         public Builder setLanguageTag(String languageTag) {
             final boolean hasTag = (languageTag != null) && !languageTag.isEmpty();
             if (!hasTag) {
                 // A missing tag simply resets the builder.
                 clear();
                 return this;
             }

             // Strict parsing reports a malformed tag by returning null rather
             // than a best-effort locale.
             final Locale parsed = forLanguageTag(languageTag, true /* strict */);
             if (parsed == null) {
                 throw new IllformedLocaleException("Invalid languageTag: " + languageTag);
             }

             // setLocale returns this builder.
             return setLocale(parsed);
         }

         /**
          * Sets the locale region. If {@code region} is {@code null} or empty, the
          * previous value is cleared.
          *
          * As per BCP-47, the region must either be a 2 character ISO-3166-1 code
          * (each character in the range [a-zA-Z]) OR a 3 digit UN M.49 code.
          *
          * Values are normalized to upper case.
          *
          * @throws IllformedLocaleException if {@code region} is invalid.
          */
         public Builder setRegion(String region) {
             // Strict mode: an ill-formed region raises IllformedLocaleException
             // instead of being silently dropped.
             final String normalized = normalizeAndValidateRegion(region, true /* strict */);
             this.region = normalized;
             return this;
         }

         /**
          * Upper-cases {@code region} and checks that the result is either a two
          * letter alpha code or a UN M.49 area code.
          *
          * @param region the region to normalize; {@code null} or empty yields the
          *        empty string.
          * @param strict if {@code true}, an ill-formed region raises an exception;
          *        otherwise the empty string is returned in its place.
          * @return the upper-cased region, or the empty string.
          * @throws IllformedLocaleException if {@code strict} is set and the region
          *         is ill-formed.
          */
         private static String normalizeAndValidateRegion(String region, boolean strict) {
             final boolean absent = (region == null) || region.isEmpty();
             if (absent) {
                 return "";
             }

             final String candidate = region.toUpperCase(Locale.ROOT);
             final boolean wellFormed =
                     isValidBcp47Alpha(candidate, 2, 2) || isUnM49AreaCode(candidate);
             if (wellFormed) {
                 return candidate;
             }

             // Ill-formed input: lenient callers get an empty region, strict callers
             // get an exception that quotes the original (un-normalized) input.
             if (!strict) {
                 return "";
             }
             throw new IllformedLocaleException("Invalid region: " + region);
         }

         /**
          * Sets the locale variant. If {@code variant} is {@code null} or empty,
          * the previous value is cleared.
          *
          * The input string may consist of one or more variants separated by
          * valid separators ('-' or '_').
          *
          * As per BCP-47, each variant must be between 5 and 8 alphanumeric characters
          * in length (each character in the range {@code [a-zA-Z0-9]}) but
          * can be exactly 4 characters in length if the first character is a digit.
          *
          * Note that this is a much stricter interpretation of {@code variant}
          * than the public {@code Locale} constructors. The latter allowed free form
          * variants.
          *
          * Variants are case sensitive and all separators are normalized to {@code '_'}.
          *
          * @throws IllformedLocaleException if {@code variant} is invalid.
          */
         public Builder setVariant(String variant) {
             // Validation always throws on ill-formed input; there is no lenient mode.
             final String normalized = normalizeAndValidateVariant(variant);
             this.variant = normalized;
             return this;
         }

         /**
          * Normalizes the separators of {@code variant} to {@code '_'} and checks
          * that every subtag is a well formed BCP-47 variant subtag.
          *
          * @param variant the variant to normalize; {@code null} or empty yields the
          *        empty string.
          * @return the variant with every {@code '-'} replaced by {@code '_'}; its
          *         case is left unchanged.
          * @throws IllformedLocaleException if any subtag is ill-formed, including
          *         the empty subtags produced by leading, trailing or doubled
          *         separators.
          */
         private static String normalizeAndValidateVariant(String variant) {
             if (variant == null || variant.isEmpty()) {
                 return "";
             }

             // Unlike extensions, variants are canonicalized to underscores instead
             // of hyphens. The case of the variant is deliberately preserved.
             final String normalizedVariant = variant.replace('-', '_');

             // A negative limit keeps trailing empty strings in the result. With the
             // default limit they are discarded, which let inputs such as "POSIX_"
             // or "_" slip through validation and be stored with a dangling separator.
             final String[] subTags = normalizedVariant.split("_", -1);

             for (String subTag : subTags) {
                 if (!isValidVariantSubtag(subTag)) {
                     throw new IllformedLocaleException("Invalid variant: " + variant);
                 }
             }

             return normalizedVariant;
         }

         /**
          * Returns whether {@code subTag} is a well formed BCP-47 variant subtag:
          * either 5 to 8 ASCII alphanumeric characters, or exactly 4 ASCII
          * alphanumeric characters of which the first is a digit.
          */
         private static boolean isValidVariantSubtag(String subTag) {
             final int length = subTag.length();

             if (length == 4) {
                 // Four character subtags are only allowed when they start with a digit.
                 final char leading = subTag.charAt(0);
                 final boolean startsWithDigit = (leading >= '0') && (leading <= '9');
                 return startsWithDigit && isAsciiAlphaNum(subTag);
             }

             if (length >= 5 && length <= 8) {
                 return isAsciiAlphaNum(subTag);
             }

             // Any other length is never valid.
             return false;
         }

         /**
          * Sets the locale script. If {@code script} is {@code null} or empty,
          * the previous value is cleared.
          *
          * As per BCP-47, the script must be 4 characters in length, and
          * each character in the range {@code [a-zA-Z]}.
          *
          * A script usually represents a valid ISO 15924 script code, though no
          * other registry or validity checks are performed.
          *
          * Scripts are normalized to title cased values.
          *
          * @throws IllformedLocaleException if {@code script} is invalid.
          */
         public Builder setScript(String script) {
             // Strict mode: an ill-formed script raises IllformedLocaleException
             // instead of being silently dropped.
             final String normalized = normalizeAndValidateScript(script, true /* strict */);
             this.script = normalized;
             return this;
         }

         /**
          * Checks that {@code script} is a four letter BCP-47 alpha subtag and
          * returns it in title case.
          *
          * @param script the script to normalize; {@code null} or empty yields the
          *        empty string.
          * @param strict if {@code true}, an ill-formed script raises an exception;
          *        otherwise the empty string is returned in its place.
          * @return the title-cased script, or the empty string.
          * @throws IllformedLocaleException if {@code strict} is set and the script
          *         is ill-formed.
          */
         private static String normalizeAndValidateScript(String script, boolean strict) {
             final boolean absent = (script == null) || script.isEmpty();
             if (absent) {
                 return "";
             }

             // Validation runs on the input as given; case is only normalized
             // once the script is known to be well formed.
             if (isValidBcp47Alpha(script, 4, 4)) {
                 return titleCaseAsciiWord(script);
             }

             if (!strict) {
                 return "";
             }
             throw new IllformedLocaleException("Invalid script: " + script);
         }

         /**
          * Sets the state of the builder to the {@link Locale} represented by
          * {@code locale}.
          *
          * Note that the locale's language, region and variant are validated as per
          * the rules specified in {@link #setLanguage}, {@link #setRegion} and
          * {@link #setVariant}.
          *
          * All existing builder state is discarded.
          *
          * @throws IllformedLocaleException if {@code locale} is invalid.
          * @throws NullPointerException if {@code locale} is null.
          */
         public Builder setLocale(Locale locale) {
             if (locale == null) {
                 throw new NullPointerException("locale == null");
             }

             // Validate into locals first and commit afterwards, so that a failure
             // part-way through leaves the builder's existing state untouched.
             final String newLanguage =
                     normalizeAndValidateLanguage(locale.getLanguage(), true /* strict */);
             final String newRegion =
                     normalizeAndValidateRegion(locale.getCountry(), true /* strict */);
             final String newVariant = normalizeAndValidateVariant(locale.getVariant());

             language = newLanguage;
             region = newRegion;
             variant = newVariant;

             // The remaining values can only have been set through the builder
             // class, so they are copied without re-normalizing or re-validating.
             script = locale.getScript();

             extensions.clear();
             extensions.putAll(locale.extensions);

             keywords.clear();
             keywords.putAll(locale.unicodeKeywords);

             attributes.clear();
             attributes.addAll(locale.unicodeAttributes);

             return this;
         }

         /**
          * Adds the specified attribute to the list of attributes in the unicode
          * locale extension.
          *
          * Attributes must be between 3 and 8 characters in length, and each character
          * must be in the range {@code [a-zA-Z0-9]}.
          *
          * Attributes are normalized to lower case values. All added attributes and
          * keywords are combined to form a complete unicode locale extension on
          * {@link Locale} objects built by this builder, and accessible via
          * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION}
          * key.
          *
          * @throws IllformedLocaleException if {@code attribute} is invalid.
          * @throws NullPointerException if {@code attribute} is null.
          */
         public Builder addUnicodeLocaleAttribute(String attribute) {
             if (attribute == null) {
                 throw new NullPointerException("attribute == null");
             }

             // Attributes are stored in lower case; validation runs on that form.
             final String normalized = attribute.toLowerCase(Locale.ROOT);
             if (isValidBcp47Alphanum(normalized, 3, 8)) {
                 attributes.add(normalized);
                 return this;
             }

             // The message quotes the caller's original (un-normalized) input.
             throw new IllformedLocaleException("Invalid locale attribute: " + attribute);
         }

         /**
          * Removes an attribute from the list of attributes in the unicode locale
          * extension.
          *
          * {@code attribute} must be valid as per the rules specified in
          * {@link #addUnicodeLocaleAttribute}.
          *
          * This method has no effect if {@code attribute} hasn't already been
          * added.
          *
          * @throws IllformedLocaleException if {@code attribute} is invalid.
          * @throws NullPointerException if {@code attribute} is null.
          */
         public Builder removeUnicodeLocaleAttribute(String attribute) {
             if (attribute == null) {
                 throw new NullPointerException("attribute == null");
             }

             // Weirdly, remove is specified to check whether the attribute
             // is valid, so we have to perform the full alphanumeric check here.
             final String lowercaseAttribute = attribute.toLowerCase(Locale.ROOT);
             if (!isValidBcp47Alphanum(lowercaseAttribute, 3, 8)) {
                 throw new IllformedLocaleException("Invalid locale attribute: " + attribute);
             }

             // Attributes are stored in their lower-cased form (see
             // addUnicodeLocaleAttribute), so the lookup must use that same form;
             // removing the raw argument would miss "foo" when asked for "FOO".
             attributes.remove(lowercaseAttribute);
             return this;
         }

         /**
          * Sets the extension identified by {@code key} to {@code value}.
          *
          * {@code key} must be in the range {@code [a-zA-Z0-9]}.
          *
          * If {@code value} is {@code null} or empty, the extension is removed.
          *
          * In the general case, {@code value} must be a series of subtags separated
          * by ({@code "-"} or {@code "_"}). Each subtag must be between
          * 2 and 8 characters in length, and each character in the subtag must be in
          * the range {@code [a-zA-Z0-9]}.
          *
          * <p>
          * There are two special cases :
          * <ul>
          *     <li>
          *         The unicode locale extension
          *         ({@code key == 'u'}, {@link Locale#UNICODE_LOCALE_EXTENSION}) : Setting
          *         the unicode locale extension results in all existing keyword and attribute
          *         state being replaced by the parsed result of {@code value}. For example,
          *         {@code  builder.setExtension('u', "baaaz-baaar-fo-baar-ba-baaz")}
          *         is equivalent to:
          *         <pre>
          *             builder.addUnicodeLocaleAttribute("baaaz");
          *             builder.addUnicodeLocaleAttribute("baaar");
          *             builder.setUnicodeLocaleKeyword("fo", "baar");
          *             builder.setUnicodeLocaleKeyword("ba", "baaz");
          *         </pre>
          *     </li>
          *     <li>
          *         The private use extension
          *         ({@code key == 'x'}, {@link Locale#PRIVATE_USE_EXTENSION}) : Each subtag in a
          *         private use extension can be between 1 and 8 characters in length (in contrast
          *         to a minimum length of 2 for all other extensions).
          *     </li>
          * </ul>
          *
          * @throws IllformedLocaleException if {@code value} is invalid.
          */
         public Builder setExtension(char key, String value) {
             if (value == null || value.isEmpty()) {
                 extensions.remove(key);
                 if (key == UNICODE_LOCALE_EXTENSION) {
                     // The unicode locale extension is held as structured keyword
                     // and attribute state, so removing it must drop that state too.
                     keywords.clear();
                     attributes.clear();
                 }
                 return this;
             }

             final String normalizedValue = value.toLowerCase(Locale.ROOT).replace('_', '-');
             // A negative limit keeps trailing empty strings so that a dangling
             // separator (e.g. "abc-") reaches the per-subtag validation below
             // instead of being silently discarded by split().
             final String[] subtags = normalizedValue.split("-", -1);

             // Lengths for subtags in the private use extension should be [1, 8] chars.
             // For all other extensions, they should be [2, 8] chars.
             //
             // http://www.rfc-editor.org/rfc/bcp/bcp47.txt
             final int minimumLength = (key == PRIVATE_USE_EXTENSION) ? 1 : 2;
             for (String subtag : subtags) {
                 if (!isValidBcp47Alphanum(subtag, minimumLength, 8)) {
                     // This check applies to every extension key, not only 'x'.
                     throw new IllformedLocaleException("Invalid extension: " + value);
                 }
             }

             // We need to take special action in the case of unicode extensions,
             // since we claim to understand their keywords and attributes.
             if (key == UNICODE_LOCALE_EXTENSION) {
                 // Replace the existing keyword and attribute state with the parsed
                 // value. Only the entry stored under this key is dropped from the
                 // extension map (presumably one can be copied in by setLocale —
                 // TODO confirm); extensions under other keys must be kept.
                 extensions.remove(key);
                 keywords.clear();
                 attributes.clear();

                 parseUnicodeExtension(subtags, keywords, attributes);
             } else {
                 extensions.put(key, normalizedValue);
             }

             return this;
         }

         /**
          * Clears all extensions from this builder. Note that this also implicitly
          * clears all state related to the unicode locale extension; all attributes
          * and keywords set by {@link #addUnicodeLocaleAttribute} and
          * {@link #setUnicodeLocaleKeyword} are cleared.
          */
         public Builder clearExtensions() {
             // Structured state of the unicode locale extension.
             keywords.clear();
             attributes.clear();
             // Every other extension, keyed by its singleton.
             extensions.clear();
             return this;
         }

         /**
          * Adds a key / type pair to the list of unicode locale extension keys.
          *
          * {@code key} must be 2 characters in length, and each character must be
          * in the range {@code [a-zA-Z0-9]}.
          *
          * {@code type} can either be empty, or a series of one or more subtags
          * separated by a separator ({@code "-"} or {@code "_"}). Each subtag must
          * be between 3 and 8 characters in length and each character in the subtag
          * must be in the range {@code [a-zA-Z0-9]}.
          *
          * Note that the type is normalized to lower case, and all separators
          * are normalized to {@code "-"}. All added attributes and
          * keywords are combined to form a complete unicode locale extension on
          * {@link Locale} objects built by this builder, and accessible via
          * {@link Locale#getExtension(char)} with the {@link Locale#UNICODE_LOCALE_EXTENSION}
          * key.
          *
          * @throws IllformedLocaleException if {@code key} or {@code type} are
          *         invalid.
          */
         public Builder setUnicodeLocaleKeyword(String key, String type) {
             if (key == null) {
                 throw new NullPointerException("key == null");
             }

             // Keys are stored in lower case, so every lookup (including removal)
             // has to go through the normalized form.
             final String lowerCaseKey = key.toLowerCase(Locale.ROOT);

             // A null type removes the keyword. Removal is a no-op for keys that
             // were never added, so the key is not validated on this path.
             if (type == null) {
                 keywords.remove(lowerCaseKey);
                 return this;
             }

             // The key must be exactly two alphanumeric characters.
             if (lowerCaseKey.length() != 2 || !isAsciiAlphaNum(lowerCaseKey)) {
                 throw new IllformedLocaleException("Invalid unicode locale keyword: " + key);
             }

             // The type can be one or more alphanumeric strings of length [3, 8] characters,
             // separated by a separator char, which is one of "_" or "-". Though the spec
             // doesn't require it, we normalize all "_" to "-" to make the rest of our
             // processing easier.
             final String lowerCaseType = type.toLowerCase(Locale.ROOT).replace("_", "-");
             if (!isValidTypeList(lowerCaseType)) {
                 throw new IllformedLocaleException("Invalid unicode locale type: " + type);
             }

             // Everything checks out fine, add the <key, type> mapping to the list.
             keywords.put(lowerCaseKey, lowerCaseType);

             return this;
         }

         /**
          * Clears all existing state from this builder.
          */
         public Builder clear() {
             // Drop every extension, attribute and keyword first ...
             clearExtensions();

             // ... then blank out each of the core locale fields.
             language = "";
             region = "";
             variant = "";
             script = "";

             return this;
         }

         /**
          * Constructs a locale from the existing state of the builder. Note that this
          * method is guaranteed to succeed since field validity checks are performed
          * at the point of setting them.
          */
         public Locale build() {
             // NOTE: The builder's attributes, keywords and extensions must be copied
             // because the RI allows this builder to be reused after build(). The copy
             // is not made here: the Locale constructor copies the three collections
             // itself when it is told that the fields have been validated.
             return new Locale(language, region, variant, script,
                     attributes, keywords, extensions,
                     true /* has validated fields */);
         }
     }

     /**
      * Returns a locale for a given BCP-47 language tag. This method is more
      * lenient than {@link Builder#setLanguageTag}. For a given language tag, parsing
      * will proceed up to the first malformed subtag. All subsequent tags are discarded.
      * Note that language tags use {@code -} rather than {@code _}, for example {@code en-US}.
      *
      * @throws NullPointerException if {@code languageTag} is {@code null}.
      *
      * @since 1.7
      */
     public static Locale forLanguageTag(String languageTag) {
         // Reject null explicitly so that the exception carries a descriptive message.
         if (languageTag == null) {
             throw new NullPointerException("languageTag == null");
         }

         // Delegate to the two-argument overload in non-strict (lenient) mode.
         return forLanguageTag(languageTag, false /* strict */);
     }

     // The four core locale components. The constructors reject null for language,
     // country and variant; an absent component is stored as the empty string.
     private transient String countryCode;
     private transient String languageCode;
     private transient String variantCode;
     private transient String scriptCode;

     // NOTE: the "sorted, unmodifiable" property below is established by the constructor
     // only when the fields are marked as validated (sorted copies wrapped in
     // unmodifiable views). Otherwise the caller-supplied collection is stored as is.

     /* Sorted, Unmodifiable */
     private transient Set<String> unicodeAttributes;
     /* Sorted, Unmodifiable */
     private transient Map<String, String> unicodeKeywords;
     /* Sorted, Unmodifiable */
     private transient Map<Character, String> extensions;

     /**
      * Whether this instance was constructed from a builder. We can make
      * stronger assumptions about the validity of Locale fields if this was
      * constructed by a builder.
      */
     private transient final boolean hasValidatedFields;

     // Lazily computed results of toString() and toLanguageTag(); null until first use.
     private transient String cachedToStringResult;
     private transient String cachedLanguageTag;
     // NOTE(review): presumably a lazily computed ICU locale id; it is not read or
     // written in this part of the file -- confirm against its users.
     private transient String cachedIcuLocaleId;

     /**
      * There's a circular dependency between toLowerCase/toUpperCase and
      * Locale.US. Work around this by avoiding these methods when constructing
      * the built-in locales.
      */
     private Locale(boolean hasValidatedFields, String lowerCaseLanguageCode,
             String upperCaseCountryCode) {
         // The caller supplies codes that are already in their final case, so they are
         // stored verbatim and no case conversion takes place in this constructor.
         this.hasValidatedFields = hasValidatedFields;
         this.languageCode = lowerCaseLanguageCode;
         this.countryCode = upperCaseCountryCode;

         // Locales built this way carry neither a script nor a variant ...
         this.scriptCode = "";
         this.variantCode = "";

         // ... and no extension data of any kind.
         this.extensions = Collections.emptyMap();
         this.unicodeKeywords = Collections.emptyMap();
         this.unicodeAttributes = Collections.emptySet();
     }

     /**
      * Constructs a new {@code Locale} using the specified language.
      */
     public Locale(String language) {
         // Language only: country, variant and script are empty, there is no extension
         // data, and the fields are flagged as not validated.
         this(language, "", "", "",
                 Collections.<String>emptySet(),
                 Collections.<String, String>emptyMap(),
                 Collections.<Character, String>emptyMap(),
                 false /* has validated fields */);
     }

     /**
      * Constructs a new {@code Locale} using the specified language and country codes.
      */
     public Locale(String language, String country) {
         // Language and country only: variant and script are empty, there is no
         // extension data, and the fields are flagged as not validated.
         this(language, country, "", "",
                 Collections.<String>emptySet(),
                 Collections.<String, String>emptyMap(),
                 Collections.<Character, String>emptyMap(),
                 false /* has validated fields */);
     }

     /**
      * Required by libcore.icu.ICU.
      *
      * @hide
      */
     public Locale(String language, String country, String variant, String scriptCode,
             /* nonnull */ Set<String> unicodeAttributes,
             /* nonnull */ Map<String, String> unicodeKeywords,
             /* nonnull */ Map<Character, String> extensions,
             boolean hasValidatedFields) {
         if (language == null || country == null || variant == null) {
             throw new NullPointerException("language=" + language +
                     ",country=" + country +
                     ",variant=" + variant);
         }

         // The variant and the script are stored verbatim on every path.
         this.variantCode = variant;
         this.scriptCode = scriptCode;
         this.hasValidatedFields = hasValidatedFields;

         if (!hasValidatedFields && language.isEmpty() && country.isEmpty()) {
             // Unvalidated input with neither language nor country: nothing to adjust.
             this.languageCode = "";
             this.countryCode = "";
         } else {
             this.languageCode = adjustLanguageCode(language);
             // Validated fields arrive with the country in its final form already;
             // only unvalidated input needs upper-casing.
             this.countryCode = hasValidatedFields ? country : country.toUpperCase(Locale.US);
         }

         if (!hasValidatedFields) {
             // Unvalidated callers have their collections stored as they are.
             this.unicodeAttributes = unicodeAttributes;
             this.unicodeKeywords = unicodeKeywords;
             this.extensions = extensions;
         } else {
             // Take sorted private copies so that later changes to the caller's
             // collections cannot affect this locale.
             final Set<String> sortedAttributes = new TreeSet<String>(unicodeAttributes);
             final Map<String, String> sortedKeywords =
                     new TreeMap<String, String>(unicodeKeywords);
             final Map<Character, String> sortedExtensions =
                     new TreeMap<Character, String>(extensions);

             // We need to transform the list of attributes & keywords set on the
             // builder to a unicode locale extension. i.e, if we have any keywords
             // or attributes set, Locale#getExtension('u') should return a well
             // formed extension.
             addUnicodeExtensionToExtensionsMap(sortedAttributes, sortedKeywords,
                     sortedExtensions);

             this.unicodeAttributes = Collections.unmodifiableSet(sortedAttributes);
             this.unicodeKeywords = Collections.unmodifiableMap(sortedKeywords);
             this.extensions = Collections.unmodifiableMap(sortedExtensions);
         }
     }

     /**
      * Constructs a new {@code Locale} using the specified language, country,
      * and variant codes.
      */
     public Locale(String language, String country, String variant) {
         // No script and no extension data; the fields are flagged as not validated.
         this(language, country, variant, "",
                 Collections.<String>emptySet(),
                 Collections.<String, String>emptyMap(),
                 Collections.<Character, String>emptyMap(),
                 false /* has validated fields */);
     }

     @Override public Object clone() {
         try {
             // Delegates entirely to the superclass's clone(); no fields are adjusted.
             return super.clone();
         } catch (CloneNotSupportedException e) {
             // A failure to clone is treated as a programming error and rethrown
             // unchecked, keeping the original exception as the cause.
             throw new AssertionError(e);
         }
     }

     /**
      * Returns true if {@code object} is a locale with the same language,
      * country, variant, script and extensions.
      */
     @Override public boolean equals(Object object) {
         // Identity short-cut.
         if (this == object) {
             return true;
         }
         // Anything that is not a Locale (including null) can never be equal.
         if (!(object instanceof Locale)) {
             return false;
         }

         // Compare component by component, bailing out on the first mismatch.
         final Locale other = (Locale) object;
         if (!languageCode.equals(other.languageCode)) {
             return false;
         }
         if (!countryCode.equals(other.countryCode)) {
             return false;
         }
         if (!variantCode.equals(other.variantCode)) {
             return false;
         }
         if (!scriptCode.equals(other.scriptCode)) {
             return false;
         }
         return extensions.equals(other.extensions);
     }

     /**
      * Returns the system's installed locales. This array always includes {@code
      * Locale.US}, and usually several others. Most locale-sensitive classes
      * offer their own {@code getAvailableLocales} method, which should be
      * preferred over this general purpose method.
      *
      * @see java.text.BreakIterator#getAvailableLocales()
      * @see java.text.Collator#getAvailableLocales()
      * @see java.text.DateFormat#getAvailableLocales()
      * @see java.text.DateFormatSymbols#getAvailableLocales()
      * @see java.text.DecimalFormatSymbols#getAvailableLocales()
      * @see java.text.NumberFormat#getAvailableLocales()
      * @see java.util.Calendar#getAvailableLocales()
      */
     public static Locale[] getAvailableLocales() {
         // The array comes straight from the ICU bridge; nothing is filtered here.
         return ICU.getAvailableLocales();
     }

     /**
      * Returns the country code for this locale, or {@code ""} if this locale
      * doesn't correspond to a specific country.
      */
     public String getCountry() {
         // Plain accessor: the stored country code is returned as is.
         return countryCode;
     }

     /**
      * Returns the user's preferred locale. This may have been overridden for
      * this process with {@link #setDefault}.
      *
      * <p>Since the user's locale changes dynamically, avoid caching this value.
      * Instead, use this method to look it up for each use.
      */
     public static Locale getDefault() {
         // Reads the static default, which setDefault() replaces.
         return defaultLocale;
     }

     /**
      * Equivalent to {@code getDisplayCountry(Locale.getDefault())}.
      */
     public final String getDisplayCountry() {
         // Convenience overload: localize using the default locale at the time of the call.
         return getDisplayCountry(getDefault());
     }

     /**
      * Returns the name of this locale's country, localized to {@code locale}.
      * Returns the empty string if this locale does not correspond to a specific
      * country.
      */
     public String getDisplayCountry(Locale locale) {
         // No country, no display name.
         if (countryCode.isEmpty()) {
             return "";
         }

         // A country code that lenient region validation reduces to the empty string
         // is handed back unchanged instead of being passed on to ICU.
         if (Builder.normalizeAndValidateRegion(countryCode, false /* strict */).isEmpty()) {
             return countryCode;
         }

         final String localized = ICU.getDisplayCountry(this, locale);
         if (localized != null) {
             return localized;
         }
         // TODO: do we need to do this, or does ICU do it for us?
         return ICU.getDisplayCountry(this, Locale.getDefault());
     }

     /**
      * Equivalent to {@code getDisplayLanguage(Locale.getDefault())}.
      */
     public final String getDisplayLanguage() {
         // Convenience overload: localize using the default locale at the time of the call.
         return getDisplayLanguage(getDefault());
     }

     /**
      * Returns the name of this locale's language, localized to {@code locale}.
      * If the language name is unknown, the language code is returned.
      */
     public String getDisplayLanguage(Locale locale) {
         // No language, no display name.
         if (languageCode.isEmpty()) {
             return "";
         }

         // Backward compatibility hack.
         //
         // The language tag contains "und" whenever the languageCode is invalid or
         // missing, and ICU would then answer with "langue indéterminée" (or the
         // equivalent display name for the indeterminate language). Callers expect
         // the raw language code in that situation, so return it directly.
         final String normalized = Builder.normalizeAndValidateLanguage(
                 languageCode, false /* strict */);
         if (UNDETERMINED_LANGUAGE.equals(normalized)) {
             return languageCode;
         }

         // TODO: We need a new hack or a complete fix for http://b/8049507 --- We would
         // cover the frameworks' tracks when they were using "tl" instead of "fil".
         final String localized = ICU.getDisplayLanguage(this, locale);
         if (localized != null) {
             return localized;
         }
         // TODO: do we need to do this, or does ICU do it for us?
         return ICU.getDisplayLanguage(this, Locale.getDefault());
     }

     /**
      * Equivalent to {@code getDisplayName(Locale.getDefault())}.
      */
     public final String getDisplayName() {
         // Convenience overload: localize using the default locale at the time of the call.
         return getDisplayName(getDefault());
     }

     /**
      * Returns this locale's language name, country name, and variant, localized
      * to {@code locale}. The exact output form depends on whether this locale
      * corresponds to a specific language, script, country and variant.
      *
      * <p>For example:
      * <ul>
      * <li>{@code new Locale("en").getDisplayName(Locale.US)} -> {@code English}
      * <li>{@code new Locale("en", "US").getDisplayName(Locale.US)} -> {@code English (United States)}
      * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.US)} -> {@code English (United States,Computer)}
      * <li>{@code Locale.forLanguageTag("zh-Hant-CN").getDisplayName(Locale.US)} -> {@code Chinese (Traditional Han,China)}
      * <li>{@code new Locale("en").getDisplayName(Locale.FRANCE)} -> {@code anglais}
      * <li>{@code new Locale("en", "US").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis)}
      * <li>{@code new Locale("en", "US", "POSIX").getDisplayName(Locale.FRANCE)} -> {@code anglais (États-Unis,informatique)}.
      * </ul>
      */
     public String getDisplayName(Locale locale) {
         // Step 1: gather the display text of every component that is present, in the
         // fixed order language, script, country, variant. A component whose localized
         // name comes back empty is represented by its raw code.
         final String[] parts = new String[4];
         int count = 0;

         if (!languageCode.isEmpty()) {
             final String name = getDisplayLanguage(locale);
             parts[count++] = name.isEmpty() ? languageCode : name;
         }
         if (!scriptCode.isEmpty()) {
             final String name = getDisplayScript(locale);
             parts[count++] = name.isEmpty() ? scriptCode : name;
         }
         if (!countryCode.isEmpty()) {
             final String name = getDisplayCountry(locale);
             parts[count++] = name.isEmpty() ? countryCode : name;
         }
         if (!variantCode.isEmpty()) {
             final String name = getDisplayVariant(locale);
             parts[count++] = name.isEmpty() ? variantCode : name;
         }

         // Step 2: lay the parts out as "first (second,third,fourth)". The parentheses
         // only appear when there is more than one part.
         final StringBuilder buffer = new StringBuilder();
         for (int i = 0; i < count; ++i) {
             if (i == 1) {
                 buffer.append(" (");
             } else if (i > 1) {
                 buffer.append(",");
             }
             buffer.append(parts[i]);
         }
         if (count > 1) {
             buffer.append(")");
         }
         return buffer.toString();
     }

     /**
      * Returns the full variant name in the default {@code Locale} for the variant code of
      * this {@code Locale}. If there is no matching variant name, the variant code is
      * returned.
      *
      * @since 1.7
      */
     public final String getDisplayVariant() {
         // Convenience overload: localize using the default locale at the time of the call.
         return getDisplayVariant(getDefault());
     }

     /**
      * Returns the full variant name in the specified {@code Locale} for the variant code
      * of this {@code Locale}. If there is no matching variant name, the variant code is
      * returned.
      *
      * @since 1.7
      */
     public String getDisplayVariant(Locale locale) {
         if (variantCode.isEmpty()) {
             return "";
         }

         // A variant that does not pass validation is returned verbatim rather than
         // being handed to ICU.
         try {
             Builder.normalizeAndValidateVariant(variantCode);
         } catch (IllformedLocaleException ilfe) {
             return variantCode;
         }

         String result = ICU.getDisplayVariant(this, locale);
         if (result == null) { // TODO: do we need to do this, or does ICU do it for us?
             result = ICU.getDisplayVariant(this, Locale.getDefault());
         }

         // The "old style" locale constructors allow us to pass in variants that aren't
         // valid BCP-47 variant subtags. When that happens, toLanguageTag will not emit
         // them. Note that we know variantCode.length() > 0 due to the isEmpty check at
         // the beginning of this function.
         //
         // The null check covers the case where the fallback lookup above produced
         // nothing either: the documented contract is to return the variant code when
         // no name is available, not to fail with a NullPointerException.
         if (result == null || result.isEmpty()) {
             return variantCode;
         }
         return result;
     }

     /**
      * Returns the three-letter ISO 3166 country code which corresponds to the country
      * code for this {@code Locale}.
      * @throws MissingResourceException if there's no 3-letter country code for this locale.
      */
     public String getISO3Country() {
         // The three-letter country code does not depend on the language, so a fixed,
         // arbitrarily chosen language ("en") is used for the lookup. This guards
         // against errors caused by a malformed or invalid language code.
         final String iso3 = ICU.getISO3Country("en-" + countryCode);

         // An empty answer is only acceptable when there was no country to look up.
         if (countryCode.isEmpty() || !iso3.isEmpty()) {
             return iso3;
         }
         throw new MissingResourceException(
                 "No 3-letter country code for locale: " + this,
                 "FormatData_" + this,
                 "ShortCountry");
     }

     /**
      * Returns the three-letter ISO 639-2/T language code which corresponds to the language
      * code for this {@code Locale}.
      * @throws MissingResourceException if there's no 3-letter language code for this locale.
      */
     public String getISO3Language() {
         // For backward compatibility an empty language code maps to "" and not to
         // "und", even though "und" is the accurate ISO-639-3 code for an undetermined
         // language.
         if (languageCode.isEmpty()) {
             return "";
         }

         // Only the language matters for this lookup; the country and the other locale
         // fields have no influence on the result, so just the language is passed.
         final String iso3 = ICU.getISO3Language(languageCode);
         if (!iso3.isEmpty()) {
             return iso3;
         }

         // The language code is known to be non-empty at this point, so an empty
         // answer means that no three-letter code exists for it.
         throw new MissingResourceException(
                 "No 3-letter language code for locale: " + this,
                 "FormatData_" + this,
                 "ShortLanguage");
     }

     /**
      * Returns an array of strings containing all the two-letter ISO 3166 country codes that can be
      * used as the country code when constructing a {@code Locale}.
      */
     public static String[] getISOCountries() {
         // The list is supplied by the ICU bridge and returned unchanged.
         return ICU.getISOCountries();
     }

     /**
      * Returns an array of strings containing all the two-letter ISO 639-1 language codes that can be
      * used as the language code when constructing a {@code Locale}.
      */
     public static String[] getISOLanguages() {
         // The list is supplied by the ICU bridge and returned unchanged.
         return ICU.getISOLanguages();
     }

     /**
      * Returns the language code for this {@code Locale} or the empty string if no language
      * was set.
      */
     public String getLanguage() {
         // Plain accessor: the stored language code is returned as is.
         return languageCode;
     }

     /**
      * Returns the variant code for this {@code Locale} or an empty {@code String} if no variant
      * was set.
      */
     public String getVariant() {
         // Plain accessor: the stored variant code is returned as is.
         return variantCode;
     }

     /**
      * Returns the script code for this {@code Locale} or an empty {@code String} if no script
      * was set.
      *
      * If set, the script code will be a title cased string of length 4, as per the ISO 15924
      * specification.
      *
      * @since 1.7
      */
     public String getScript() {
         // Plain accessor: the stored script code is returned as is.
         return scriptCode;
     }

     /**
      * Equivalent to {@code getDisplayScript(Locale.getDefault())}.
      *
      * @since 1.7
      */
     public String getDisplayScript() {
         // Convenience overload: localize using the default locale at the time of the call.
         return getDisplayScript(getDefault());
     }

     /**
      * Returns the name of this locale's script code, localized to {@code locale}. If the
      * script code is unknown, the return value of this method is the same as that of
      * {@link #getScript()}.
      *
      * @since 1.7
      */
     public String getDisplayScript(Locale locale) {
         // No script, no display name.
         if (scriptCode.isEmpty()) {
             return "";
         }

         final String localized = ICU.getDisplayScript(this, locale);
         if (localized != null) {
             return localized;
         }
         // TODO: do we need to do this, or does ICU do it for us?
         return ICU.getDisplayScript(this, Locale.getDefault());
     }

     /**
      * Returns a well formed BCP-47 language tag that identifies this locale.
      *
      * Note that this locale itself might consist of ill formed fields, since the
      * public {@code Locale} constructors do not perform validity checks to maintain
      * backwards compatibility. When this is the case, this method will either replace
      * ill formed fields with standard BCP-47 subtags (For eg. "und" (undetermined)
      * for invalid languages) or omit them altogether.
      *
      * Additionally, ill formed variants will result in the remainder of the tag
      * (both variants and extensions) being moved to the private use extension,
      * where they will appear after a subtag whose value is {@code "lvariant"}.
      *
      * It's also important to note that the BCP-47 tag is well formed in the sense
      * that it is unambiguously parseable into its specified components. We do not
      * require that any of the components are registered with the applicable registries.
      * For example, we do not require scripts to be a registered ISO 15924 scripts or
      * languages to appear in the ISO-639-2 code list.
      *
      * @since 1.7
      */
     public String toLanguageTag() {
         // The tag is computed on first use and remembered for later calls.
         String tag = cachedLanguageTag;
         if (tag == null) {
             tag = makeLanguageTag();
             cachedLanguageTag = tag;
         }
         return tag;
     }

     /**
      * Constructs a valid BCP-47 language tag from locale fields. Additional validation
      * is required when this Locale was not constructed using a Builder and variants
      * set this way are treated specially.
      *
      * In both cases, we convert empty language tags to "und", omit invalid country tags
      * and perform a special case conversion of "no-NO-NY" to "nn-NO".
      */
     private String makeLanguageTag() {
         // We only need to revalidate the language, country and variant because
         // the rest of the fields can only be set via the builder which validates
         // them anyway.
         String language = "";
         String region = "";
         String variant = "";
         // Variant subtags that are not well formed; they are emitted at the very end
         // of the tag, as part of the private use extension.
         String illFormedVariantSubtags = "";

         if (hasValidatedFields) {
             // Fields that came from a builder are trusted and used as they are.
             language = languageCode;
             region = countryCode;
             // Note that we are required to normalize hyphens to underscores
             // in the builder, but we must use hyphens in the BCP-47 language tag.
             variant = variantCode.replace('_', '-');
         } else {
             // Lenient (non-strict) validation of the language and the region.
             language = Builder.normalizeAndValidateLanguage(languageCode, false /* strict */);
             region = Builder.normalizeAndValidateRegion(countryCode, false /* strict */);

             try {
                 variant = Builder.normalizeAndValidateVariant(variantCode);
             } catch (IllformedLocaleException ilfe) {
                 // If our variant is ill formed, we must attempt to split it into
                 // its constituent subtags and preserve the well formed bits and
                 // move the rest to the private use extension (if they're well
                 // formed extension subtags).
                 String split[] = splitIllformedVariant(variantCode);

                 variant = split[0];
                 illFormedVariantSubtags = split[1];
             }
         }

         // An empty language is replaced by the "undetermined" placeholder.
         if (language.isEmpty()) {
             language = UNDETERMINED_LANGUAGE;
         }

         // Special case: "no-NO-NY" is converted to "nn-NO".
         if ("no".equals(language) && "NO".equals(region) && "NY".equals(variant)) {
             language = "nn";
             region = "NO";
             variant = "";
         }

         // Assemble the tag: language[-script][-region][-variant][-extensions...].
         final StringBuilder sb = new StringBuilder(16);
         sb.append(language);

         if (!scriptCode.isEmpty()) {
             sb.append('-');
             sb.append(scriptCode);
         }

         if (!region.isEmpty()) {
             sb.append('-');
             sb.append(region);
         }

         if (!variant.isEmpty()) {
             sb.append('-');
             sb.append(variant);
         }

         // Extensions (optional, omitted if empty). Note that we don't
         // emit the private use extension here, but add it in the end.
         for (Map.Entry<Character, String> extension : extensions.entrySet()) {
             if (!extension.getKey().equals('x')) {
                 sb.append('-').append(extension.getKey());
                 sb.append('-').append(extension.getValue());
             }
         }

         // The private use extension comes right at the very end.
         final String privateUse = extensions.get('x');
         if (privateUse != null) {
             sb.append("-x-");
             sb.append(privateUse);
         }

         // If we have any ill-formed variant subtags, we append them to the
         // private use extension (or add a private use extension if one doesn't
         // exist).
         //
         // NOTE(review): the "lvariant" marker is only written when no private use
         // extension exists; when one does, the subtags are appended to it directly
         // without the marker. Confirm that this is intended.
         if (!illFormedVariantSubtags.isEmpty()) {
             if (privateUse == null) {
                 sb.append("-x-lvariant-");
             } else {
                 sb.append('-');
             }
             sb.append(illFormedVariantSubtags);
         }

         return sb.toString();
     }

     /**
      * Splits ill formed variants into a set of valid variant subtags (which
      * can be used directly in language tag construction) and a set of invalid
      * variant subtags (which can be appended to the private use extension),
      * provided that each subtag is a valid private use extension subtag.
      *
      * This method returns a two element String array. The first element is a string
      * containing the concatenation of valid variant subtags which can be appended
      * to a BCP-47 tag directly and the second containing the concatenation of
      * invalid variant subtags which can be appended to the private use extension
      * directly.
      *
      * This method assumes that {@code variant} contains at least one ill formed
      * variant subtag.
      */
     private static String[] splitIllformedVariant(String variant) {
         final String normalizedVariant = variant.replace('_', '-');
         final String[] subTags = normalizedVariant.split("-");

         // Result holder: [0] = well formed variant subtags, [1] = subtags destined
         // for the private use extension. Both default to the empty string.
         final String[] split = new String[] { "", "" };

         // First go through the list of variant subtags and check if they're
         // valid private use extension subtags. If they're not, we will omit
         // the first such subtag and all subtags after.
         //
         // NOTE: |firstInvalidSubtag| is the index of the first variant
         // subtag we decide to omit altogether, whereas |firstIllformedSubtag| is the
         // index of the first subtag we decide to append to the private use extension.
         //
         // In other words:
         // [0, firstIllformedSubtag) => expressed as variant subtags.
         // [firstIllformedSubtag, firstInvalidSubtag) => expressed as private use
         // extension subtags.
         // [firstInvalidSubtag, subTags.length) => omitted.
         int firstInvalidSubtag = subTags.length;
         for (int i = 0; i < subTags.length; ++i) {
             if (!isValidBcp47Alphanum(subTags[i], 1, 8)) {
                 firstInvalidSubtag = i;
                 break;
             }
         }

         // Nothing usable at all: both halves of the result stay empty.
         if (firstInvalidSubtag == 0) {
             return split;
         }

         // We now consider each subtag that could potentially be appended to
         // the private use extension and check if it's a well formed variant subtag.
         // The scan must stop at the FIRST ill formed subtag: everything from that
         // point on belongs to the private use extension. Continuing the scan would
         // leave the index pointing at the last ill formed subtag instead, and earlier
         // ill formed subtags would wrongly be emitted as regular variant subtags.
         int firstIllformedSubtag = firstInvalidSubtag;
         for (int i = 0; i < firstInvalidSubtag; ++i) {
             final String subTag = subTags[i];
             final int length = subTag.length();

             // The BCP-47 spec states that :
             // - Subtags can be between [5, 8] alphanumeric chars in length.
             // - Subtags that start with a number are allowed to be 4 chars in length.
             final boolean wellFormed;
             if (length >= 5 && length <= 8) {
                 wellFormed = isAsciiAlphaNum(subTag);
             } else if (length == 4) {
                 final char firstChar = subTag.charAt(0);
                 wellFormed = (firstChar >= '0' && firstChar <= '9') && isAsciiAlphaNum(subTag);
             } else {
                 wellFormed = false;
             }

             if (!wellFormed) {
                 firstIllformedSubtag = i;
                 break;
             }
         }

         split[0] = concatenateRange(subTags, 0, firstIllformedSubtag);
         split[1] = concatenateRange(subTags, firstIllformedSubtag, firstInvalidSubtag);

         return split;
     }

     /**
      * Builds a string by concatenating array elements within the range [start, end).
      * The supplied range is assumed to be valid and no checks are performed.
      */
     private static String concatenateRange(String[] array, int start, int end) {
         final StringBuilder joined = new StringBuilder(32);

         // An empty (or inverted) range yields the empty string.
         if (start < end) {
             // The first element goes in bare; every later one is preceded by a hyphen.
             joined.append(array[start]);
             for (int index = start + 1; index < end; ++index) {
                 joined.append('-');
                 joined.append(array[index]);
             }
         }

         return joined.toString();
     }

     /**
      * Returns the set of BCP-47 extensions this locale contains.
      *
      * See <a href="https://tools.ietf.org/html/bcp47#section-2.2.6">
      *     the IETF BCP-47 specification</a> (Section 2.2.6) for details.
      *
      * @since 1.7
      */
     public Set<Character> getExtensionKeys() {
         // This is the key view of the extensions map itself, not a copy.
         return extensions.keySet();
     }

     /**
      * Returns the BCP-47 extension whose key is {@code extensionKey}, or {@code null}
      * if this locale does not contain the extension.
      *
      * Individual Keywords and attributes for the unicode
      * locale extension can be fetched using {@link #getUnicodeLocaleAttributes()},
      * {@link #getUnicodeLocaleKeys()}  and {@link #getUnicodeLocaleType}.
      *
      * @since 1.7
      */
     public String getExtension(char extensionKey) {
         // The char is boxed to a Character for the lookup, exactly as given: no case
         // conversion is applied to it here.
         return extensions.get(extensionKey);
     }

     /**
      * Returns the {@code type} for the specified unicode locale extension {@code key}.
      *
      * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword}
      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
      *
      * @since 1.7
      */
     public String getUnicodeLocaleType(String keyWord) {
         // The lookup uses keyWord exactly as given; no case conversion is applied here.
         // NOTE(review): the builder stores keywords under lower-cased keys, so an
         // upper-case keyWord presumably finds nothing -- confirm the intended contract.
         return unicodeKeywords.get(keyWord);
     }

     /**
      * Returns the set of unicode locale extension attributes this locale contains.
      *
      * For more information about attributes, see {@link Builder#addUnicodeLocaleAttribute}
      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
      *
      * @since 1.7
      */
     public Set<String> getUnicodeLocaleAttributes() {
         // The stored set is returned directly; no copy is made.
         return unicodeAttributes;
     }

     /**
      * Returns the set of unicode locale extension keywords this locale contains.
      *
      * For more information about types and keywords, see {@link Builder#setUnicodeLocaleKeyword}
      * and <a href="http://www.unicode.org/reports/tr35/#BCP47">Unicode Technical Standard #35</a>
      *
      * @since 1.7
      */
     public Set<String> getUnicodeLocaleKeys() {
         // This is the key view of the keywords map itself, not a copy.
         return unicodeKeywords.keySet();
     }

     @Override
     public synchronized int hashCode() {
         // Sum of the hash codes of exactly those fields that equals() compares.
         int hash = countryCode.hashCode();
         hash += languageCode.hashCode();
         hash += variantCode.hashCode();
         hash += scriptCode.hashCode();
         hash += extensions.hashCode();
         return hash;
     }

     /**
      * Replaces the default locale. The system configuration is not affected,
      * and an override made here may itself be overridden when the system
      * configuration actually changes. Code that calls this method is usually
      * incorrect; it should instead pass the appropriate locale to each
      * locale-sensitive method it calls.
      *
      * @param locale the new default locale.
      * @throws NullPointerException if {@code locale} is {@code null}.
      */
     public synchronized static void setDefault(Locale locale) {
         if (locale == null) {
             throw new NullPointerException("locale == null");
         }

         // Derive the tag first: if that fails, no state has been modified yet.
         final String tag = locale.toLanguageTag();

         defaultLocale = locale;
         ICU.setDefaultLocale(tag);
     }

     /**
      * Returns the string representation of this {@code Locale}: the language
      * code, country code and variant, separated by underscores.
      * A missing language makes the string start with an underscore, and a
      * missing country leaves two consecutive underscores between the language
      * and the variant. A variant cannot stand alone: when both the language
      * and the country are missing the result is the empty string.
      *
      * <p>Examples: "en", "en_US", "_US", "en__POSIX", "en_US_POSIX"
      *
      * <p>The result is computed on first use and cached for later calls.
      */
     @Override
     public final String toString() {
         final String cached = cachedToStringResult;
         if (cached != null) {
             return cached;
         }

         final String computed = toNewString(languageCode, countryCode, variantCode,
                 scriptCode, extensions);
         cachedToStringResult = computed;
         return computed;
     }

     /**
      * Builds the legacy string form "ll_cc_variant", optionally followed by a
      * "#"-prefixed suffix holding the script code and the serialized extensions.
      */
     private static String toNewString(String languageCode, String countryCode,
             String variantCode, String scriptCode, Map<Character, String> extensions) {
         // Without a language or a country there is nothing to anchor the rest
         // of the components to, so the string form is empty.
         if (languageCode.length() == 0 && countryCode.length() == 0) {
             return "";
         }

         final boolean hasCountry = !countryCode.isEmpty();
         final boolean hasVariant = !variantCode.isEmpty();
         final boolean hasScript = !scriptCode.isEmpty();
         final boolean hasExtensions = !extensions.isEmpty();
         final boolean hasSuffix = hasScript || hasExtensions;

         // 11 characters is enough for "en_US_POSIX", the largest "common" value;
         // in practice the result is almost always the 5 characters of "ll_cc".
         final StringBuilder out = new StringBuilder(11);

         out.append(languageCode);
         if (hasCountry || hasVariant || hasSuffix) {
             out.append('_');
         }

         out.append(countryCode);
         if (hasVariant || hasSuffix) {
             out.append('_');
         }

         out.append(variantCode);
         if (!hasSuffix) {
             return out.toString();
         }

         if (hasVariant) {
             out.append('_');
         }

         // For backwards compatibility this deliberately differs from BCP-47:
         // the script code goes at the very end and is introduced by a "#".
         out.append("#");
         if (hasScript) {
             out.append(scriptCode);
         }

         if (hasExtensions) {
             // Extensions are separated from the script by "-", not "_".
             if (hasScript) {
                 out.append('-');
             }
             out.append(serializeExtensions(extensions));
         }

         return out.toString();
     }

     /**
      * Declares the fields that make up the serialized form of this class: the
      * {@code country}, {@code language}, {@code variant}, {@code script} and
      * {@code extensions} strings, plus an {@code int} named {@code hashcode}
      * (for which {@code writeObject} always stores -1).
      */
     private static final ObjectStreamField[] serialPersistentFields = {
         new ObjectStreamField("country", String.class),
         new ObjectStreamField("hashcode", int.class),
         new ObjectStreamField("language", String.class),
         new ObjectStreamField("variant", String.class),
         new ObjectStreamField("script", String.class),
         new ObjectStreamField("extensions", String.class),
     };

     /**
      * Writes the fields declared in {@code serialPersistentFields}. The
      * {@code extensions} field is only populated when this locale has at least
      * one extension.
      */
     private void writeObject(ObjectOutputStream stream) throws IOException {
         final ObjectOutputStream.PutField out = stream.putFields();

         out.put("language", languageCode);
         out.put("country", countryCode);
         out.put("variant", variantCode);
         out.put("script", scriptCode);
         // The hash code itself is never persisted; a fixed -1 is written instead.
         out.put("hashcode", -1);

         final boolean hasExtensions = !extensions.isEmpty();
         if (hasExtensions) {
             out.put("extensions", serializeExtensions(extensions));
         }

         stream.writeFields();
     }

     /**
      * Restores the fields written by {@code writeObject}. Extension related
      * state starts out empty and is only rebuilt when the stream carries a
      * non-null {@code extensions} string.
      */
     private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
         final ObjectInputStream.GetField in = stream.readFields();

         languageCode = (String) in.get("language", "");
         countryCode = (String) in.get("country", "");
         variantCode = (String) in.get("variant", "");
         scriptCode = (String) in.get("script", "");

         this.extensions = Collections.EMPTY_MAP;
         this.unicodeAttributes = Collections.EMPTY_SET;
         this.unicodeKeywords = Collections.EMPTY_MAP;

         final String serializedExtensions = (String) in.get("extensions", null);
         if (serializedExtensions == null) {
             return;
         }
         readExtensions(serializedExtensions);
     }

     /**
      * Rebuilds the extensions map from its serialized form and, when it holds a
      * unicode locale extension, the unicode keyword and attribute collections
      * derived from it.
      */
     private void readExtensions(String extensions) {
         final Map<Character, String> parsed = new TreeMap<Character, String>();
         parseSerializedExtensions(extensions, parsed);
         this.extensions = Collections.unmodifiableMap(parsed);

         if (!parsed.containsKey(UNICODE_LOCALE_EXTENSION)) {
             // No unicode locale extension: nothing further to derive.
             return;
         }

         final Map<String, String> keywords = new TreeMap<String, String>();
         final Set<String> attributes = new TreeSet<String>();
         parseUnicodeExtension(parsed.get(UNICODE_LOCALE_EXTENSION).split("-"),
                 keywords, attributes);

         this.unicodeKeywords = Collections.unmodifiableMap(keywords);
         this.unicodeAttributes = Collections.unmodifiableSet(attributes);
     }

     /**
      * The serialized form for extensions is straightforward. It's simply
      * of the form key1-value1-key2-value2 where each value might in turn contain
      * multiple subtags separated by hyphens. Each key is guaranteed to be a single
      * character in length.
      *
      * Entries are emitted in the iteration order of {@code extensionsMap}. An
      * empty map serializes to the empty string.
      *
      * Visible for testing.
      *
      * @param extensionsMap the extensions to serialize, keyed by extension key.
      * @return the serialized extensions, or {@code ""} if the map is empty.
      *
      * @hide
      */
     public static String serializeExtensions(Map<Character, String> extensionsMap) {
         final StringBuilder sb = new StringBuilder(64);

         boolean first = true;
         for (Map.Entry<Character, String> entry : extensionsMap.entrySet()) {
             // A hyphen separates consecutive key-value pairs.
             if (!first) {
                 sb.append('-');
             }
             first = false;

             sb.append(entry.getKey());
             sb.append('-');
             sb.append(entry.getValue());
         }

         return sb.toString();
     }

     /**
      * Parses a string of the form key1-value1-key2-value2 (see
      * {@link #serializeExtensions}) and stores each key with its value in
      * {@code outputMap}.
      *
      * A subtag is treated as an extension key when it is exactly one character
      * long, except inside the private use extension ("x"): private use values
      * may themselves contain single-character subtags, so everything after an
      * "x" key belongs to its value. Keys that are not followed by a value are
      * skipped.
      *
      * Visible for testing.
      *
      * @param extString the serialized extensions.
      * @param outputMap receives one entry per parsed extension.
      *
      * @hide
      */
     public static void parseSerializedExtensions(String extString, Map<Character, String> outputMap) {
         final int length = extString.length();

         // Index within extString of the key whose value is currently being
         // scanned, or -1 if no key has been seen yet.
         int keyIndex = -1;
         boolean inPrivateUse = false;

         int subtagStart = 0;
         while (subtagStart < length) {
             int subtagEnd = extString.indexOf('-', subtagStart);
             if (subtagEnd == -1) {
                 subtagEnd = length;
             }

             if ((subtagEnd - subtagStart) == 1 && !inPrivateUse) {
                 // A new key starts here. The previous value spans from two
                 // characters after its key (skipping the key and its hyphen)
                 // up to the hyphen that precedes this key.
                 //   ....prev-k-next.....
                 //           |_ (subtagStart - 1), exclusive end of "prev"
                 if (keyIndex != -1 && (keyIndex + 2) < (subtagStart - 1)) {
                     outputMap.put(extString.charAt(keyIndex),
                             extString.substring(keyIndex + 2, subtagStart - 1));
                 }

                 keyIndex = subtagStart;
                 inPrivateUse = (extString.charAt(subtagStart) == 'x');
             }

             subtagStart = subtagEnd + 1;
         }

         // The value of the last key runs to the end of the string.
         if (keyIndex != -1 && (keyIndex + 2) < length) {
             outputMap.put(extString.charAt(keyIndex), extString.substring(keyIndex + 2, length));
         }
     }


     /**
      * Returns whether {@code code} looks like a UN M.49 area code, i.e. consists
      * of exactly three ASCII digits.
      */
     private static boolean isUnM49AreaCode(String code) {
         if (code.length() != 3) {
             return false;
         }

         for (char digit : code.toCharArray()) {
             if (digit < '0' || digit > '9') {
                 return false;
             }
         }

         return true;
     }

     /*
      * Returns whether every character of the given string is an ASCII letter
      * or an ASCII digit. The empty string trivially qualifies.
      */
     private static boolean isAsciiAlphaNum(String string) {
         for (char c : string.toCharArray()) {
             final boolean isLower = c >= 'a' && c <= 'z';
             final boolean isUpper = c >= 'A' && c <= 'Z';
             final boolean isDigit = c >= '0' && c <= '9';
             if (!isLower && !isUpper && !isDigit) {
                 return false;
             }
         }

         return true;
     }

     /**
      * Returns whether {@code string} consists solely of ASCII letters and its
      * length lies within {@code [lowerBound, upperBound]}.
      */
     private static boolean isValidBcp47Alpha(String string, int lowerBound, int upperBound) {
         final int length = string.length();
         final boolean lengthInRange = length >= lowerBound && length <= upperBound;
         if (!lengthInRange) {
             return false;
         }

         for (char c : string.toCharArray()) {
             final boolean isLower = c >= 'a' && c <= 'z';
             final boolean isUpper = c >= 'A' && c <= 'Z';
             if (!isLower && !isUpper) {
                 return false;
             }
         }

         return true;
     }

     /**
      * Returns whether {@code attributeOrType} is an ASCII alphanumeric string
      * whose length lies within {@code [lowerBound, upperBound]}.
      */
     private static boolean isValidBcp47Alphanum(String attributeOrType,
             int lowerBound, int upperBound) {
         final int length = attributeOrType.length();
         final boolean lengthInRange = length >= lowerBound && length <= upperBound;
         return lengthInRange && isAsciiAlphaNum(attributeOrType);
     }

     /**
      * Converts an ASCII word to title case: the first letter in upper case and
      * all remaining letters in lower case (e.g. "LATN" becomes "Latn").
      *
      * The empty string is returned unchanged, and a first character that is not
      * an ASCII letter is left as it is.
      */
     private static String titleCaseAsciiWord(String word) {
         final byte[] chars = word.toLowerCase(Locale.ROOT).getBytes(StandardCharsets.US_ASCII);

         // Only shift the first byte when it really is a lower-case ASCII letter;
         // blindly applying the offset would corrupt digits and other characters.
         if (chars.length > 0 && chars[0] >= 'a' && chars[0] <= 'z') {
             chars[0] = (byte) (chars[0] - 'a' + 'A');
         }

         return new String(chars, StandardCharsets.US_ASCII);
     }

     /**
      * A type list must contain one or more alphanumeric subtags whose lengths
      * are between 3 and 8, separated by single hyphens.
      *
      * @param lowerCaseTypeList the hyphen separated type list to validate.
      * @return {@code true} if every subtag of the list is valid.
      */
     private static boolean isValidTypeList(String lowerCaseTypeList) {
         // A negative limit keeps trailing empty strings, so that lists such as
         // "abc-" or "-" are rejected because of their empty subtags instead of
         // having those subtags silently discarded by split().
         final String[] splitList = lowerCaseTypeList.split("-", -1);
         for (String type : splitList) {
             if (!isValidBcp47Alphanum(type, 3, 8)) {
                 return false;
             }
         }

         return true;
     }

     /**
      * Serializes the given unicode locale attributes and keywords into a single
      * hyphen separated string and stores it in {@code extensions} under the
      * unicode locale extension key. Does nothing when there are neither
      * attributes nor keywords.
      */
     private static void addUnicodeExtensionToExtensionsMap(
             Set<String> attributes, Map<String, String> keywords,
             Map<Character, String> extensions) {
         if (attributes.isEmpty() && keywords.isEmpty()) {
             return;
         }

         // The common case is a small number of keywords & attributes
         // (usually one or two).
         final StringBuilder serialized = new StringBuilder(32);
         boolean needsSeparator = false;

         // Attributes come first, in the iteration order of the set.
         for (String attribute : attributes) {
             if (needsSeparator) {
                 serialized.append('-');
             }
             serialized.append(attribute);
             needsSeparator = true;
         }

         // Keywords follow; a keyword with an empty type is written as the bare key.
         for (Map.Entry<String, String> keyword : keywords.entrySet()) {
             if (needsSeparator) {
                 serialized.append('-');
             }
             serialized.append(keyword.getKey());

             final String type = keyword.getValue();
             if (!type.isEmpty()) {
                 serialized.append('-');
                 serialized.append(type);
             }
             needsSeparator = true;
         }

         extensions.put(UNICODE_LOCALE_EXTENSION, serialized.toString());
     }

     /**
      * This extension is described by http://www.unicode.org/reports/tr35/#RFC5234
      * unicode_locale_extensions = sep "u" (1*(sep keyword) / 1*(sep attribute) *(sep keyword)).
      *
      * It must contain at least one keyword or attribute and attributes (if any)
      * must appear before keywords. Attributes can't appear after keywords because
      * they will be indistinguishable from a subtag of the keyword type.
      *
      * Subtags of length two are treated as keyword keys; longer subtags are
      * attributes until the first key has been seen and type subtags of the most
      * recent key afterwards. A key without any type subtags is recorded with the
      * empty string as its type, regardless of where it appears. Subtags shorter
      * than two characters are ignored.
      *
      * Visible for testing.
      *
      * @param subtags the subtags of the unicode locale extension.
      * @param keywords receives each keyword key mapped to its hyphen joined type.
      * @param attributes receives the attributes.
      *
      * @hide
      */
     public static void parseUnicodeExtension(String[] subtags,
             Map<String, String> keywords, Set<String> attributes)  {
         String lastKeyword = null;
         List<String> subtagsForKeyword = new ArrayList<String>();
         for (String subtag : subtags) {
             if (subtag.length() == 2) {
                 // A new key terminates the previous keyword. Record it even when
                 // it had no type, so that "ka-kb-true" does not lose "ka".
                 if (lastKeyword != null) {
                     keywords.put(lastKeyword, subtagsForKeyword.isEmpty()
                             ? "" : joinBcp47Subtags(subtagsForKeyword));
                     subtagsForKeyword.clear();
                 }

                 lastKeyword = subtag;
             } else if (subtag.length() > 2) {
                 if (lastKeyword == null) {
                     attributes.add(subtag);
                 } else {
                     subtagsForKeyword.add(subtag);
                 }
             }
         }

         // Flush the keyword that was still being read when the input ended.
         if (lastKeyword != null) {
             keywords.put(lastKeyword, subtagsForKeyword.isEmpty()
                     ? "" : joinBcp47Subtags(subtagsForKeyword));
         }
     }

     /**
      * Concatenates the given subtags into a single BCP-47 string, placing the
      * standard separator ("-") between consecutive subtags. The list must not
      * be empty.
      */
     private static String joinBcp47Subtags(List<String> strings) {
         // Seed the capacity with the length of the first subtag.
         final StringBuilder joined = new StringBuilder(strings.get(0).length());

         String separator = "";
         for (String subtag : strings) {
             joined.append(separator);
             joined.append(subtag);
             separator = "-";
         }

         return joined.toString();
     }

     /**
      * Lower-cases {@code languageCode} and rewrites the language codes "he",
      * "id" and "yi" to the obsolete codes "iw", "in" and "ji" respectively, so
      * that the correct resource bundles will be used. The rewriting is applied
      * regardless of the case of the input.
      *
      * @param languageCode the language code to adjust.
      * @return the lower-cased and, if applicable, rewritten language code.
      *
      * @hide for internal use only.
      */
     public static String adjustLanguageCode(String languageCode) {
         String adjusted = languageCode.toLowerCase(Locale.US);
         // Map new language codes to the obsolete language
         // codes so the correct resource bundles will be used.
         // The comparison is made against the lower-cased code so that inputs
         // such as "HE" are rewritten too.
         if (adjusted.equals("he")) {
             adjusted = "iw";
         } else if (adjusted.equals("id")) {
             adjusted = "in";
         } else if (adjusted.equals("yi")) {
             adjusted = "ji";
         }

         return adjusted;
     }

     /**
      * Map of grandfathered language tags to their modern replacements.
      *
      * The map is ordered with {@link String#CASE_INSENSITIVE_ORDER}, so lookups
      * ignore the case of the tag.
      */
     private static final TreeMap<String, String> GRANDFATHERED_LOCALES;

     static {
         GRANDFATHERED_LOCALES = new TreeMap<String, String>(String.CASE_INSENSITIVE_ORDER);

         // The entries below follow the two groups of the "grandfathered"
         // production of the BCP-47 grammar.
         //
         // From http://tools.ietf.org/html/bcp47
         //
         // grandfathered = irregular           ; non-redundant tags registered
         //               / regular             ; during the RFC 3066 era
         //  irregular =
         GRANDFATHERED_LOCALES.put("en-GB-oed", "en-GB-x-oed");
         GRANDFATHERED_LOCALES.put("i-ami", "ami");
         GRANDFATHERED_LOCALES.put("i-bnn", "bnn");
         GRANDFATHERED_LOCALES.put("i-default", "en-x-i-default");
         GRANDFATHERED_LOCALES.put("i-enochian", "und-x-i-enochian");
         GRANDFATHERED_LOCALES.put("i-hak", "hak");
         GRANDFATHERED_LOCALES.put("i-klingon", "tlh");
         GRANDFATHERED_LOCALES.put("i-lux", "lb");
         GRANDFATHERED_LOCALES.put("i-mingo", "see-x-i-mingo");
         GRANDFATHERED_LOCALES.put("i-navajo", "nv");
         GRANDFATHERED_LOCALES.put("i-pwn", "pwn");
         GRANDFATHERED_LOCALES.put("i-tao", "tao");
         GRANDFATHERED_LOCALES.put("i-tay", "tay");
         GRANDFATHERED_LOCALES.put("i-tsu", "tsu");
         GRANDFATHERED_LOCALES.put("sgn-BE-FR", "sfb");
         GRANDFATHERED_LOCALES.put("sgn-BE-NL", "vgt");
         GRANDFATHERED_LOCALES.put("sgn-CH-DE", "sgg");

         // regular =
         GRANDFATHERED_LOCALES.put("art-lojban", "jbo");
         GRANDFATHERED_LOCALES.put("cel-gaulish", "xtg-x-cel-gaulish");
         GRANDFATHERED_LOCALES.put("no-bok", "nb");
         GRANDFATHERED_LOCALES.put("no-nyn", "nn");
         GRANDFATHERED_LOCALES.put("zh-guoyu", "cmn");
         GRANDFATHERED_LOCALES.put("zh-hakka", "hak");
         GRANDFATHERED_LOCALES.put("zh-min", "nan-x-zh-min");
         GRANDFATHERED_LOCALES.put("zh-min-nan", "nan");
         GRANDFATHERED_LOCALES.put("zh-xiang", "hsn");
     }

     /**
      * Returns the modern replacement registered for {@code original} in
      * {@code GRANDFATHERED_LOCALES}, or {@code original} itself when the tag
      * has no registered replacement.
      */
     private static String convertGrandfatheredTag(String original) {
         final String replacement = GRANDFATHERED_LOCALES.get(original);
         if (replacement == null) {
             return original;
         }
         return replacement;
     }

     /**
      * Walks {@code subtags} over the range {@code [startIndex, endIndex)} and
      * appends each subtag to {@code normalizedVariants} for as long as the
      * subtags are valid variant subtags. The walk stops at the first invalid
      * subtag (if any).
      */
     private static void extractVariantSubtags(String[] subtags, int startIndex, int endIndex,
             List<String> normalizedVariants) {
         int index = startIndex;
         while (index < endIndex && Builder.isValidVariantSubtag(subtags[index])) {
             normalizedVariants.add(subtags[index]);
             index++;
         }
     }

     /**
      * Scans elements of {@code subtags} in the range {@code [startIndex, endIndex)}
      * and inserts valid extensions into {@code extensions}. The scan is aborted
      * when an invalid extension is encountered. Returns the index of the first
      * unparsable element of {@code subtags}.
      *
      * Extension keys are matched without regard to case and are stored in
      * lower case, as are the extension values.
      *
      * @param subtags the subtags of the language tag being parsed.
      * @param startIndex index of the first subtag to scan (inclusive).
      * @param endIndex index at which to stop scanning (exclusive).
      * @param extensions receives the parsed extensions, keyed by extension key.
      * @return {@code endIndex} if every subtag in the range was consumed,
      *     otherwise the index of the first subtag that could not be parsed.
      */
     private static int extractExtensions(String[] subtags, int startIndex, int endIndex,
             Map<Character, String> extensions) {
         int privateUseExtensionIndex = -1;
         int extensionKeyIndex = -1;

         int i = startIndex;
         for (; i < endIndex; i++) {
             final String subtag = subtags[i];

             final boolean parsingPrivateUse = (privateUseExtensionIndex != -1) &&
                     (extensionKeyIndex == privateUseExtensionIndex);

             // Note that private use extensions allow subtags of length 1.
             // Private use extensions *must* come last, so there's no ambiguity
             // in that case.
             if (subtag.length() == 1 && !parsingPrivateUse) {
                 // Emit the last extension we encountered, if any. This fails if
                 // we saw two keys in a row or a key that is already present.
                 if (extensionKeyIndex != -1
                         && !storeParsedExtension(subtags, extensionKeyIndex, i, extensions)) {
                     return extensionKeyIndex;
                 }

                 // Mark the start of the next extension. Also keep track of whether this
                 // is a private use extension, and report an error if it doesn't come last.
                 extensionKeyIndex = i;
                 if ("x".equalsIgnoreCase(subtag)) {
                     privateUseExtensionIndex = i;
                 } else if (privateUseExtensionIndex != -1) {
                     // The private use extension must come last.
                     return privateUseExtensionIndex;
                 }
             } else if (extensionKeyIndex != -1) {
                 // We must have encountered a valid key in order to start parsing
                 // its subtags.
                 if (!isValidBcp47Alphanum(subtag, parsingPrivateUse ? 1 : 2, 8)) {
                     return i;
                 }
             } else {
                 // Encountered a value without a preceding key.
                 return i;
             }
         }

         // Emit the extension that was still being read when the range ended.
         if (extensionKeyIndex != -1
                 && !storeParsedExtension(subtags, extensionKeyIndex, i, extensions)) {
             return extensionKeyIndex;
         }

         return i;
     }

     /**
      * Stores the extension whose key is at {@code subtags[keyIndex]} and whose
      * value spans {@code (keyIndex, valueEnd)}. Returns {@code false}, leaving
      * {@code extensions} untouched, if the key has no value subtags or if an
      * extension with the same key has already been stored.
      */
     private static boolean storeParsedExtension(String[] subtags, int keyIndex, int valueEnd,
             Map<Character, String> extensions) {
         // A key that is immediately followed by the end of its range has no value.
         if ((valueEnd - 1) == keyIndex) {
             return false;
         }

         // Normalize ASCII upper case keys so that e.g. "U" and "u" are the same key.
         char key = subtags[keyIndex].charAt(0);
         if (key >= 'A' && key <= 'Z') {
             key = (char) (key - 'A' + 'a');
         }

         if (extensions.containsKey(key)) {
             return false;
         }

         final String value = concatenateRange(subtags, keyIndex + 1, valueEnd);
         extensions.put(key, value.toLowerCase(Locale.ROOT));
         return true;
     }

     /**
      * Parses a BCP-47 language tag into a {@code Locale}.
      *
      * Grandfathered tags are first replaced by their modern equivalents. The tag
      * is then split on "-" and consumed in order: language, optional script,
      * optional region, variants and finally extensions.
      *
      * @param tag the language tag to parse; must not be {@code null}.
      * @param strict if {@code true}, an ill-formed or unparseable subtag results
      *     in an {@code IllformedLocaleException}; if {@code false}, the first
      *     such subtag and everything after it are ignored.
      * @return the locale described by the parsed portion of {@code tag}.
      */
     private static Locale forLanguageTag(/* @Nonnull */ String tag, boolean strict) {
         final String converted = convertGrandfatheredTag(tag);
         final String[] subtags = converted.split("-");

         // String.split() discards trailing empty strings, so a tag that consists
         // solely of separators (e.g. "-") yields no subtags at all.
         if (subtags.length == 0 && strict) {
             throw new IllformedLocaleException("Invalid subtag at index: 0 in tag: " + tag);
         }

         // Exclusive upper bound of the subtags that are candidates for parsing.
         int lastSubtag = subtags.length;
         for (int i = 0; i < subtags.length; ++i) {
             final String subtag = subtags[i];
             if (subtag.isEmpty() || subtag.length() > 8) {
                 if (strict) {
                     throw new IllformedLocaleException("Invalid subtag at index: " + i
                             + " in tag: " + tag);
                 }

                 // Ignore the ill-formed subtag and everything that follows it;
                 // the subtags in [0, i) remain eligible.
                 lastSubtag = i;
                 break;
             }
         }

         final String languageCode = Builder.normalizeAndValidateLanguage(
                 (subtags.length > 0) ? subtags[0] : "", strict);
         String scriptCode = "";
         int nextSubtag = 1;
         if (lastSubtag > nextSubtag) {
             // Not strict: a subtag that isn't a script may still be a region,
             // a variant or an extension key.
             scriptCode = Builder.normalizeAndValidateScript(subtags[nextSubtag], false /* strict */);
             if (!scriptCode.isEmpty()) {
                 nextSubtag++;
             }
         }

         String regionCode = "";
         if (lastSubtag > nextSubtag) {
             regionCode = Builder.normalizeAndValidateRegion(subtags[nextSubtag], false /* strict */);
             if (!regionCode.isEmpty()) {
                 nextSubtag++;
             }
         }

         List<String> variants = null;
         if (lastSubtag > nextSubtag) {
             variants = new ArrayList<String>();
             extractVariantSubtags(subtags, nextSubtag, lastSubtag, variants);
             nextSubtag += variants.size();
         }

         Map<Character, String> extensions = Collections.emptyMap();
         if (lastSubtag > nextSubtag) {
             extensions = new TreeMap<Character, String>();
             nextSubtag = extractExtensions(subtags, nextSubtag, lastSubtag, extensions);
         }

         // Anything left over at this point could not be parsed.
         if (nextSubtag != lastSubtag) {
             if (strict) {
                 throw new IllformedLocaleException("Unparseable subtag: " + subtags[nextSubtag]
                         + " from language tag: " + tag);
             }
         }

         Set<String> unicodeAttributes = Collections.emptySet();
         Map<String, String> unicodeKeywords = Collections.emptyMap();
         if (extensions.containsKey(UNICODE_LOCALE_EXTENSION)) {
             unicodeAttributes = new TreeSet<String>();
             unicodeKeywords = new TreeMap<String, String>();
             parseUnicodeExtension(extensions.get(UNICODE_LOCALE_EXTENSION).split("-"),
                     unicodeKeywords, unicodeAttributes);
         }

         // Variant subtags are joined with "_" in the variant code.
         String variantCode = "";
         if (variants != null && !variants.isEmpty()) {
             StringBuilder variantsBuilder = new StringBuilder(variants.size() * 8);
             for (int i = 0; i < variants.size(); ++i) {
                 if (i != 0) {
                     variantsBuilder.append('_');
                 }
                 variantsBuilder.append(variants.get(i));
             }
             variantCode = variantsBuilder.toString();
         }

         return new Locale(languageCode, regionCode, variantCode, scriptCode,
                 unicodeAttributes, unicodeKeywords, extensions, true /* has validated fields */);
     }
 }