blob: c44896acb2b23e4c7d4c9a99922b1efdadac402f [file] [log] [blame] [edit]
/*
**********************************************************************
* Copyright (c) 2002-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Mark Davis
**********************************************************************
*/
package org.unicode.cldr.util;
import com.google.common.collect.ImmutableList;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UnicodeSet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.util.SupplementalDataInfo.ParentLocaleComponent;
public class LocaleIDParser {
/**
* @return Returns the language.
*/
public String getLanguage() {
return language;
}
/**
* @return Returns the language.
*/
public String getLanguageScript() {
if (script.length() != 0) return language + "_" + script;
return language;
}
public static Set<String> getLanguageScript(Collection<String> in) {
return getLanguageScript(in, null);
}
public static Set<String> getLanguageScript(Collection<String> in, Set<String> output) {
if (output == null) output = new TreeSet<>();
LocaleIDParser lparser = new LocaleIDParser();
for (Iterator<String> it = in.iterator(); it.hasNext(); ) {
output.add(lparser.set(it.next()).getLanguageScript());
}
return output;
}
/**
* @return Returns the region.
*/
public String getRegion() {
return region;
}
/**
* @return Returns the script.
*/
public String getScript() {
return script;
}
/**
* @return Returns the variants.
*/
public String[] getVariants() {
return variants.clone();
}
// TODO, update to RFC3066
// http://www.inter-locale.com/ID/draft-phillips-langtags-08.html
private String language;
private String script;
private String region;
private String[] variants;
static final UnicodeSet letters = new UnicodeSet("[a-zA-Z]");
static final UnicodeSet digits = new UnicodeSet("[0-9]");
public LocaleIDParser set(String localeID) {
region = script = "";
variants = new String[0];
String[] pieces = new String[100]; // fix limitation later
Utility.split(localeID, '_', pieces);
int i = 0;
language = pieces[i++];
if (i >= pieces.length) return this;
if (pieces[i].length() == 4) {
script = pieces[i++];
if (i >= pieces.length) return this;
}
if (pieces[i].length() == 2 && letters.containsAll(pieces[i])
|| pieces[i].length() == 3 && digits.containsAll(pieces[i])) {
region = pieces[i++];
if (i >= pieces.length) return this;
}
List<String> al = new ArrayList<>();
while (i < pieces.length && pieces[i].length() > 0) {
al.add(pieces[i++]);
}
variants = new String[al.size()];
al.toArray(variants);
return this;
}
/**
* Get the parent of a locale. If the input is "root", then return null. For example, if
* localeName is "fr_CA", return "fr".
*
* <p>Only works on canonical locale names (right casing, etc.)!
*
* <p>Formerly this function returned an empty string when localeName was "_VETTING". Now it
* returns "root" where it would have returned an empty string. TODO: explain "__VETTING",
* somehow related to SUMMARY_LOCALE. Note that CLDRLocale.process() changes "__" to "_" before
* this function is called. Reference: https://unicode-org.atlassian.net/browse/CLDR-13133
*/
public static final String getParent(String localeId) {
return getParent(localeId, ParentLocaleComponent.main);
}
/**
* Get the parent of a locale. If the input is "root", then return null. For example, if
* localeId is "fr_CA", return "fr". There is a different inheritance chain for certain
* supplemental data elements.
*
* @param localeId Only works on canonical locale names (right casing, etc.)!
* @param component picks the component that indicates the inheritance chain. Is either the
* standard ('main') used for all ldml-dtd items, or is one of the particular elements in
* supplemental data that has a different inheritance, such as collations or plurals
*/
public static String getParent(String localeId, ParentLocaleComponent component) {
SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
String explicitParent = sdi.getExplicitParentLocale(localeId, component);
if (explicitParent != null) {
return explicitParent;
}
int pos = localeId.lastIndexOf('_');
if (pos >= 0) {
String truncated = localeId.substring(0, pos);
// if the final item is a script, and it is not the default content, then go directly to
// root
int pos2 = getScriptPosition(localeId);
boolean skipNonLikely = sdi.parentLocalesSkipNonLikely(component);
if (pos2 > 0 && skipNonLikely) {
String script = localeId.substring(pos + 1);
String defaultScript = sdi.getDefaultScript(truncated);
if (!script.equals(defaultScript)) {
return "root";
}
}
if (truncated.length() == 0) {
return "root";
}
return truncated;
}
if (localeId.equals("root")) {
return null;
}
return "root";
}
/**
* Return the base language subtag: en_US => en, en_Latn_US => en, en => en, root => root
*
* @param localeID
* @return
*/
public static String getSimpleBaseLanguage(String localeID) {
int pos = localeID.indexOf('_');
if (pos >= 0) {
return localeID.substring(0, pos);
}
return localeID;
}
/**
* If the locale consists of baseLanguage+script, return the position of the separator,
* otherwise -1.
*
* @param s
*/
public static int getScriptPosition(String locale) {
int pos = locale.indexOf('_');
if (pos >= 0 && pos + 5 == locale.length()) {
int pos2 = locale.indexOf('_', pos + 1);
if (pos2 < 0) {
return pos;
}
}
return -1;
}
/**
* Utility to get the simple parent of a locale. If the input is "root", then the output is
* null. This method is similar to the getParent() method above, except that it does NOT pay any
* attention to the explicit parent locales information. Thus, getParent("zh_Hant") will return
* "root", but getSimpleParent("zh_Hant") would return "zh".
*/
public static String getSimpleParent(String localeName) {
int pos = localeName.lastIndexOf('_');
if (pos >= 0) {
return localeName.substring(0, pos);
}
if (localeName.equals("root") || localeName.equals(CLDRFile.SUPPLEMENTAL_NAME)) return null;
return "root";
}
public LocaleIDParser setLanguage(String language) {
this.language = language;
return this;
}
public LocaleIDParser setRegion(String region) {
this.region = region;
return this;
}
public LocaleIDParser setScript(String script) {
this.script = script;
return this;
}
public LocaleIDParser setVariants(String[] variants) {
this.variants = variants.clone();
return this;
}
public enum Level {
Language,
Script,
Region,
Variants,
Other
}
/**
* Returns an int mask indicating the level
*
* @return (2 if script is present) + (4 if region is present) + (8 if region is present)
*/
public Set<Level> getLevels() {
EnumSet<Level> result = EnumSet.of(Level.Language);
if (getScript().length() != 0) result.add(Level.Script);
if (getRegion().length() != 0) result.add(Level.Region);
if (getVariants().length != 0) result.add(Level.Variants);
return result;
}
public Set<String> getSiblings(Set<String> set) {
Set<Level> myLevel = getLevels();
String localeID = toString();
String parentID = getParent(localeID);
String prefix = (parentID == null || "root".equals(parentID)) ? "" : parentID + "_";
Set<String> siblings = new TreeSet<>();
for (String id : set) {
if (id.startsWith(prefix) && set(id).getLevels().equals(myLevel)) {
siblings.add(id);
}
}
set(localeID); // leave in known state
return siblings;
}
@Override
public String toString() {
StringBuffer result = new StringBuffer(language);
if (script.length() != 0) result.append('_').append(script);
if (region.length() != 0) result.append('_').append(region);
if (variants != null) {
for (int i = 0; i < variants.length; ++i) {
result.append('_').append(variants[i]);
}
}
return result.toString();
}
public static final ImmutableList<String> FALLBACK_CHAIN = ImmutableList.of();
public static final ImmutableList<String> ROOT_PARENT_CHAIN =
ImmutableList.of(XMLSource.ROOT_ID);
/**
* Return localeIds getParent chain. Return null if there is none (localeID == root or
* code-fallback). Note: an L1 locale will have exactly 1 element, and be identical to
* ROOT_PARENT_CHAIN. TODO optimize by caching the chains Returns a
*/
public static List<String> getParentChain(String localeID) {
if (XMLSource.ROOT_ID.equals(localeID)) {
return FALLBACK_CHAIN;
}
List<String> result = null;
while (true) {
String parent = getParent(localeID);
if (parent.equals(XMLSource.ROOT_ID)) {
if (result == null) {
return ROOT_PARENT_CHAIN;
} else {
result.addAll(ROOT_PARENT_CHAIN);
return ImmutableList.copyOf(result);
}
}
if (result == null) {
result = new ArrayList<>();
}
result.add(parent);
localeID = parent;
}
}
public static boolean isL1(String localeId) {
return XMLSource.ROOT_ID.equals(getParent(localeId));
}
}