blob: 5511341819f2b170a1d26415fe49eb8065a820d6 [file] [log] [blame]
package org.unicode.cldr.unittest;
import com.google.common.base.Joiner;
import com.google.common.collect.Sets;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row.R2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import org.unicode.cldr.draft.ScriptMetadata;
import org.unicode.cldr.draft.ScriptMetadata.Info;
import org.unicode.cldr.tool.GenerateMaximalLocales;
import org.unicode.cldr.tool.LikelySubtags;
import org.unicode.cldr.util.Builder;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.ChainedMap;
import org.unicode.cldr.util.ChainedMap.M3;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Iso3166Data;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
public class TestInheritance extends TestFmwk {
// Shared CLDR test configuration; the tests below obtain their factories, CLDR files and the
// English display-name file from it.
static CLDRConfig testInfo = CLDRConfig.getInstance();
// Verbose-output switch read from the "DEBUG" property (false when the property is absent).
private static boolean DEBUG = CldrUtility.getProperty("DEBUG", false);
// Matcher built from the regex in the "XPATH" property (".*" when absent); it is used by
// TestCldrFileConsistency to select which paths are examined.
private static Matcher pathMatcher =
PatternCache.get(CldrUtility.getProperty("XPATH", ".*")).matcher("");
/**
 * Command-line entry point: creates a TestInheritance instance and hands the arguments to its
 * run(args) method.
 */
public static void main(String[] args) throws IOException {
new TestInheritance().run(args);
}
// Supplemental data shared by the tests in this class.
private static final SupplementalDataInfo dataInfo = SupplementalDataInfo.getInstance();
// Locale IDs that the supplemental data lists as default content locales.
private static final Set<String> defaultContents = dataInfo.getDefaultContentLocales();
// When true, verifyLikelySubtagsImplicationsForDefaultContents also reports likely-subtag
// sources that have no default content entry and collects suggested additions.
private static final boolean EXPECT_EQUALITY = false;
// All locale IDs offered by the full CLDR factory.
private static Set<String> availableLocales = testInfo.getFullCldrFactory().getAvailable();
/**
 * Checks that, for every language that has child locales in CLDR, CLDR also contains a locale
 * for each language+territory pair whose population data gives the language an official or
 * de facto official status there. Missing locales are reported with errln; covered ones are
 * logged. When DEBUG is set, a tab-separated per-language table is additionally printed to
 * System.out.
 */
public void TestLocalesHaveOfficial() {
// If we have a language, we have all the region locales where the
// language is official
// Territories ignored by this test: 001, 150, the region codes not meant for translation,
// and every territory code that has an entry in the territory alias data.
Set<String> SKIP_TERRITORIES = new HashSet<>(Arrays.asList("001", "150"));
SKIP_TERRITORIES.addAll(Iso3166Data.getRegionCodesNotForTranslation());
for (Entry<String, R2<List<String>, String>> s :
dataInfo.getLocaleAliasInfo().get("territory").entrySet()) {
SKIP_TERRITORIES.add(s.getKey());
}
LanguageTagParser ltp = new LanguageTagParser();
// language -> script-less locales already recorded from the population data
Relation<String, String> languageLocalesSeen =
Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
// Only consulted by the DEBUG table at the end of the method.
Set<String> testOrg = StandardCodes.make().getLocaleCoverageLocales("google");
// language -> official status -> script-less locale -> TRUE
ChainedMap.M4<String, OfficialStatus, String, Boolean> languageToOfficialChildren =
ChainedMap.of(
new TreeMap<String, Object>(),
new TreeMap<OfficialStatus, Object>(),
new TreeMap<String, Object>(),
Boolean.class);
// gather the data
for (String language : dataInfo.getLanguagesForTerritoriesPopulationData()) {
for (String territory : dataInfo.getTerritoriesForPopulationData(language)) {
if (SKIP_TERRITORIES.contains(territory)) {
continue;
}
PopulationData data =
dataInfo.getLanguageAndTerritoryPopulationData(language, territory);
OfficialStatus status = data.getOfficialStatus();
// Pairs whose status is unknown are not recorded here.
if (data.getOfficialStatus() != OfficialStatus.unknown) {
String locale = removeScript(language + "_" + territory);
String lang = removeScript(ltp.set(locale).getLanguage());
languageToOfficialChildren.put(lang, status, locale, Boolean.TRUE);
languageLocalesSeen.put(lang, locale);
}
}
}
// flesh it out by adding 'clean' codes.
// also get the child locales in cldr.
Relation<String, String> languageToChildren =
Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
for (String locale : testInfo.getCldrFactory().getAvailable()) {
String lang = ltp.set(locale).getLanguage();
if (SKIP_TERRITORIES.contains(ltp.getRegion())) {
continue;
}
lang = removeScript(lang);
locale = removeScript(locale);
// Only locales that differ from their bare language code count as children.
if (!lang.equals(locale)) {
languageToChildren.put(lang, locale);
Set<String> localesSeen = languageLocalesSeen.get(lang);
// CLDR locales without a recorded status are filed under OfficialStatus.unknown.
if (localesSeen == null || !localesSeen.contains(locale)) {
languageToOfficialChildren.put(
lang, OfficialStatus.unknown, locale, Boolean.TRUE);
}
}
}
// Compare, per language, the CLDR child locales with the official / de facto official ones.
for (Entry<String, Set<String>> languageAndChildren : languageToChildren.keyValuesSet()) {
String language = languageAndChildren.getKey();
Set<String> children = languageAndChildren.getValue();
M3<OfficialStatus, String, Boolean> officalStatusToChildren =
languageToOfficialChildren.get(language);
for (Entry<OfficialStatus, Map<String, Boolean>> entry : officalStatusToChildren) {
OfficialStatus status = entry.getKey();
// All other statuses are ignored by the check.
if (status != OfficialStatus.official
&& status != OfficialStatus.de_facto_official) {
continue;
}
Set<String> officalChildren = entry.getValue().keySet();
if (!children.containsAll(officalChildren)) {
Set<String> missing = new TreeSet<>(officalChildren);
missing.removeAll(children);
String message =
"Missing CLDR locales for " + status + " languages: " + missing;
errln(message);
} else {
logln(
"CLDR locales "
+ children
+ " cover "
+ status
+ " locales "
+ officalChildren);
}
}
}
// Optional diagnostic table: one row per language in testOrg, and for each official status
// the number of locales followed by their formatted names.
if (DEBUG) {
Set<String> languages = new TreeSet<>(languageToChildren.keySet());
languages.addAll(languageToOfficialChildren.keySet());
System.out.print("\ncode\tlanguage");
for (OfficialStatus status : OfficialStatus.values()) {
System.out.print("\tNo\t" + status);
}
System.out.println();
for (String language : languages) {
if (!testOrg.contains(language)) {
continue;
}
System.out.print(language + "\t" + testInfo.getEnglish().getName(language));
M3<OfficialStatus, String, Boolean> officialChildren =
languageToOfficialChildren.get(language);
for (OfficialStatus status : OfficialStatus.values()) {
Map<String, Boolean> children = officialChildren.get(status);
if (children == null) {
System.out.print("\t" + 0 + "\t");
} else {
System.out.print(
"\t" + children.size() + "\t" + show(children.keySet(), false));
}
}
System.out.println();
}
}
}
/**
 * Formats a set of locale IDs as a ", "-separated list for diagnostic output.
 *
 * <p>Each entry consists of the English script name and/or territory name of the locale (joined
 * by "-" when both are present), followed by the locale ID in square brackets. When
 * {@code showStatus} is true the brackets also contain "; " plus the official status from the
 * population data, or "?" when no population data is found.
 *
 * @param joint locale IDs to format
 * @param showStatus whether to append the official status of each language/territory pair
 * @return the formatted list; empty when {@code joint} is empty
 */
private String show(Set<String> joint, boolean showStatus) {
    // The buffer never leaves this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder b = new StringBuilder();
    for (String s : joint) {
        if (b.length() != 0) {
            b.append(", ");
        }
        LanguageTagParser ltp = new LanguageTagParser().set(s);
        String script = ltp.getScript();
        if (script.length() != 0) {
            b.append(testInfo.getEnglish().getName(CLDRFile.SCRIPT_NAME, script));
        }
        String region = ltp.getRegion();
        if (region.length() != 0) {
            if (script.length() != 0) {
                b.append("-");
            }
            b.append(testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, region));
        }
        b.append(" [").append(s);
        if (showStatus) {
            // Look the pair up by bare language first, then by language+script.
            PopulationData data =
                    dataInfo.getLanguageAndTerritoryPopulationData(ltp.getLanguage(), region);
            if (data == null) {
                data =
                        dataInfo.getLanguageAndTerritoryPopulationData(
                                ltp.getLanguageScript(), region);
            }
            b.append("; ");
            b.append(data == null ? "?" : data.getOfficialStatus());
        }
        b.append("]");
    }
    return b.toString();
}
/**
 * Returns the given locale ID with its script subtag cleared. IDs that contain no underscore
 * are returned unchanged without being parsed.
 *
 * @param lang a locale ID or bare language code
 * @return the ID rebuilt by the parser with an empty script
 */
private String removeScript(String lang) {
    if (lang.contains("_")) {
        // The script is cleared unconditionally; an earlier variant that only did so for
        // default-content language+script pairs is no longer in use.
        LanguageTagParser parser = new LanguageTagParser().set(lang);
        parser.setScript("");
        return parser.toString();
    }
    return lang;
}
/**
 * Cross-checks the default content locales against the likely-subtags data.
 *
 * <p>The method first collects, per base language, the available locales and the scripts they
 * use, validates the subtags of every default content locale and of every likely-subtags
 * entry, and then runs the three verify* helpers on the collected maps.
 */
public void TestLikelyAndDefaultConsistency() {
    LikelySubtags likelySubtags = new LikelySubtags();
    LanguageTagParser ltp = new LanguageTagParser();
    // find multiscript locales
    Relation<String, String> base2scripts =
            Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    // parent locale -> its default content child
    Map<String, String> parent2default = new TreeMap<>();
    Relation<String, String> base2locales =
            Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
    Set<String> knownMultiScriptLanguages = new HashSet<>(Arrays.asList("bm", "ha"));
    // get multiscript locales
    for (String localeID : availableLocales) {
        String script = ltp.set(localeID).getScript();
        final String base = ltp.getLanguage();
        if (!availableLocales.contains(base) && !base.equals("und")) {
            errln("Missing lang-subtag base " + base + " for: " + localeID);
        }
        base2locales.put(base, localeID);
        if (!script.isEmpty() && !base.equals("en")) { // HACK for en
            base2scripts.put(base, script);
        }
        // For the known multi-script languages, script-less locales contribute the
        // language's default script.
        if (script.isEmpty() && knownMultiScriptLanguages.contains(base)) {
            base2scripts.put(base, dataInfo.getDefaultScript(base));
        }
    }
    // get default contents
    for (String localeID : defaultContents) {
        checkLocale(localeID, false);
        String parent =
                LocaleIDParser.getParent(localeID); // was using getSimpleParent, not sure why
        parent2default.put(parent, localeID);
    }
    // get likely
    Map<String, String> likely2Maximized = likelySubtags.getToMaximized();
    for (Entry<String, String> likelyAndMaximized : likely2Maximized.entrySet()) {
        checkLocale(likelyAndMaximized.getKey(), true);
        checkLocale(likelyAndMaximized.getValue(), true);
    }
    // Known, accepted mismatches between default content and likely subtags, indexed in
    // both directions.
    Map<String, String> exceptionDcLikely = new HashMap<>();
    Map<String, String> exceptionLikelyDc = new HashMap<>();
    for (String[] s :
            new String[][] {
                {"ar_001", "ar_Arab_EG"}, {"nb", "no_Latn_NO"},
            }) {
        exceptionDcLikely.put(s[0], s[1]);
        exceptionLikelyDc.put(s[1], s[0]);
    }
    verifyDefaultContentsImplicationsForLikelySubtags(
            ltp, parent2default, likely2Maximized, exceptionDcLikely);
    verifyLikelySubtagsImplicationsForDefaultContents(
            ltp, base2scripts, parent2default, likely2Maximized, exceptionLikelyDc);
    verifyScriptsWithDefaultContents(ltp, base2scripts, parent2default, base2locales);
}
/**
 * Checks the parent of every available lang_script locale that is not listed in
 * ALLOW_DIFFERENT_PARENT_LOCALE: a default content locale must have its language subtag as
 * parent, any other lang_script locale must have "root" as parent, and a default content
 * locale must not have an explicit (non-truncation) parent.
 */
public void TestParentLocaleRelationships() {
    // Testing invariant relationships between locales - See
    // http://unicode.org/cldr/trac/ticket/5758
    /* Examples:
    <parentLocale parent="no" locales="nb nn no_NO"/>
    default content locales distinguish the child locale that has identical content, such as:
    ebu_KE ee_GH el_GR en_Dsrt_US en_Shaw_GB en_US eo_001 es_ES et_EE eu_ES ewo_CM
    */
    Matcher langScript = PatternCache.get("^[a-z]{2,3}_[A-Z][a-z]{3}$").matcher("");
    for (final String loc : availableLocales) {
        // Only lang_script locales outside the allow-list are examined.
        if (!langScript.reset(loc).matches() || ALLOW_DIFFERENT_PARENT_LOCALE.contains(loc)) {
            continue;
        }
        final boolean isDefaultContent = defaultContents.contains(loc);
        final String expectedParent = isDefaultContent ? loc.split("_")[0] : "root";
        final String truncationParent = LocaleIDParser.getSimpleParent(loc);
        final String actualParent = LocaleIDParser.getParent(loc);
        if (!expectedParent.equals(actualParent)) {
            StringBuilder complaint = new StringBuilder("Unexpected parent locale for locale ");
            complaint
                    .append(loc)
                    .append(". Expected: ")
                    .append(expectedParent)
                    .append(" Got: ")
                    .append(actualParent)
                    .append(" ")
                    .append(ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE);
            errln(complaint.toString());
        }
        // A parent that differs from plain truncation is an explicit parent.
        final boolean explicitParent = !truncationParent.equals(actualParent);
        if (isDefaultContent && explicitParent) {
            errln(
                    "Locale "
                            + loc
                            + " can't have an explicit parent AND be a default content locale");
        }
    }
}
// Hint appended to parent-locale error messages, pointing maintainers at the allow-list below.
final String ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE =
"See ALLOW_DIFFERENT_PARENT_LOCALE in TestInheritance.java";
// Unmodifiable set of locales exempted from the parent-locale checks in
// TestParentLocaleRelationships and TestParentLocaleInvariants.
public final Set<String> ALLOW_DIFFERENT_PARENT_LOCALE =
Collections.unmodifiableSet(
Sets.newHashSet(
// Update this if additional locales have explicit parents in a
// different language code
// Per CLDR-2698/14493 we allow nb,nn to have an explicit parent no
// which is a different language code.
"nn",
"nb",
// Per CLDR-15276 hi-Latn can have an explicit parent
"hi_Latn"));
/**
 * Checks invariants between every available locale and its parent. Unless the parent is root,
 * the parent must have the same language code (locales in ALLOW_DIFFERENT_PARENT_LOCALE are
 * exempt) and the same likely script. A bare language locale outside the allow-list must not
 * have an explicit parent that is root. Locales without a parent are skipped.
 */
public void TestParentLocaleInvariants() {
    // Testing invariant relationships in parent locales - See
    // http://unicode.org/cldr/trac/ticket/7887
    CLDRLocale cldrRoot = CLDRLocale.getInstance("root");
    LikelySubtags likely = new LikelySubtags();
    for (String loc : availableLocales) {
        CLDRLocale cldrLoc = CLDRLocale.getInstance(loc);
        CLDRLocale cldrParent = cldrLoc.getParent();
        if (cldrParent == null) {
            continue;
        }
        CLDRLocale locLikely = CLDRLocale.getInstance(likely.maximize(loc));
        CLDRLocale parentLikely =
                CLDRLocale.getInstance(likely.maximize(cldrParent.toString()));
        final String locLang = cldrLoc.getLanguage();
        final String locScript = cldrLoc.getScript();
        final String locRegion = cldrLoc.getCountry();
        final String parentLang = cldrParent.getLanguage();
        final boolean parentIsRoot = cldrRoot.equals(cldrParent);
        final boolean exempt = ALLOW_DIFFERENT_PARENT_LOCALE.contains(loc);

        if (!parentIsRoot) {
            if (!exempt && !locLang.equals(parentLang)) {
                errln(
                        "Parent locale ["
                                + cldrParent
                                + "] for locale ["
                                + loc
                                + "] cannot be a different language code. "
                                + ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE);
            }
            if (!locLikely.getScript().equals(parentLikely.getScript())) {
                errln(
                        "Parent locale ["
                                + cldrParent
                                + "] for locale ["
                                + loc
                                + "] cannot have a different script code.");
            }
        }

        // A parent other than the truncation parent was assigned explicitly.
        String cldrTruncationParent = LocaleIDParser.getSimpleParent(loc);
        boolean hasExplicitParent = !cldrTruncationParent.equals(cldrParent.toString());
        boolean isBaseLanguage = locScript.isEmpty() && locRegion.isEmpty();
        if (hasExplicitParent && parentIsRoot && isBaseLanguage && !exempt) {
            errln(
                    "Base language locale ["
                            + loc
                            + "] cannot have an explicit parent ("
                            + cldrParent
                            + ") "
                            + ALLOW_DIFFERENT_PARENT_LOCALE_MESSAGE);
        }
    }
}
/**
 * Walks the parent chain of every available locale and reports an error when a locale occurs
 * twice in its own chain, i.e. when the parent relationship is cyclic. The walk for a locale
 * ends at the first repeated element or when a locale has no parent.
 */
public void TestParentLocalesForCycles() {
    // Testing for cyclic relationships in parent locales - See
    // http://unicode.org/cldr/trac/ticket/7887
    for (String loc : availableLocales) {
        List<String> inheritanceChain = new ArrayList<>();
        inheritanceChain.add(loc);
        String currentLoc = LocaleIDParser.getParent(loc);
        while (currentLoc != null) {
            // Test membership before appending, so the repeated element shows up exactly
            // once more at the end of the reported chain.
            boolean alreadySeen = inheritanceChain.contains(currentLoc);
            inheritanceChain.add(currentLoc);
            if (alreadySeen) {
                errln(
                        "Inheritance chain for locale ["
                                + loc
                                + "] contains a cyclic relationship. "
                                + inheritanceChain.toString());
                break;
            }
            currentLoc = LocaleIDParser.getParent(currentLoc);
        }
    }
}
/**
 * Verifies the shape of the default content chain of every base language.
 *
 * <p>For a language treated as single-script, the default content of the base must have no
 * script and (except for the languages listed below) a region. For a multi-script language,
 * the default content of the base must be base+script without a region, and that locale must
 * in turn have a default content with a region.
 *
 * @param ltp parser reused for splitting locale IDs; its state is overwritten
 * @param base2scripts base language to the scripts recorded for it (no entry means none)
 * @param parent2default parent locale to its default content child
 * @param base2locales base language to its locales; only the key set is used
 */
private void verifyScriptsWithDefaultContents(
        LanguageTagParser ltp,
        Relation<String, String> base2scripts,
        Map<String, String> parent2default,
        Relation<String, String> base2locales) {
    Set<String> skip = Builder.with(new HashSet<String>()).addAll("root", "und").freeze();
    // Languages handled like single-script ones even though scripts are recorded for them.
    Set<String> languagesWithOneOrLessLocaleScriptInCommon =
            new HashSet<>(Arrays.asList("bm", "ha", "hi", "ms", "iu", "mn"));
    Set<String> baseLanguagesWhoseDefaultContentHasNoRegion =
            new HashSet<>(Arrays.asList("no"));
    // for each base we have to have,
    // if multiscript, we have default contents for base+script,
    // base+script+region;
    // otherwise base+region.
    for (String base : base2locales.keySet()) {
        if (skip.contains(base)) {
            continue;
        }
        String defaultContent = parent2default.get(base);
        if (defaultContent == null) {
            errln("Missing default content for: " + base + " " + suggestLikelySubtagFor(base));
            continue;
        }
        Set<String> scripts = base2scripts.get(base);
        ltp.set(defaultContent);
        String script = ltp.getScript();
        String region = ltp.getRegion();
        if (scripts == null || languagesWithOneOrLessLocaleScriptInCommon.contains(base)) {
            if (!script.isEmpty()) {
                errln(
                        "Script should be empty in default content for: "
                                + base
                                + ","
                                + defaultContent);
            }
            if (region.isEmpty()
                    && !baseLanguagesWhoseDefaultContentHasNoRegion.contains(base)) {
                errln(
                        "Region must not be empty in default content for: "
                                + base
                                + ","
                                + defaultContent);
            }
        } else {
            if (script.isEmpty()) {
                errln(
                        "Script should not be empty in default content for: "
                                + base
                                + ","
                                + defaultContent);
            }
            if (!region.isEmpty()) {
                errln(
                        "Region should be empty in default content for: "
                                + base
                                + ","
                                + defaultContent);
            }
            // Second level: the base+script locale needs a default content with a region.
            String defaultContent2 = parent2default.get(defaultContent);
            if (defaultContent2 == null) {
                errln("Missing default content for: " + defaultContent);
                continue;
            }
            ltp.set(defaultContent2);
            region = ltp.getRegion();
            if (region.isEmpty()) {
                // Report the pair that was actually checked: parent and its default content.
                errln(
                        "Region must not be empty in default content for: "
                                + defaultContent
                                + ","
                                + defaultContent2);
            }
        }
    }
}
/**
 * Checks, for every likely-subtags entry, that the default content data is consistent with it
 * (cases a-c described below) and reports each inconsistency with errln.
 *
 * @param ltp parser reused for splitting locale IDs; its state is overwritten
 * @param base2scripts base language to recorded scripts; only read when EXPECT_EQUALITY is set
 * @param parent2default parent locale to its default content child
 * @param likely2Maximized likely-subtags source to its maximized locale
 * @param exceptionLikelyDc maximized locale to a default content value that is accepted as is
 */
private void verifyLikelySubtagsImplicationsForDefaultContents(
LanguageTagParser ltp,
Relation<String, String> base2scripts,
Map<String, String> parent2default,
Map<String, String> likely2Maximized,
Map<String, String> exceptionLikelyDc) {
// Now check invariants for all LikelySubtags implications for Default
// Contents
// a) suppose likely max for la_Scrp => la_Scrp_RG
// Then default contents la_Scrp => la_Scrp_RG
// b) suppose likely max for la_RG => la_Scrp_RG
// Then we can draw no conclusions // was default contents la_Scrp =>
// la_Scrp_RG
// c) suppose likely max for la => la_Scrp_RG
// Then default contents la => la_Scrp && la_Scrp => la_Scrp_RG
// or default contents la => la_RG && ! la_Scrp => la_Scrp_RG
// Only filled (and reported at the end) when EXPECT_EQUALITY is true.
TreeSet<String> additionalDefaultContents = new TreeSet<>();
for (Entry<String, String> entry : likely2Maximized.entrySet()) {
String source = entry.getKey();
String likelyMax = entry.getValue();
String sourceLang = ltp.set(source).getLanguage();
// Sources that are skipped entirely: und*, zh_Hani and tl.
if (sourceLang.equals("und") || source.equals("zh_Hani") || source.equals("tl")) {
continue;
}
String sourceScript = ltp.getScript();
String sourceRegion = ltp.getRegion();
String likelyMaxLang = ltp.set(likelyMax).getLanguage();
String likelyMaxScript = ltp.getScript();
String likelyMaxRegion = ltp.getRegion();
String dc = parent2default.get(source);
// Skip entries whose default content matches a registered exception.
String possibleException = exceptionLikelyDc.get(likelyMax);
if (possibleException != null && possibleException.equals(dc)) {
continue;
}
String likelyLangScript = likelyMaxLang + "_" + likelyMaxScript;
String dcFromLangScript = parent2default.get(likelyLangScript);
boolean consistent = true;
String caseNumber = null;
// NOTE: 'consistent' was just initialized to true, so this guard always passes.
if (consistent) {
if (!sourceScript.isEmpty()) {
caseNumber = "a";
if (dc == null) {
if (EXPECT_EQUALITY) {
String expected = likelyMax;
errln(
"Default contents null for "
+ source
+ ", expected:\t"
+ expected);
additionalDefaultContents.add(expected);
}
continue;
}
consistent = likelyMax.equals(dc);
} else if (!sourceRegion.isEmpty()) { // b
caseNumber = "b";
// consistent = likelyMax.equals(dcFromLangScript);
} else { // c
caseNumber = "c";
if (dc == null) {
if (EXPECT_EQUALITY) {
String expected =
base2scripts.get(source) == null
? likelyMaxLang + "_" + likelyMaxRegion
: likelyMaxLang + "_" + likelyMaxScript;
errln(
"Default contents null for "
+ source
+ ", expected:\t"
+ expected);
additionalDefaultContents.add(expected);
}
continue;
}
String dcScript = ltp.set(dc).getScript();
// Either la => la_Scrp with la_Scrp => la_Scrp_RG, or a script-less default
// content while la_Scrp does not default to the likely maximum.
consistent =
likelyLangScript.equals(dc) && likelyMax.equals(dcFromLangScript)
|| dcScript.isEmpty() && !likelyMax.equals(dcFromLangScript);
// || dcScript.isEmpty() && dcRegion.equals(likelyMaxRegion)
// && dcFromLangScript == null;
}
}
if (!consistent) {
errln(
"default contents inconsistent with likely subtag: ("
+ caseNumber
+ ")"
+ "\n\t"
+ source
+ " => (ls) "
+ likelyMax
+ "\n\t"
+ source
+ " => (dc) "
+ dc
+ "\n\t"
+ likelyLangScript
+ " => (dc) "
+ dcFromLangScript);
}
}
if (additionalDefaultContents.size() != 0) {
errln(
"Suggested additions to supplementalMetadata/../defaultContent:\n"
+ Joiner.on(" ").join(additionalDefaultContents));
}
}
/**
 * Checks, for every parent/default-content pair, that the likely-subtags data is consistent
 * with it (cases a-c described below) and reports each inconsistency with errln.
 *
 * @param ltp parser reused for splitting locale IDs; its state is overwritten
 * @param parent2default parent locale to its default content child
 * @param likely2Maximized likely-subtags source to its maximized locale
 * @param exceptionDcLikely default content locale to a likely maximum that is accepted as is
 */
private void verifyDefaultContentsImplicationsForLikelySubtags(
LanguageTagParser ltp,
Map<String, String> parent2default,
Map<String, String> likely2Maximized,
Map<String, String> exceptionDcLikely) {
// Now check invariants for all Default Contents implications for
// LikelySubtags
// a) suppose default contents la => la_Scrp.
// Then the likely contents for la => la_Scrp_*
// b) suppose default contents la => la_RG.
// Then the likely contents for la => la_*_RG
// c) suppose default contents la_Scrp => la_Scrp_RG.
// Then the likely contents of la_Scrp => la_Scrp_RG OR likely contents
// for la => la_*_*
for (Entry<String, String> parentAndDefault : parent2default.entrySet()) {
String source = parentAndDefault.getKey();
String dc = parentAndDefault.getValue();
String likelyMax = likely2Maximized.get(source);
// skip special exceptions
String possibleException = exceptionDcLikely.get(dc);
if (possibleException != null && possibleException.equals(likelyMax)) {
continue;
}
String sourceLang = ltp.set(source).getLanguage();
String sourceScript = ltp.getScript();
// there cannot be a sourceRegion
String dcScript = ltp.set(dc).getScript();
String dcRegion = ltp.getRegion();
// The likely-max parts stay empty when the source has no likely-subtags entry, which
// makes cases a and b below fail.
String likelyMaxLang = "", likelyMaxScript = "", likelyMaxRegion = "";
if (likelyMax != null) {
likelyMaxLang = ltp.set(likelyMax).getLanguage();
likelyMaxScript = ltp.getScript();
likelyMaxRegion = ltp.getRegion();
}
// Likely maximum of the bare source language; only consulted in case c.
String likelyMax2 = likely2Maximized.get(sourceLang);
boolean consistent = true;
if (sourceScript.isEmpty()) { // a or b
if (!dcScript.isEmpty()) { // a
consistent = likelyMaxLang.equals(source) && likelyMaxScript.equals(dcScript);
} else { // b
consistent = likelyMaxLang.equals(source) && likelyMaxRegion.equals(dcRegion);
}
} else { // c
consistent = dc.equals(likelyMax) || likelyMax2 != null;
}
if (!consistent) {
errln(
"likely subtag inconsistent with default contents: "
+ "\n\t"
+ source
+ " =>( dc) "
+ dc
+ "\n\t"
+ source
+ " => (ls) "
+ likelyMax
+ (source.equals(sourceLang)
? ""
: "\n\t" + sourceLang + " => (ls) " + likelyMax2));
}
}
}
/**
 * Builds a hint for a base language that lacks default content.
 *
 * @param base locale ID; a suggestion is only attempted when it is a bare language code
 * @return a suggested likelySubtag element when the first primary basic-language-data entry of
 *     the language has exactly one script and exactly one territory; otherwise a
 *     "(no suggestion ...)" text naming the reason
 */
static String suggestLikelySubtagFor(String base) {
    SupplementalDataInfo sdi = SupplementalDataInfo.getInstance();
    CLDRLocale loc = CLDRLocale.getInstance(base);
    if (!loc.getLanguage().equals(base)) {
        // no suggestion unless just a language locale.
        return " (no suggestion- not a simple language locale)";
    }
    for (BasicLanguageData candidate : sdi.getBasicLanguageData(base)) {
        if (candidate.getType() != BasicLanguageData.Type.primary) {
            continue;
        }
        // The first primary entry decides the outcome.
        Set<String> scripts = candidate.getScripts();
        Set<String> territories = candidate.getTerritories();
        if (scripts.size() != 1 || territories.size() != 1) {
            return "(no suggestion - multiple scripts or territories)";
        }
        String maximized =
                base + "_" + scripts.iterator().next() + "_" + territories.iterator().next();
        return createSuggestion(loc, CLDRLocale.getInstance(maximized));
    }
    return ("(no suggestion- no data)");
}
/**
 * Formats a suggested likelySubtag element mapping {@code loc} to {@code toLoc}, followed by an
 * XML comment that spells out the display names involved.
 */
private static String createSuggestion(CLDRLocale loc, CLDRLocale toLoc) {
    StringBuilder suggestion = new StringBuilder();
    // The element itself.
    suggestion
            .append(" Suggest this to likelySubtags.xml: <likelySubtag from=\"")
            .append(loc)
            .append("\" to=\"")
            .append(toLoc)
            .append("\"/>\n");
    // The explanatory comment: { language; ?; ? } => { language; script; country }.
    suggestion
            .append(" <!--{ ")
            .append(loc.getDisplayName())
            .append("; ?; ? } => { ")
            .append(loc.getDisplayName())
            .append("; ")
            .append(toLoc.toULocale().getDisplayScript())
            .append("; ")
            .append(toLoc.toULocale().getDisplayCountry())
            .append(" }-->");
    return suggestion.toString();
}
/**
 * Verifies that the languages, scripts and territories used in the territory population data
 * and in the basic language data are valid, non-deprecated codes.
 *
 * <p>Each script and territory code is checked only once across both data sets, so an invalid
 * or deprecated code is reported a single time rather than once per language that uses it.
 */
public void TestDeprecatedTerritoryDataLocaleIds() {
    Set<String> checkedRegions = new HashSet<>();
    Set<String> checkedScripts = new HashSet<>();
    for (String language : dataInfo.getLanguagesForTerritoriesPopulationData()) {
        checkLocale(language, false); // checks la_Scrp and la
        for (String region : dataInfo.getTerritoriesForPopulationData(language)) {
            // Set.add returns false for codes that were already checked.
            if (checkedRegions.add(region)) {
                checkValidCode(language + "_" + region, "territory", region, false);
            }
        }
    }
    for (String language : dataInfo.getBasicLanguageDataLanguages()) {
        checkLocale(language, false); // checks la_Scrp and la
        Set<BasicLanguageData> data = dataInfo.getBasicLanguageData(language);
        for (BasicLanguageData datum : data) {
            for (String script : datum.getScripts()) {
                if (checkedScripts.add(script)) {
                    checkValidCode(language + "_" + script, "script", script, false);
                }
            }
            for (String region : datum.getTerritories()) {
                if (checkedRegions.add(region)) {
                    checkValidCode(language + "_" + region, "territory", region, false);
                }
            }
        }
    }
}
/**
 * Compares the script metadata with the basic language data: for every script whose likely
 * language is not "und", that language should have basic language data, and one of its
 * entries should list the script. Deviations are only logged as warnings.
 */
public void TestBasicLanguageDataAgainstScriptMetadata() {
    // the invariants are:
    // if there is primary data, the script must be there
    // otherwise it must be in the secondary
    for (String script : ScriptMetadata.getScripts()) {
        Info info = ScriptMetadata.getInfo(script);
        String language = info.likelyLanguage;
        if (language.equals("und")) {
            continue;
        }
        Map<Type, BasicLanguageData> data = dataInfo.getBasicLanguageDataMap(language);
        if (data == null) {
            logln(
                    "Warning: ScriptMetadata has "
                            + language
                            + " for "
                            + script
                            + ","
                            + " but "
                            + language
                            + " is missing in language_script.txt");
            continue;
        }
        // Look for the script in any of the language's entries.
        boolean scriptListed = false;
        for (BasicLanguageData entry : data.values()) {
            if (entry.getScripts().contains(script)) {
                scriptListed = true;
                break;
            }
        }
        if (!scriptListed) {
            logln(
                    "Warning: ScriptMetadata has "
                            + language
                            + " for "
                            + script
                            + ","
                            + " but "
                            + language
                            + " doesn't have "
                            + script
                            + " in language_script.txt");
        }
    }
}
/**
 * Verifies, for every available locale, that each path of the CLDR file selected by
 * pathMatcher has a non-null full XPath. Per-path details are only reported when DEBUG is set;
 * otherwise a per-locale count is reported, plus a hint on how to obtain the details.
 */
public void TestCldrFileConsistency() {
    boolean anyLocaleFailed = false;
    for (String locale : testInfo.getCldrFactory().getAvailable()) {
        CLDRFile cldrFileToCheck = testInfo.getCLDRFile(locale, false);
        int badPathCount = 0;
        for (String path : cldrFileToCheck) {
            if (!pathMatcher.reset(path).find()) {
                continue;
            }
            if (cldrFileToCheck.getFullXPath(path) != null) {
                continue;
            }
            // try again, for debugging
            String fullPath = cldrFileToCheck.getFullXPath(path);
            String value = cldrFileToCheck.getStringValue(path);
            if (DEBUG) {
                errln(
                        "Invalid full path\t"
                                + locale
                                + ", "
                                + path
                                + ", "
                                + fullPath
                                + ", "
                                + value);
            }
            badPathCount++;
        }
        if (badPathCount == 0) {
            logln(locale);
            continue;
        }
        anyLocaleFailed = true;
        errln(locale + "\tinvalid getFullXPath() values:" + badPathCount);
    }
    if (anyLocaleFailed && !DEBUG) {
        errln("Use -DDEBUG to see details");
    }
}
// NOTE(review): none of the three fields below appears to be referenced in the visible part of
// this file (methods use dataInfo and their own local or parameter parsers) - confirm before
// removing.
// Second handle on the supplemental data.
static SupplementalDataInfo info = SupplementalDataInfo.getInstance();
// Per-instance locale ID parser.
LanguageTagParser ltp = new LanguageTagParser();
// Matcher for paths that start with //ldml and contain /alias.
Matcher aliasMatcher = PatternCache.get("//ldml.*/alias.*").matcher("");
/**
 * Minimizes a locale ID with the given likely-subtags map. When the direct attempt yields
 * null, the attempt is repeated on the ID stripped of variants and extensions, and the
 * original variants and extensions are put back onto a non-null result.
 *
 * @param likelySubtags likely-subtags mapping passed through to GenerateMaximalLocales
 * @param locale locale ID to minimize
 * @return the minimized ID, or null when both attempts yield null
 */
private String minimize(Map<String, String> likelySubtags, String locale) {
    String direct = GenerateMaximalLocales.minimize(locale, likelySubtags, false);
    if (direct != null) {
        return direct;
    }
    // Retry without variants and extensions, remembering them for later.
    LanguageTagParser parser = new LanguageTagParser().set(locale);
    List<String> savedVariants = parser.getVariants();
    Map<String, String> savedExtensions = parser.getExtensions();
    parser.setVariants(Collections.<String>emptySet());
    parser.setExtensions(Collections.<String, String>emptyMap());
    String stripped =
            GenerateMaximalLocales.minimize(parser.toString(), likelySubtags, false);
    if (stripped == null) {
        return null;
    }
    parser.set(stripped);
    parser.setVariants(savedVariants);
    parser.setExtensions(savedExtensions);
    return parser.toString();
}
/**
 * Maximizes a locale ID with the given likely-subtags map. When the direct attempt yields
 * null, the attempt is repeated on the ID stripped of variants and extensions, and the
 * original variants and extensions are put back onto a non-null result.
 *
 * @param likelySubtags likely-subtags mapping passed through to GenerateMaximalLocales
 * @param locale locale ID to maximize
 * @return the maximized ID, or null when both attempts yield null
 */
private String maximize(Map<String, String> likelySubtags, String locale) {
    String direct = GenerateMaximalLocales.maximize(locale, likelySubtags);
    if (direct != null) {
        return direct;
    }
    // Retry without variants and extensions, remembering them for later.
    LanguageTagParser parser = new LanguageTagParser().set(locale);
    List<String> savedVariants = parser.getVariants();
    Map<String, String> savedExtensions = parser.getExtensions();
    parser.setVariants(Collections.<String>emptySet());
    parser.setExtensions(Collections.<String, String>emptyMap());
    String stripped = GenerateMaximalLocales.maximize(parser.toString(), likelySubtags);
    if (stripped == null) {
        return null;
    }
    parser.set(stripped);
    parser.setVariants(savedVariants);
    parser.setExtensions(savedExtensions);
    return parser.toString();
}
// TODO move this into central utilities
/**
 * Tells whether a character sequence consists of exactly the given code point.
 *
 * @param string sequence to test; null never matches
 * @param codePoint code point to compare with
 * @return true when {@code string} is one char equal to {@code codePoint}, or two chars that
 *     form the supplementary code point {@code codePoint}; false otherwise
 */
public static boolean equals(CharSequence string, int codePoint) {
    if (string == null) {
        return false;
    }
    final int length = string.length();
    if (length == 1) {
        return string.charAt(0) == codePoint;
    }
    if (length == 2) {
        // Two chars can only match a code point outside the BMP.
        return codePoint >= 0x10000 && Character.codePointAt(string, 0) == codePoint;
    }
    return false;
}
// TODO move this into central utilities
// Standard code lists; used below to validate subtags and to look up their descriptions.
private static final StandardCodes STANDARD_CODES = StandardCodes.make();
// Locale alias data: subtag type -> subtag -> (replacement list, reason), e.g.
// "language" -> "sh" -> <{"sr_Latn"}, reason>.
private static final Map<String, Map<String, R2<List<String>, String>>> DEPRECATED_INFO =
dataInfo.getLocaleAliasInfo();
/**
 * Validates the language, script and territory subtags of a locale ID via checkValidCode.
 *
 * @param localeID the locale ID to verify
 * @param allowDeprecated when false, deprecated subtags are reported as well
 */
private void checkLocale(String localeID, boolean allowDeprecated) {
    LanguageTagParser parsed = new LanguageTagParser().set(localeID);
    // TODO check variants, extensions also.
    checkValidCode(localeID, "language", parsed.getLanguage(), allowDeprecated);
    checkValidCode(localeID, "script", parsed.getScript(), allowDeprecated);
    checkValidCode(localeID, "territory", parsed.getRegion(), allowDeprecated);
}
/**
 * Reports an error when a subtag is not among the available codes of its type or, unless
 * deprecated codes are allowed, when the alias data has an entry for it.
 *
 * <p>The language "und" and empty subtags of other types are accepted without any check.
 *
 * @param localeID locale ID the subtag came from; only used in messages
 * @param subtagType code type, e.g. "language", "script" or "territory"
 * @param subtag the subtag to validate
 * @param allowDeprecated when true, the deprecation check is skipped
 */
private void checkValidCode(
        String localeID, String subtagType, String subtag, boolean allowDeprecated) {
    if (subtagType.equals("language")) {
        if (subtag.equals("und")) {
            return;
        }
    } else {
        if (subtag.isEmpty()) {
            return;
        }
    }
    if (!STANDARD_CODES.getAvailableCodes(subtagType).contains(subtag)) {
        errln("Locale " + localeID + " contains illegal " + showCode(subtagType, subtag));
        return;
    }
    if (allowDeprecated) {
        return;
    }
    // "language" -> "sh" -> <{"sr_Latn"}, reason>
    Map<String, R2<List<String>, String>> aliasesForType = DEPRECATED_INFO.get(subtagType);
    if (aliasesForType == null) {
        // No alias table for this subtag type, so nothing can be deprecated.
        return;
    }
    R2<List<String>, String> deprecatedInfo = aliasesForType.get(subtag);
    if (deprecatedInfo != null) {
        errln(
                "Locale "
                        + localeID
                        + " contains deprecated "
                        + showCode(subtagType, subtag)
                        + " "
                        + deprecatedInfo.get1()
                        + "; suggest "
                        + showName(deprecatedInfo.get0(), subtagType));
    }
}
/**
 * Formats a list of replacement codes as "code (name) " entries, one after another.
 *
 * @param deprecatedInfo replacement codes; may be null
 * @param subtagType code type used to look up the names
 * @return the concatenated entries, or an empty string for a null or empty list
 */
private String showName(List<String> deprecatedInfo, String subtagType) {
    StringBuilder result = new StringBuilder();
    // NOTE(review): presumably an alias entry can come without replacements, giving a null
    // list here - verify against SupplementalDataInfo. Guarded so the error report that calls
    // this does not itself fail with a NullPointerException.
    if (deprecatedInfo == null) {
        return result.toString();
    }
    for (String s : deprecatedInfo) {
        result.append(showName(subtagType, s)).append(" ");
    }
    return result.toString();
}
/** Returns the subtag type, the text " code: " and the subtag with its name in parentheses. */
private String showCode(String subtagType, String subtag) {
return subtagType + " code: " + showName(subtagType, subtag);
}
/** Returns the subtag followed by the result of getName for it in parentheses. */
private String showName(String subtagType, String subtag) {
return subtag + " (" + getName(subtagType, subtag) + ")";
}
/**
 * Looks up the "Description" field of a code in the standard code data.
 *
 * @return the description, or "<no name>" when there is no data for the code
 */
private String getName(String subtagType, String subtag) {
    Map<String, String> langData = STANDARD_CODES.getLangData(subtagType, subtag);
    return langData == null ? "<no name>" : langData.get("Description");
}
// TODO move this into central utilities
/** Argument-swapped convenience overload; delegates to equals(CharSequence, int). */
public static boolean equals(int codePoint, CharSequence string) {
return equals(string, codePoint);
}
// TODO move this into central utilities
/**
 * Null-safe equality: true when both arguments are the same reference (including both null),
 * false when exactly one is null, otherwise the result of {@code a.equals(b)}.
 */
public static boolean equals(Object a, Object b) {
    if (a == b) {
        return true;
    }
    if (a == null || b == null) {
        return false;
    }
    return a.equals(b);
}
// TODO move this into central utilities
/**
 * Describes the differences between two maps as a sequence of "key→‹a-value›,‹b-value›; "
 * entries. A key that is missing from one map gets ∅ in that map's position. Keys whose
 * values are equal in both maps are omitted.
 *
 * @param a first map
 * @param b second map
 * @return the concatenated difference entries; empty when the maps have equal content
 */
private <K, V> String showDifferences(Map<K, V> a, Map<K, V> b) {
    StringBuilder result = new StringBuilder();
    // Keys of a first, then keys only in b, each in the iteration order of its map.
    Set<K> keys = new LinkedHashSet<>();
    keys.addAll(a.keySet());
    keys.addAll(b.keySet());
    for (K key : keys) {
        if (!a.containsKey(key)) {
            // Only b has the key: ∅ for a, b's value second.
            result.append(key).append("→∅,‹").append(b.get(key)).append("›; ");
        } else if (!b.containsKey(key)) {
            // Only a has the key: a's value first, ∅ for b.
            result.append(key).append("→‹").append(a.get(key)).append("›,∅; ");
        } else {
            V aValue = a.get(key);
            V bValue = b.get(key);
            if (!equals(aValue, bValue)) {
                result.append(key)
                        .append("→‹")
                        .append(aValue)
                        .append("›,‹")
                        .append(bValue)
                        .append("›; ");
            }
        }
    }
    return result.toString();
}
/**
 * Smoke test for LanguageTagParser: checks the parts extracted from "en-Cyrl-US" and expects
 * an exception when the parser is given the malformed tag "$".
 */
public void TestLanguageTagParser() {
    LanguageTagParser parser = new LanguageTagParser();
    parser.set("en-Cyrl-US");
    assertEquals(null, "en", parser.getLanguage());
    assertEquals(null, "en_Cyrl", parser.getLanguageScript());
    assertEquals(null, "Cyrl", parser.getScript());
    assertEquals(null, "US", parser.getRegion());
    try {
        parser.set("$");
        // Reaching this line means the malformed tag was accepted.
        assertFalse("expected exception", true);
    } catch (Exception expected) {
        logln(expected.getMessage());
    }
}
/**
 * Checks LocaleIDParser.getParentChain against the expected chains for a few locales; each
 * row holds the locale ID and the expected string form of its chain.
 */
public void TestParentChain() {
    final String[][] cases = {
        {"en_DE", "[en_150, en_001, en, root]"},
        {"fr_CA", "[fr, root]"},
        {"fr", "[root]"},
        {"root", "[]"},
    };
    for (int i = 0; i < cases.length; i++) {
        final String locale = cases[i][0];
        final String expectedChain = cases[i][1];
        assertEquals(locale, expectedChain, LocaleIDParser.getParentChain(locale).toString());
    }
}
}