| package org.unicode.cldr.unittest; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| |
| import org.unicode.cldr.draft.ScriptMetadata; |
| import org.unicode.cldr.draft.ScriptMetadata.Info; |
| import org.unicode.cldr.tool.GenerateMaximalLocales; |
| import org.unicode.cldr.tool.LikelySubtags; |
| import org.unicode.cldr.util.Builder; |
| import org.unicode.cldr.util.CLDRConfig; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRLocale; |
| import org.unicode.cldr.util.ChainedMap; |
| import org.unicode.cldr.util.ChainedMap.M3; |
| import org.unicode.cldr.util.CldrUtility; |
| import org.unicode.cldr.util.LanguageTagParser; |
| import org.unicode.cldr.util.LocaleIDParser; |
| import org.unicode.cldr.util.PatternCache; |
| import org.unicode.cldr.util.StandardCodes; |
| import org.unicode.cldr.util.SupplementalDataInfo; |
| import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData; |
| import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type; |
| import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; |
| import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; |
| import org.unicode.cldr.util.XPathParts; |
| |
| import com.ibm.icu.dev.test.TestFmwk; |
| import com.ibm.icu.dev.util.CollectionUtilities; |
| import com.ibm.icu.impl.Relation; |
| import com.ibm.icu.impl.Row.R2; |
| |
| public class TestInheritance extends TestFmwk { |
| |
| static CLDRConfig testInfo = CLDRConfig.getInstance(); |
| |
| private static boolean DEBUG = CldrUtility.getProperty("DEBUG", false); |
| |
| private static Matcher pathMatcher = PatternCache.get( |
| CldrUtility.getProperty("XPATH", ".*")).matcher(""); |
| |
| public static void main(String[] args) throws IOException { |
| new TestInheritance().run(args); |
| } |
| |
| private static final SupplementalDataInfo dataInfo = SupplementalDataInfo |
| .getInstance(); |
| private static final Set<String> defaultContents = dataInfo |
| .getDefaultContentLocales(); |
| |
| private static final boolean EXPECT_EQUALITY = false; |
| |
| private static Set<String> availableLocales = testInfo.getFullCldrFactory().getAvailable(); |
| |
| public void TestLocalesHaveOfficial() { |
| // If we have a language, we have all the region locales where the |
| // language is official |
| Set<String> SKIP_TERRITORIES = new HashSet<String>(Arrays.asList("001", |
| "150")); |
| for (Entry<String, R2<List<String>, String>> s : dataInfo |
| .getLocaleAliasInfo().get("territory").entrySet()) { |
| SKIP_TERRITORIES.add(s.getKey()); |
| } |
| |
| LanguageTagParser ltp = new LanguageTagParser(); |
| |
| Relation<String, String> languageLocalesSeen = Relation.of( |
| new TreeMap<String, Set<String>>(), TreeSet.class); |
| |
| Set<String> testOrg = testInfo.getStandardCodes() |
| .getLocaleCoverageLocales("google"); |
| ChainedMap.M4<String, OfficialStatus, String, Boolean> languageToOfficialChildren = ChainedMap |
| .of(new TreeMap<String, Object>(), |
| new TreeMap<OfficialStatus, Object>(), |
| new TreeMap<String, Object>(), Boolean.class); |
| |
| // gather the data |
| |
| for (String language : dataInfo |
| .getLanguagesForTerritoriesPopulationData()) { |
| for (String territory : dataInfo |
| .getTerritoriesForPopulationData(language)) { |
| if (SKIP_TERRITORIES.contains(territory)) { |
| continue; |
| } |
| PopulationData data = dataInfo |
| .getLanguageAndTerritoryPopulationData(language, |
| territory); |
| OfficialStatus status = data.getOfficialStatus(); |
| if (data.getOfficialStatus() != OfficialStatus.unknown) { |
| String locale = removeScript(language + "_" + territory); |
| String lang = removeScript(ltp.set(locale).getLanguage()); |
| languageToOfficialChildren.put(lang, status, locale, |
| Boolean.TRUE); |
| languageLocalesSeen.put(lang, locale); |
| } |
| } |
| } |
| |
| // flesh it out by adding 'clean' codes. |
| // also get the child locales in cldr. |
| |
| Relation<String, String> languageToChildren = Relation.of( |
| new TreeMap<String, Set<String>>(), TreeSet.class); |
| for (String locale : testInfo.getCldrFactory().getAvailable()) { |
| String lang = ltp.set(locale).getLanguage(); |
| if (SKIP_TERRITORIES.contains(ltp.getRegion())) { |
| continue; |
| } |
| lang = removeScript(lang); |
| locale = removeScript(locale); |
| |
| if (!lang.equals(locale)) { |
| languageToChildren.put(lang, locale); |
| Set<String> localesSeen = languageLocalesSeen.get(lang); |
| if (localesSeen == null || !localesSeen.contains(locale)) { |
| languageToOfficialChildren.put(lang, |
| OfficialStatus.unknown, locale, Boolean.TRUE); |
| } |
| } |
| } |
| |
| for (Entry<String, Set<String>> languageAndChildren : languageToChildren |
| .keyValuesSet()) { |
| String language = languageAndChildren.getKey(); |
| Set<String> children = languageAndChildren.getValue(); |
| M3<OfficialStatus, String, Boolean> officalStatusToChildren = languageToOfficialChildren |
| .get(language); |
| for (Entry<OfficialStatus, Map<String, Boolean>> entry : officalStatusToChildren) { |
| OfficialStatus status = entry.getKey(); |
| if (status != OfficialStatus.official |
| && status != OfficialStatus.de_facto_official) { |
| continue; |
| } |
| Set<String> officalChildren = entry.getValue().keySet(); |
| if (!children.containsAll(officalChildren)) { |
| Set<String> missing = new TreeSet<String>(officalChildren); |
| missing.removeAll(children); |
| String message = "Missing CLDR locales for " + status |
| + " languages: " + missing; |
| errln(message); |
| } else { |
| logln("CLDR locales " + children + " cover " + status |
| + " locales " + officalChildren); |
| } |
| |
| } |
| } |
| |
| if (DEBUG) { |
| Set<String> languages = new TreeSet<String>( |
| languageToChildren.keySet()); |
| languages.addAll(languageToOfficialChildren.keySet()); |
| System.out.print("\ncode\tlanguage"); |
| for (OfficialStatus status : OfficialStatus.values()) { |
| System.out.print("\tNo\t" + status); |
| } |
| System.out.println(); |
| for (String language : languages) { |
| if (!testOrg.contains(language)) { |
| continue; |
| } |
| System.out.print(language + "\t" |
| + testInfo.getEnglish().getName(language)); |
| |
| M3<OfficialStatus, String, Boolean> officialChildren = languageToOfficialChildren |
| .get(language); |
| for (OfficialStatus status : OfficialStatus.values()) { |
| Map<String, Boolean> children = officialChildren |
| .get(status); |
| if (children == null) { |
| System.out.print("\t" + 0 + "\t"); |
| } else { |
| System.out.print("\t" + children.size() + "\t" |
| + show(children.keySet(), false)); |
| } |
| } |
| System.out.println(); |
| } |
| } |
| } |
| |
| private String show(Set<String> joint, boolean showStatus) { |
| StringBuffer b = new StringBuffer(); |
| for (String s : joint) { |
| if (b.length() != 0) { |
| b.append(", "); |
| } |
| LanguageTagParser ltp = new LanguageTagParser().set(s); |
| String script = ltp.getScript(); |
| if (script.length() != 0) { |
| b.append(testInfo.getEnglish().getName(CLDRFile.SCRIPT_NAME, |
| script)); |
| } |
| String region = ltp.getRegion(); |
| if (region.length() != 0) { |
| if (script.length() != 0) { |
| b.append("-"); |
| } |
| b.append(testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, |
| region)); |
| } |
| b.append(" [").append(s); |
| if (showStatus) { |
| PopulationData data = dataInfo |
| .getLanguageAndTerritoryPopulationData( |
| ltp.getLanguage(), region); |
| if (data == null) { |
| data = dataInfo.getLanguageAndTerritoryPopulationData( |
| ltp.getLanguageScript(), region); |
| } |
| b.append("; "); |
| b.append(data == null ? "?" : data.getOfficialStatus()); |
| } |
| b.append("]"); |
| |
| } |
| return b.toString(); |
| } |
| |
| private String removeScript(String lang) { |
| if (!lang.contains("_")) { |
| return lang; |
| } |
| LanguageTagParser ltp = new LanguageTagParser().set(lang); |
| // String ls = ltp.getLanguageScript(); |
| // if (defaultContents.contains(ls)) { |
| ltp.setScript(""); |
| // } |
| return ltp.toString(); |
| } |
| |
| public void TestLikelyAndDefaultConsistency() { |
| LikelySubtags likelySubtags = new LikelySubtags(); |
| LanguageTagParser ltp = new LanguageTagParser(); |
| // find multiscript locales |
| Relation<String, String> base2scripts = Relation.of( |
| new TreeMap<String, Set<String>>(), TreeSet.class); |
| Map<String, String> parent2default = new TreeMap<String, String>(); |
| Map<String, String> default2parent = new TreeMap<String, String>(); |
| Relation<String, String> base2locales = Relation.of( |
| new TreeMap<String, Set<String>>(), TreeSet.class); |
| |
| Set<String> knownMultiScriptLanguages = new HashSet<String>(Arrays.asList("bm", "ha")); |
| // get multiscript locales |
| for (String localeID : availableLocales) { |
| String script = ltp.set(localeID).getScript(); |
| final String base = ltp.getLanguage(); |
| if (!availableLocales.contains(base)) { |
| errln("Missing base locale for: " + localeID); |
| } |
| base2locales.put(base, localeID); |
| if (!script.isEmpty() && !base.equals("en")) { // HACK for en |
| base2scripts.put(base, script); |
| } |
| if (script.isEmpty() && knownMultiScriptLanguages.contains(base)) { |
| base2scripts.put(base, dataInfo.getDefaultScript(base)); |
| } |
| } |
| |
| // get default contents |
| for (String localeID : defaultContents) { |
| checkLocale(localeID, false); |
| String simpleParent = LocaleIDParser.getSimpleParent(localeID); |
| parent2default.put(simpleParent, localeID); |
| default2parent.put(localeID, simpleParent); |
| // if (!available.contains(simpleParent)) { |
| // // verify that base language has locale in CLDR (we don't want |
| // others) |
| // errln("Default contents contains locale not in CLDR:\t" + |
| // simpleParent); |
| // } |
| } |
| |
| // get likely |
| Map<String, String> likely2Maximized = likelySubtags.getToMaximized(); |
| for (Entry<String, String> likelyAndMaximized : likely2Maximized |
| .entrySet()) { |
| checkLocale(likelyAndMaximized.getKey(), true); |
| checkLocale(likelyAndMaximized.getValue(), true); |
| } |
| Map<String, String> exceptionDcLikely = new HashMap<String, String>(); |
| Map<String, String> exceptionLikelyDc = new HashMap<String, String>(); |
| for (String[] s : new String[][] { { "ar_001", "ar_Arab_EG" }, }) { |
| exceptionDcLikely.put(s[0], s[1]); |
| exceptionLikelyDc.put(s[1], s[0]); |
| } |
| |
| verifyDefaultContentsImplicationsForLikelySubtags(ltp, parent2default, |
| likely2Maximized, exceptionDcLikely); |
| |
| verifyLikelySubtagsImplicationsForDefaultContents(ltp, base2scripts, |
| parent2default, likely2Maximized, exceptionLikelyDc); |
| |
| verifyScriptsWithDefaultContents(ltp, base2scripts, parent2default, |
| base2locales); |
| } |
| |
| public void TestParentLocaleRelationships() { |
| // Testing invariant relationships between locales - See |
| // http://unicode.org/cldr/trac/ticket/5758 |
| Matcher langScript = PatternCache.get("^[a-z]{2,3}_[A-Z][a-z]{3}$") |
| .matcher(""); |
| for (String loc : availableLocales) { |
| if (langScript.reset(loc).matches()) { |
| String expectedParent = loc.split("_")[0]; |
| if (!defaultContents.contains(loc)) { |
| expectedParent = "root"; |
| } |
| String actualParent = dataInfo.getExplicitParentLocale(loc); |
| if (actualParent == null) { |
| actualParent = loc.split("_")[0]; |
| } |
| if (!actualParent.equals(expectedParent)) { |
| errln("Unexpected parent locale for locale " + loc |
| + ". Expected: " + expectedParent + " Got: " |
| + actualParent); |
| } |
| |
| if (dataInfo.getExplicitParentLocale(loc) != null |
| && defaultContents.contains(loc)) { |
| errln("Locale " |
| + loc |
| + " can't have an explicit parent AND be a default content locale"); |
| } |
| } |
| } |
| } |
| |
| public void TestParentLocaleInvariants() { |
| // Testing invariant relationships in parent locales - See |
| // http://unicode.org/cldr/trac/ticket/7887 |
| LocaleIDParser lp = new LocaleIDParser(); |
| for (String loc : availableLocales) { |
| String parentLocale = dataInfo.getExplicitParentLocale(loc); |
| if (parentLocale != null) { |
| if (!"root".equals(parentLocale) |
| && !lp.set(loc).getLanguage() |
| .equals(lp.set(parentLocale).getLanguage())) { |
| errln("Parent locale [" + parentLocale + "] for locale [" |
| + loc + "] cannot be a different language code."); |
| } |
| if (!"root".equals(parentLocale) |
| && !lp.set(loc).getScript() |
| .equals(lp.set(parentLocale).getScript())) { |
| errln("Parent locale [" + parentLocale + "] for locale [" |
| + loc + "] cannot be a different script code."); |
| } |
| lp.set(loc); |
| if (lp.getScript().length() == 0 && lp.getRegion().length() == 0) { |
| errln("Base language locale [" + loc + "] cannot have an explicit parent."); |
| } |
| |
| } |
| } |
| } |
| |
| public void TestParentLocalesForCycles() { |
| // Testing for cyclic relationships in parent locales - See |
| // http://unicode.org/cldr/trac/ticket/7887 |
| for (String loc : availableLocales) { |
| String currentLoc = loc; |
| boolean foundError = false; |
| List<String> inheritanceChain = new ArrayList<String>(Arrays.asList(loc)); |
| while (currentLoc != null && !foundError) { |
| currentLoc = LocaleIDParser.getParent(currentLoc); |
| if (inheritanceChain.contains(currentLoc)) { |
| foundError = true; |
| inheritanceChain.add(currentLoc); |
| errln("Inheritance chain for locale [" + loc + "] contains a cyclic relationship. " + inheritanceChain.toString()); |
| } |
| inheritanceChain.add(currentLoc); |
| } |
| } |
| } |
| |
| private void verifyScriptsWithDefaultContents(LanguageTagParser ltp, |
| Relation<String, String> base2scripts, |
| Map<String, String> parent2default, |
| Relation<String, String> base2locales) { |
| Set<String> skip = Builder.with(new HashSet<String>()) |
| .addAll("root", "und") |
| .freeze(); |
| Set<String> languagesWithOneOrLessLocaleScriptInCommon = new HashSet<String>(Arrays.asList("bm", "ha", "ms", "iu", "mn")); |
| // for each base we have to have, |
| // if multiscript, we have default contents for base+script, |
| // base+script+region; |
| // otherwise base+region. |
| for (String base : base2locales.keySet()) { |
| if (skip.contains(base)) { |
| continue; |
| } |
| String defaultContent = parent2default.get(base); |
| // Set<String> likely = base2likely.get(base); |
| // if (likely == null) { |
| // errln("Missing likely subtags for: " + base + " " + |
| // suggestLikelySubtagFor(base)); |
| // } |
| if (defaultContent == null) { |
| errln("Missing default content for: " + base + " " |
| + suggestLikelySubtagFor(base)); |
| continue; |
| } |
| Set<String> scripts = base2scripts.get(base); |
| ltp.set(defaultContent); |
| String script = ltp.getScript(); |
| String region = ltp.getRegion(); |
| if (scripts == null || languagesWithOneOrLessLocaleScriptInCommon.contains(base)) { |
| if (!script.isEmpty()) { |
| errln("Script should be empty in default content for: " |
| + base + "," + defaultContent); |
| } |
| if (region.isEmpty()) { |
| errln("Region must not be empty in default content for: " |
| + base + "," + defaultContent); |
| } |
| } else { |
| if (script.isEmpty()) { |
| errln("Script should not be empty in default content for: " |
| + base + "," + defaultContent); |
| } |
| if (!region.isEmpty()) { |
| errln("Region should be empty in default content for: " |
| + base + "," + defaultContent); |
| } |
| String defaultContent2 = parent2default.get(defaultContent); |
| if (defaultContent2 == null) { |
| errln("Missing default content for: " + defaultContent); |
| continue; |
| } |
| ltp.set(defaultContent2); |
| region = ltp.getRegion(); |
| if (region.isEmpty()) { |
| errln("Region must not be empty in default content for: " |
| + base + "," + defaultContent); |
| } |
| } |
| } |
| } |
| |
| private void verifyLikelySubtagsImplicationsForDefaultContents( |
| LanguageTagParser ltp, Relation<String, String> base2scripts, |
| Map<String, String> parent2default, |
| Map<String, String> likely2Maximized, |
| Map<String, String> exceptionLikelyDc) { |
| // Now check invariants for all LikelySubtags implications for Default |
| // Contents |
| // a) suppose likely max for la_Scrp => la_Scrp_RG |
| // Then default contents la_Scrp => la_Scrp_RG |
| // b) suppose likely max for la_RG => la_Scrp_RG |
| // Then we can draw no conclusions // was default contents la_Scrp => |
| // la_Scrp_RG |
| // c) suppose likely max for la => la_Scrp_RG |
| // Then default contents la => la_Scrp && la_Scrp => la_Scrp_RG |
| // or default contents la => la_RG && ! la_Scrp => la_Scrp_RG |
| |
| TreeSet<String> additionalDefaultContents = new TreeSet<String>(); |
| |
| for (Entry<String, String> entry : likely2Maximized.entrySet()) { |
| String source = entry.getKey(); |
| String likelyMax = entry.getValue(); |
| String sourceLang = ltp.set(source).getLanguage(); |
| if (sourceLang.equals("und") || source.equals("zh_Hani") |
| || source.equals("tl")) { |
| continue; |
| } |
| String sourceScript = ltp.getScript(); |
| String sourceRegion = ltp.getRegion(); |
| |
| String likelyMaxLang = ltp.set(likelyMax).getLanguage(); |
| String likelyMaxScript = ltp.getScript(); |
| String likelyMaxRegion = ltp.getRegion(); |
| |
| String dc = parent2default.get(source); |
| String possibleException = exceptionLikelyDc.get(likelyMax); |
| if (possibleException != null && possibleException.equals(dc)) { |
| continue; |
| } |
| String likelyLangScript = likelyMaxLang + "_" + likelyMaxScript; |
| String dcFromLangScript = parent2default.get(likelyLangScript); |
| |
| boolean consistent = true; |
| String caseNumber = null; |
| if (consistent) { |
| if (!sourceScript.isEmpty()) { |
| caseNumber = "a"; |
| if (dc == null) { |
| if (EXPECT_EQUALITY) { |
| String expected = likelyMax; |
| errln("Default contents null for " + source |
| + ", expected:\t" + expected); |
| additionalDefaultContents.add(expected); |
| } |
| continue; |
| } |
| consistent = likelyMax.equals(dc); |
| } else if (!sourceRegion.isEmpty()) { // a |
| caseNumber = "b"; |
| // consistent = likelyMax.equals(dcFromLangScript); |
| } else { // c |
| caseNumber = "c"; |
| if (dc == null) { |
| if (EXPECT_EQUALITY) { |
| String expected = base2scripts.get(source) == null ? likelyMaxLang |
| + "_" + likelyMaxRegion |
| : likelyMaxLang + "_" + likelyMaxScript; |
| errln("Default contents null for " + source |
| + ", expected:\t" + expected); |
| additionalDefaultContents.add(expected); |
| } |
| continue; |
| } |
| String dcScript = ltp.set(dc).getScript(); |
| consistent = likelyLangScript.equals(dc) |
| && likelyMax.equals(dcFromLangScript) |
| || dcScript.isEmpty() |
| && !likelyMax.equals(dcFromLangScript); |
| // || dcScript.isEmpty() && dcRegion.equals(likelyMaxRegion) |
| // && dcFromLangScript == null; |
| } |
| } |
| if (!consistent) { |
| errln("default contents inconsistent with likely subtag: (" |
| + caseNumber + ")" + "\n\t" + source + " => (ls) " |
| + likelyMax + "\n\t" + source + " => (dc) " + dc |
| + "\n\t" + likelyLangScript + " => (dc) " |
| + dcFromLangScript); |
| } |
| } |
| if (additionalDefaultContents.size() != 0) { |
| errln("Suggested additions to supplementalMetadata/../defaultContent:\n" |
| + CollectionUtilities.join(additionalDefaultContents, " ")); |
| } |
| } |
| |
| private void verifyDefaultContentsImplicationsForLikelySubtags( |
| LanguageTagParser ltp, Map<String, String> parent2default, |
| Map<String, String> likely2Maximized, |
| Map<String, String> exceptionDcLikely) { |
| // Now check invariants for all Default Contents implications for |
| // LikelySubtags |
| // a) suppose default contents la => la_Scrp. |
| // Then the likely contents for la => la_Scrp_* |
| // b) suppose default contents la => la_RG. |
| // Then the likely contents for la => la_*_RG |
| // c) suppose default contents la_Scrp => la_Scrp_RG. |
| // Then the likely contents of la_Scrp => la_Scrp_RG OR likely contents |
| // for la => la_*_* |
| for (Entry<String, String> parentAndDefault : parent2default.entrySet()) { |
| String source = parentAndDefault.getKey(); |
| String dc = parentAndDefault.getValue(); |
| String likelyMax = likely2Maximized.get(source); |
| |
| // skip special exceptions |
| String possibleException = exceptionDcLikely.get(dc); |
| if (possibleException != null |
| && possibleException.equals(likelyMax)) { |
| continue; |
| } |
| |
| String sourceLang = ltp.set(source).getLanguage(); |
| String sourceScript = ltp.getScript(); |
| // there cannot be a sourceRegion |
| |
| String dcScript = ltp.set(dc).getScript(); |
| String dcRegion = ltp.getRegion(); |
| |
| String likelyMaxLang = "", likelyMaxScript = "", likelyMaxRegion = ""; |
| if (likelyMax != null) { |
| likelyMaxLang = ltp.set(likelyMax).getLanguage(); |
| likelyMaxScript = ltp.getScript(); |
| likelyMaxRegion = ltp.getRegion(); |
| } |
| |
| String likelyMax2 = likely2Maximized.get(sourceLang); |
| |
| boolean consistent = true; |
| |
| if (sourceScript.isEmpty()) { // a or b |
| if (!dcScript.isEmpty()) { // a |
| consistent = likelyMaxLang.equals(source) |
| && likelyMaxScript.equals(dcScript); |
| } else { // b |
| consistent = likelyMaxLang.equals(source) |
| && likelyMaxRegion.equals(dcRegion); |
| } |
| } else { // c |
| consistent = dc.equals(likelyMax) || likelyMax2 != null; |
| } |
| if (!consistent) { |
| errln("likely subtag inconsistent with default contents: " |
| + "\n\t" |
| + source |
| + " =>( dc) " |
| + dc |
| + "\n\t" |
| + source |
| + " => (ls) " |
| + likelyMax |
| + (source.equals(sourceLang) ? "" : "\n\t" + sourceLang |
| + " => (ls) " + likelyMax2)); |
| } |
| } |
| } |
| |
| /** |
| * Suggest a likely subtag |
| * |
| * @param base |
| * @return |
| */ |
| static String suggestLikelySubtagFor(String base) { |
| SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(); |
| |
| CLDRLocale loc = CLDRLocale.getInstance(base); |
| |
| if (!loc.getLanguage().equals(base)) { |
| return " (no suggestion- not a simple language locale)"; // no |
| // suggestion |
| // unless |
| // just |
| // a |
| // language |
| // locale. |
| } |
| Set<BasicLanguageData> basicData = sdi.getBasicLanguageData(base); |
| |
| for (BasicLanguageData bld : basicData) { |
| if (bld.getType() == org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type.primary) { |
| Set<String> scripts = bld.getScripts(); |
| Set<String> territories = bld.getTerritories(); |
| |
| if (scripts.size() == 1) { |
| if (territories.size() == 1) { |
| return createSuggestion( |
| loc, |
| CLDRLocale.getInstance(base + "_" |
| + scripts.iterator().next() + "_" |
| + territories.iterator().next())); |
| } |
| } |
| return "(no suggestion - multiple scripts or territories)"; |
| } |
| } |
| return ("(no suggestion- no data)"); |
| } |
| |
| /** |
| * Format and return a suggested likelysubtag |
| */ |
| private static String createSuggestion(CLDRLocale loc, CLDRLocale toLoc) { |
| return " Suggest this to likelySubtags.xml: <likelySubtag from=\"" |
| + loc |
| + "\" to=\"" |
| + toLoc |
| + "\"/>\n" |
| + " <!--{ " |
| + loc.getDisplayName() |
| + "; ?; ? } => { " |
| + loc.getDisplayName() |
| + "; " |
| + toLoc.toULocale().getDisplayScript() |
| + "; " |
| + toLoc.toULocale().getDisplayCountry() + " }-->"; |
| |
| } |
| |
| public void TestDeprecatedTerritoryDataLocaleIds() { |
| HashSet<String> checked = new HashSet<String>(); |
| for (String language : dataInfo |
| .getLanguagesForTerritoriesPopulationData()) { |
| checkLocale(language, false); // checks la_Scrp and la |
| for (String region : dataInfo |
| .getTerritoriesForPopulationData(language)) { |
| if (!checked.contains(region)) { |
| checkValidCode(language + "_" + region, "territory", |
| region, false); |
| checked.add(region); |
| } |
| } |
| } |
| for (String language : dataInfo.getBasicLanguageDataLanguages()) { |
| checkLocale(language, false); // checks la_Scrp and la |
| Set<BasicLanguageData> data = dataInfo |
| .getBasicLanguageData(language); |
| for (BasicLanguageData datum : data) { |
| for (String script : datum.getScripts()) { |
| checkValidCode(language + "_" + script, "script", script, |
| false); |
| checked.add(script); |
| } |
| for (String region : datum.getTerritories()) { |
| checkValidCode(language + "_" + region, "territory", |
| region, false); |
| checked.add(region); |
| } |
| } |
| } |
| |
| } |
| |
| public void TestBasicLanguageDataAgainstScriptMetadata() { |
| // the invariants are: |
| // if there is primary data, the script must be there |
| // otherwise it must be in the secondary |
| main: for (String script : ScriptMetadata.getScripts()) { |
| Info info = ScriptMetadata.getInfo(script); |
| String language = info.likelyLanguage; |
| if (language.equals("und")) { |
| continue; |
| } |
| Map<Type, BasicLanguageData> data = dataInfo |
| .getBasicLanguageDataMap(language); |
| if (data == null) { |
| logln("Warning: ScriptMetadata has " + language + " for " |
| + script + "," + " but " + language |
| + " is missing in language_script.txt"); |
| continue; |
| } |
| for (BasicLanguageData entry : data.values()) { |
| if (entry.getScripts().contains(script)) { |
| continue main; |
| } |
| continue; |
| } |
| logln("Warning: ScriptMetadata has " + language + " for " + script |
| + "," + " but " + language + " doesn't have " + script |
| + " in language_script.txt"); |
| } |
| } |
| |
| public void TestCldrFileConsistency() { |
| boolean haveErrors = false; |
| for (String locale : testInfo.getCldrFactory().getAvailable()) { |
| CLDRFile cldrFileToCheck = testInfo.getCLDRFile(locale, |
| false); |
| int errors = 0; |
| for (String path : cldrFileToCheck) { |
| if (!pathMatcher.reset(path).find()) { |
| continue; |
| } |
| String fullPath = cldrFileToCheck.getFullXPath(path); |
| if (fullPath == null) { |
| // try again, for debugging |
| fullPath = cldrFileToCheck.getFullXPath(path); |
| String value = cldrFileToCheck.getStringValue(path); |
| if (DEBUG) { |
| errln("Invalid full path\t" + locale + ", " + path |
| + ", " + fullPath + ", " + value); |
| } |
| errors++; |
| haveErrors = true; |
| } |
| } |
| if (errors != 0) { |
| errln(locale |
| + (errors != 0 ? "\tinvalid getFullXPath() values:" |
| + errors : "")); |
| } else { |
| logln(locale); |
| } |
| } |
| if (haveErrors && !DEBUG) { |
| errln("Use -DDEBUG to see details"); |
| } |
| } |
| |
| static SupplementalDataInfo info = SupplementalDataInfo.getInstance(); |
| LanguageTagParser ltp = new LanguageTagParser(); |
| |
| // public void TestAliases() { |
| // Factory factory = Factory.make(CldrUtility.MAIN_DIRECTORY, fileMatcher); |
| // Set<String> allLocales = Factory.make(CldrUtility.MAIN_DIRECTORY, |
| // ".*").getAvailable(); |
| // |
| // LanguageTagCanonicalizer languageTagCanonicalizer = new |
| // LanguageTagCanonicalizer(); |
| // |
| // Set<String> defaultContents = info.getDefaultContentLocales(); |
| // |
| // Map<String, String> likelySubtags = info.getLikelySubtags(); |
| // |
| // XPathParts xpp = new XPathParts(); |
| // |
| // // get the top level aliases, and verify that they are consistent with |
| // // maximization |
| // Map<String, String> topLevelAliases = new TreeMap<String, String>(); |
| // Set<String> crossScriptSet = new TreeSet<String>(); |
| // Set<String> aliasPaths = new TreeSet<String>(); |
| // Set<String> locales = factory.getAvailable(); |
| // |
| // // get the languages that need scripts |
| // // TODO broaden to beyond CLDR |
| // Set<String> needScripts = new TreeSet<String>(); |
| // for (String locale : locales) { |
| // String script = ltp.set(locale).getScript(); |
| // if (script.length() != 0) { |
| // needScripts.add(ltp.getLanguage()); |
| // } |
| // } |
| // |
| // logln("Languages that have scripts:\t" + needScripts); |
| // |
| // for (String locale : locales) { |
| // |
| // // get alias locale |
| // String aliasLocale = locale; |
| // String explicitAlias = null; |
| // String aliasPathNew = null; |
| // CLDRFile cldrFileToCheck = factory.make(locale, false); |
| // aliasPaths.clear(); |
| // // examples: |
| // // in: <alias source="id" path="//ldml"/> |
| // // ar_IR: <alias source="az_Arab_IR" path="//ldml"/> |
| // |
| // cldrFileToCheck.getPaths("//ldml/alias", null, aliasPaths); |
| // if (aliasPaths.size() != 0) { |
| // String aliasPath = aliasPaths.iterator().next(); |
| // String fullPath = cldrFileToCheck.getFullXPath(aliasPath); |
| // explicitAlias = aliasLocale = xpp.set(fullPath).getAttributeValue(1, |
| // "source"); |
| // String aliasParent = LocaleIDParser.getParent(aliasLocale); |
| // if (!aliasParent.equals("root")) { |
| // topLevelAliases.put(locale, aliasParent); |
| // } |
| // aliasPathNew = xpp.set(fullPath).getAttributeValue(1, "path"); |
| // if ("//ldml/".equals(aliasPathNew)) { |
| // errln("Bad alias path:\t" + fullPath); |
| // } |
| // } |
| // |
| // checkAliasValues(cldrFileToCheck, allLocales); |
| // |
| // // get canonicalized |
| // String canonicalizedLocale = languageTagCanonicalizer.transform(locale); |
| // if (!locale.equals(canonicalizedLocale)) { |
| // logln("Locale\t" + locale + " => " + canonicalizedLocale); |
| // } |
| // |
| // String base = ltp.set(canonicalizedLocale).getLanguage(); |
| // String script = ltp.getScript(); |
| // if (canonicalizedLocale.equals(base)) { // eg, id, az |
| // continue; |
| // } |
| // |
| // // see if the locale's default script is the same as the base locale's |
| // |
| // String maximized = maximize(likelySubtags, canonicalizedLocale); |
| // if (maximized == null) { |
| // errln("Missing likely subtags for:\t" + locale + " " + |
| // suggestLikelySubtagFor(locale)); |
| // continue; |
| // } |
| // String maximizedScript = ltp.set(maximized).getScript(); |
| // |
| // String minimized = minimize(likelySubtags, canonicalizedLocale); |
| // |
| // String baseMaximized = maximize(likelySubtags, base); |
| // String baseScript = ltp.set(baseMaximized).getScript(); |
| // |
| // if (script.length() != 0 && !script.equals(baseScript)) { |
| // crossScriptSet.add(ltp.set(locale).getLanguageScript()); |
| // } |
| // |
| // // Finally, put together the expected alias for comparison. |
| // // It is the "best" alias, in that the default-content locales are |
| // skipped in favor of their parents |
| // |
| // String expectedAlias = |
| // !baseScript.equals(maximizedScript) ? minimized : |
| // !locale.equals(canonicalizedLocale) ? canonicalizedLocale : |
| // // needScripts.contains(base) ? ltp.getLanguageScript() : |
| // locale; |
| // |
| // if (!equals(aliasLocale, expectedAlias)) { |
| // String aliasMaximized = maximize(likelySubtags, aliasLocale); |
| // String expectedMaximized = maximize(likelySubtags, expectedAlias); |
| // if (!equals(aliasMaximized, expectedMaximized)) { |
| // errln("For locale:\t" + locale |
| // + ",\tbase-script:\t" + baseScript |
| // + ",\texpected alias Locale != actual alias Locale:\t" |
| // + expectedAlias + ", " + aliasLocale); |
| // } else if (explicitAlias == null) { |
| // // skip, we don't care in this case |
| // // but we emit warnings if the other conditions are true. The aliasing |
| // could be simpler. |
| // } else if (equals(expectedAlias, locale)) { |
| // logln("Warning; alias could be omitted. For locale:\t" + locale |
| // + ",\tbase-script:\t" + baseScript |
| // + ",\texpected alias Locale != actual alias Locale:\t" |
| // + expectedAlias + ", " + aliasLocale); |
| // } else { |
| // logln("Warning; alias could be minimized. For locale:\t" + locale |
| // + ",\tbase-script:\t" + baseScript |
| // + ",\texpected alias Locale != actual alias Locale:\t" |
| // + expectedAlias + ", " + aliasLocale); |
| // } |
| // } |
| // } |
| // |
| // // check the LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES value and make sure |
| // it matches what is in the files in main/ |
| // |
| // if (!topLevelAliases.equals(LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES) |
| // && locales.equals(allLocales)) { |
| // String diff = showDifferences(LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES, |
| // topLevelAliases); |
| // if (!diff.isEmpty()) { |
| // errln("LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES ≠ topLevelAliases: " + |
| // diff); |
| // } |
| // StringBuilder result = new StringBuilder( |
| // "Suggest changing LocaleIDParser.TOP_LEVEL_ALIAS_LOCALES to:\n"); |
| // for (Entry<String, String> entry : topLevelAliases.entrySet()) { |
| // result.append("\t.put(\"") |
| // .append(entry.getKey()) |
| // .append("\", \"") |
| // .append(entry.getValue()) |
| // .append("\")\n"); |
| // } |
| // errln(result.toString()); |
| // } else { |
| // logln("Top Level Aliases:\t" + topLevelAliases); |
| // } |
| // |
| // // verify that they are the same as what we would get if we were to |
| // maximize |
| // // all the locales and check against default_contents |
| // |
| // for (String locale : defaultContents) { |
| // CLDRFile cldrFileToCheck = null; |
| // try { |
| // cldrFileToCheck = factory.make(locale, false); |
| // } catch (Exception e) {} |
| // if (cldrFileToCheck == null) { |
| // logln("Present in default contents but has no XML file:\t" + locale); |
| // continue; |
| // } |
| // logln("Locale:\t" + locale); |
| // // verify empty, except for identity elements and alias |
| // for (String path : cldrFileToCheck) { |
| // if (path.contains("/identity/")) { |
| // continue; |
| // } |
| // errln("Default content locale not empty:\t" + locale + ", " + path); |
| // break; |
| // } |
| // } |
| // } |
| |
| Matcher aliasMatcher = PatternCache.get("//ldml.*/alias.*").matcher(""); |
| |
| private void checkAliasValues(CLDRFile cldrFileToCheck, Set<String> locales) { |
| Set<String> aliasPaths = new TreeSet<String>(); |
| Set<String> allAliasPaths = cldrFileToCheck.getPaths("//ldml/", |
| aliasMatcher, aliasPaths); |
| XPathParts xpp = new XPathParts(); |
| for (String aliasPath : allAliasPaths) { |
| if (aliasPath.startsWith("//ldml/alias")) { |
| continue; // we have different tests elsewhere |
| } |
| String fullPath = cldrFileToCheck.getFullXPath(aliasPath); |
| String aliasLocale = xpp.set(fullPath).getAttributeValue(-1, |
| "source"); |
| // just check to make sure that the alias is in the locales |
| if (aliasLocale != null && !aliasLocale.equals("locale")) { |
| if (!locales.contains(aliasLocale)) { |
| errln("Unknown Alias:\t" + aliasLocale + "\t in\t" |
| + fullPath); |
| } |
| } |
| String aliasPathNew = xpp.set(fullPath).getAttributeValue(-1, |
| "path"); |
| // just one check |
| if (".".equals(aliasPathNew)) { |
| errln("Illegal path, must not be .:\t" + aliasLocale |
| + "\t in\t" + fullPath); |
| } |
| |
| } |
| } |
| |
| private String minimize(Map<String, String> likelySubtags, String locale) { |
| String result = GenerateMaximalLocales.minimize(locale, likelySubtags, |
| false); |
| if (result == null) { |
| LanguageTagParser ltp3 = new LanguageTagParser().set(locale); |
| List<String> variants = ltp3.getVariants(); |
| Map<String, String> extensions = ltp3.getExtensions(); |
| Set<String> emptySet = Collections.emptySet(); |
| ltp3.setVariants(emptySet); |
| Map<String, String> emptyMap = Collections.emptyMap(); |
| ltp3.setExtensions(emptyMap); |
| String newLocale = ltp3.toString(); |
| result = GenerateMaximalLocales.minimize(newLocale, likelySubtags, |
| false); |
| if (result != null) { |
| ltp3.set(result); |
| ltp3.setVariants(variants); |
| ltp3.setExtensions(extensions); |
| result = ltp3.toString(); |
| } |
| } |
| return result; |
| } |
| |
| private String maximize(Map<String, String> likelySubtags, String locale) { |
| String result = GenerateMaximalLocales.maximize(locale, likelySubtags); |
| if (result == null) { |
| LanguageTagParser ltp3 = new LanguageTagParser().set(locale); |
| List<String> variants = ltp3.getVariants(); |
| Map<String, String> extensions = ltp3.getExtensions(); |
| Set<String> emptySet = Collections.emptySet(); |
| ltp3.setVariants(emptySet); |
| Map<String, String> emptyMap = Collections.emptyMap(); |
| ltp3.setExtensions(emptyMap); |
| String newLocale = ltp3.toString(); |
| result = GenerateMaximalLocales.maximize(newLocale, likelySubtags); |
| if (result != null) { |
| ltp3.set(result); |
| ltp3.setVariants(variants); |
| ltp3.setExtensions(extensions); |
| result = ltp3.toString(); |
| } |
| } |
| return result; |
| } |
| |
| // TODO move this into central utilities |
| public static boolean equals(CharSequence string, int codePoint) { |
| if (string == null) { |
| return false; |
| } |
| switch (string.length()) { |
| case 1: |
| return codePoint == string.charAt(0); |
| case 2: |
| return codePoint >= 0x10000 |
| && codePoint == Character.codePointAt(string, 0); |
| default: |
| return false; |
| } |
| } |
| |
| // TODO move this into central utilities |
| |
| private static final StandardCodes STANDARD_CODES = testInfo.getStandardCodes(); |
| private static final Map<String, Map<String, R2<List<String>, String>>> DEPRECATED_INFO = dataInfo |
| .getLocaleAliasInfo(); |
| |
| private void checkLocale(String localeID, boolean allowDeprecated) { |
| // verify that the localeID is valid |
| LanguageTagParser ltp = new LanguageTagParser().set(localeID); |
| String language = ltp.getLanguage(); |
| String script = ltp.getScript(); |
| String region = ltp.getRegion(); |
| // TODO check variants, extensions also. |
| checkValidCode(localeID, "language", language, allowDeprecated); |
| checkValidCode(localeID, "script", script, allowDeprecated); |
| checkValidCode(localeID, "territory", region, allowDeprecated); |
| } |
| |
| private void checkValidCode(String localeID, String subtagType, |
| String subtag, boolean allowDeprecated) { |
| if (subtagType.equals("language")) { |
| if (subtag.equals("und")) { |
| return; |
| } |
| } else { |
| if (subtag.isEmpty()) { |
| return; |
| } |
| } |
| if (!STANDARD_CODES.getAvailableCodes(subtagType).contains(subtag)) { |
| errln("Locale " + localeID + " contains illegal " |
| + showCode(subtagType, subtag)); |
| } else if (!allowDeprecated) { |
| // "language" -> "sh" -> <{"sr_Latn"}, reason> |
| R2<List<String>, String> deprecatedInfo = DEPRECATED_INFO.get( |
| subtagType).get(subtag); |
| if (deprecatedInfo != null) { |
| errln("Locale " + localeID + " contains deprecated " |
| + showCode(subtagType, subtag) + " " |
| + deprecatedInfo.get1() + "; suggest " |
| + showName(deprecatedInfo.get0(), subtagType)); |
| } |
| } |
| } |
| |
| private String showName(List<String> deprecatedInfo, String subtagType) { |
| StringBuilder result = new StringBuilder(); |
| for (String s : deprecatedInfo) { |
| result.append(showName(subtagType, s)).append(" "); |
| } |
| return result.toString(); |
| } |
| |
| private String showCode(String subtagType, String subtag) { |
| return subtagType + " code: " + showName(subtagType, subtag); |
| } |
| |
| private String showName(String subtagType, String subtag) { |
| return subtag + " (" + getName(subtagType, subtag) + ")"; |
| } |
| |
| private String getName(String subtagType, String subtag) { |
| Map<String, String> data = STANDARD_CODES.getLangData(subtagType, |
| subtag); |
| if (data == null) { |
| return "<no name>"; |
| } |
| return data.get("Description"); |
| } |
| |
| // TODO move this into central utilities |
| public static boolean equals(int codePoint, CharSequence string) { |
| return equals(string, codePoint); |
| } |
| |
| // TODO move this into central utilities |
| public static boolean equals(Object a, Object b) { |
| return a == b ? true : a == null || b == null ? false : a.equals(b); |
| } |
| |
| // TODO move this into central utilities |
| private <K, V> String showDifferences(Map<K, V> a, Map<K, V> b) { |
| StringBuilder result = new StringBuilder(); |
| Set<K> keys = new LinkedHashSet<K>(); |
| keys.addAll(a.keySet()); |
| keys.addAll(b.keySet()); |
| for (K key : keys) { |
| if (!a.containsKey(key)) { |
| result.append(key).append("→‹").append(a.get(key)) |
| .append("›,∅; "); |
| } else if (!b.containsKey(key)) { |
| result.append(key).append("→∅,‹").append(b.get(key)) |
| .append("›; "); |
| } else { |
| V aKey = a.get(key); |
| V bKey = b.get(key); |
| if (!equals(aKey, bKey)) { |
| result.append(key).append("→‹").append(a.get(key)) |
| .append("›,‹").append(b.get(key)).append("›; "); |
| } |
| } |
| } |
| return result.toString(); |
| } |
| |
| public void TestLanguageTagParser() { |
| LanguageTagParser ltp = new LanguageTagParser(); |
| ltp.set("en-Cyrl-US"); |
| assertEquals(null, "en", ltp.getLanguage()); |
| assertEquals(null, "en_Cyrl", ltp.getLanguageScript()); |
| assertEquals(null, "Cyrl", ltp.getScript()); |
| assertEquals(null, "US", ltp.getRegion()); |
| try { |
| ltp.set("$"); |
| assertFalse("expected exception", true); |
| } catch (Exception e) { |
| logln(e.getMessage()); |
| } |
| } |
| } |