| package org.unicode.cldr.unittest; |
| |
| import com.google.common.base.Joiner; |
| import com.google.common.collect.ImmutableListMultimap; |
| import com.google.common.collect.ImmutableMap; |
| import com.google.common.collect.ImmutableSet; |
| import com.google.common.collect.LinkedHashMultimap; |
| import com.google.common.collect.Multimap; |
| import com.google.common.collect.Sets; |
| import com.google.common.collect.TreeMultimap; |
| import com.ibm.icu.impl.Relation; |
| import com.ibm.icu.impl.Row; |
| import com.ibm.icu.impl.Row.R2; |
| import com.ibm.icu.impl.Row.R4; |
| import com.ibm.icu.text.CompactDecimalFormat; |
| import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; |
| import com.ibm.icu.text.Transform; |
| import com.ibm.icu.util.Calendar; |
| import com.ibm.icu.util.Output; |
| import com.ibm.icu.util.ULocale; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Date; |
| import java.util.EnumSet; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.regex.Pattern; |
| import org.unicode.cldr.draft.ScriptMetadata; |
| import org.unicode.cldr.test.CoverageLevel2; |
| import org.unicode.cldr.tool.LikelySubtags; |
| import org.unicode.cldr.util.CLDRConfig; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRLocale; |
| import org.unicode.cldr.util.CLDRPaths; |
| import org.unicode.cldr.util.ChainedMap; |
| import org.unicode.cldr.util.ChainedMap.M4; |
| import org.unicode.cldr.util.Counter2; |
| import org.unicode.cldr.util.DtdData; |
| import org.unicode.cldr.util.DtdData.Element; |
| import org.unicode.cldr.util.DtdType; |
| import org.unicode.cldr.util.GrammarInfo; |
| import org.unicode.cldr.util.LanguageTagParser; |
| import org.unicode.cldr.util.Level; |
| import org.unicode.cldr.util.LocaleNames; |
| import org.unicode.cldr.util.LogicalGrouping; |
| import org.unicode.cldr.util.LogicalGrouping.PathType; |
| import org.unicode.cldr.util.Organization; |
| import org.unicode.cldr.util.PathHeader; |
| import org.unicode.cldr.util.PathHeader.Factory; |
| import org.unicode.cldr.util.PathStarrer; |
| import org.unicode.cldr.util.PatternCache; |
| import org.unicode.cldr.util.RegexLookup; |
| import org.unicode.cldr.util.RegexLookup.Finder; |
| import org.unicode.cldr.util.StandardCodes; |
| import org.unicode.cldr.util.SupplementalDataInfo; |
| import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; |
| import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; |
| import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; |
| import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; |
| import org.unicode.cldr.util.VoteResolver; |
| import org.unicode.cldr.util.XPathParts; |
| |
| public class TestCoverageLevel extends TestFmwkPlus { |
| |
| private static final boolean SHOW_LSR_DATA = false; |
| |
| private static CLDRConfig testInfo = CLDRConfig.getInstance(); |
| private static final StandardCodes STANDARD_CODES = StandardCodes.make(); |
| private static final CLDRFile ENGLISH = testInfo.getEnglish(); |
| private static final SupplementalDataInfo SDI = testInfo.getSupplementalDataInfo(); |
| private static final String TC_VOTES = |
| Integer.toString(VoteResolver.Level.tc.getVotes(Organization.apple)); |
| |
| public static void main(String[] args) { |
| new TestCoverageLevel().run(args); |
| } |
| |
| public void testSpecificPaths() { |
| String[][] rows = { |
| { |
| "//ldml/characters/parseLenients[@scope=\"number\"][@level=\"lenient\"]/parseLenient[@sample=\",\"]", |
| "moderate", |
| TC_VOTES |
| } |
| }; |
| doSpecificPathTest("fr", rows); |
| } |
| |
| public void testSpecificPathsPersCal() { |
| String[][] rows = { |
| { |
| "//ldml/dates/calendars/calendar[@type=\"persian\"]/eras/eraAbbr/era[@type=\"0\"]", |
| "moderate", |
| "4" |
| }, |
| { |
| "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]", |
| "moderate", |
| "4" |
| } |
| }; |
| doSpecificPathTest("ckb_IR", rows); |
| } |
| |
| public void testSpecificPathsDeFormatLength() { |
| String[][] rows = { |
| /* For German (de) these should be high-bar (20) per https://unicode-org.atlassian.net/browse/CLDR-14988 */ |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", |
| "modern", |
| TC_VOTES |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", |
| "modern", |
| TC_VOTES |
| }, |
| /* not high-bar (20): wrong number of zeroes, or count many*/ |
| { |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100\"][@count=\"other\"]", |
| "comprehensive", |
| "8" |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000000\"][@count=\"other\"]", |
| "modern", |
| "8" |
| }, |
| { |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"many\"]", |
| "modern", |
| "8" |
| }, |
| }; |
| doSpecificPathTest("de", rows); |
| } |
| |
| private void doSpecificPathTest(String localeStr, String[][] rows) { |
| Factory phf = PathHeader.getFactory(ENGLISH); |
| CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SDI, localeStr); |
| CLDRLocale loc = CLDRLocale.getInstance(localeStr); |
| for (String[] row : rows) { |
| String path = row[0]; |
| Level expectedLevel = Level.fromString(row[1]); |
| Level level = coverageLevel.getLevel(path); |
| assertEquals("Level for " + path, expectedLevel, level); |
| |
| int expectedRequiredVotes = Integer.parseInt(row[2]); |
| int votes = SDI.getRequiredVotes(loc, phf.fromPath(path)); |
| assertEquals("Votes for " + path, expectedRequiredVotes, votes); |
| } |
| } |
| |
| public void oldTestInvariantPaths() { |
| org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); |
| PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); |
| SupplementalDataInfo sdi = |
| SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); |
| |
| Set<String> allPaths = new HashSet<>(); |
| M4<String, String, Level, Boolean> starredToLocalesToLevels = |
| ChainedMap.of( |
| new TreeMap<String, Object>(), |
| new TreeMap<String, Object>(), |
| new TreeMap<Level, Object>(), |
| Boolean.class); |
| |
| for (String locale : factory.getAvailableLanguages()) { |
| logln(locale); |
| CLDRFile cldrFileToCheck = factory.make(locale, true); |
| for (String path : cldrFileToCheck.fullIterable()) { |
| allPaths.add(path); |
| String starred = pathStarrer.set(path); |
| Level level = sdi.getCoverageLevel(path, locale); |
| starredToLocalesToLevels.put(starred, locale, level, true); |
| } |
| } |
| |
| Set<Level> levelsFound = EnumSet.noneOf(Level.class); |
| Set<String> localesWithUniqueLevels = new TreeSet<>(); |
| for (Entry<String, Map<String, Map<Level, Boolean>>> entry : starredToLocalesToLevels) { |
| String starred = entry.getKey(); |
| Map<String, Map<Level, Boolean>> localesToLevels = entry.getValue(); |
| int maxLevelCount = 0; |
| double localeCount = 0; |
| levelsFound.clear(); |
| localesWithUniqueLevels.clear(); |
| |
| for (Entry<String, Map<Level, Boolean>> entry2 : localesToLevels.entrySet()) { |
| String locale = entry2.getKey(); |
| Map<Level, Boolean> levels = entry2.getValue(); |
| levelsFound.addAll(levels.keySet()); |
| if (levels.size() > maxLevelCount) { |
| maxLevelCount = levels.size(); |
| } |
| if (levels.size() == 1) { |
| localesWithUniqueLevels.add(locale); |
| } |
| localeCount++; |
| } |
| System.out.println( |
| maxLevelCount |
| + "\t" |
| + localesWithUniqueLevels.size() / localeCount |
| + "\t" |
| + starred |
| + "\t" |
| + Joiner.on(", ").join(levelsFound) |
| + "\t" |
| + (maxLevelCount == 1 |
| ? "all" |
| : localesWithUniqueLevels.size() == 0 |
| ? "none" |
| : Joiner.on(", ").join(localesWithUniqueLevels))); |
| } |
| } |
| |
/**
 * Status flags attached to a language code by this class's static initializer. The declaration
 * order matters: the flags are stored in sorted sets, and {@code isBigLanguage} reports the
 * first one.
 */
enum LanguageStatus {
    /** Set when the summed literate population exceeds 100,000,000; label "P1". */
    Lit100M("P1"),
    /** Set for official languages (see the static initializer for the thresholds); "P2". */
    Lit10MandOfficial("P2"),
    /** Set when a language exceeds one third of a territory's literate population; "P3". */
    Lit1MandOneThird("P3");

    /** Short label used as the prefix of the string built by {@code isBigLanguage}. */
    final String name;

    LanguageStatus(String label) {
        name = label;
    }
}
| |
| static Relation<String, LanguageStatus> languageStatus = |
| Relation.of(new HashMap<String, Set<LanguageStatus>>(), TreeSet.class); |
| static Counter2<String> languageLiteratePopulation = new Counter2<>(); |
| static Map<String, Date> currencyToLast = new HashMap<>(); |
| static Set<String> officialSomewhere = new HashSet<>(); |
| |
| static { |
| Counter2<String> territoryLiteratePopulation = new Counter2<>(); |
| LanguageTagParser parser = new LanguageTagParser(); |
| // cf |
| // http://cldr.unicode.org/development/development-process/design-proposals/languages-to-show-for-translation |
| for (String language : SDI.getLanguagesForTerritoriesPopulationData()) { |
| String base = parser.set(language).getLanguage(); |
| boolean isOfficial = false; |
| double languageLiterate = 0; |
| for (String territory : SDI.getTerritoriesForPopulationData(language)) { |
| PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory); |
| OfficialStatus officialStatus = pop.getOfficialStatus(); |
| if (officialStatus.compareTo(OfficialStatus.de_facto_official) >= 0) { |
| isOfficial = true; |
| languageStatus.put(base + "_" + territory, LanguageStatus.Lit10MandOfficial); |
| officialSomewhere.add(base); |
| } |
| double litPop = pop.getLiteratePopulation(); |
| languageLiterate += litPop; |
| territoryLiteratePopulation.add(territory, litPop); |
| languageLiteratePopulation.add(base + "_" + territory, litPop); |
| } |
| languageLiteratePopulation.add(base, languageLiterate); |
| if (languageLiterate > 100000000) { |
| languageStatus.put(base, LanguageStatus.Lit100M); |
| } |
| if (languageLiterate > 10000000 && isOfficial) { |
| languageStatus.put(base, LanguageStatus.Lit10MandOfficial); |
| } |
| } |
| for (String language : SDI.getLanguagesForTerritoriesPopulationData()) { |
| if (languageLiteratePopulation.getCount(language) < 1000000) { |
| continue; |
| } |
| String base = parser.set(language).getLanguage(); |
| for (String territory : SDI.getTerritoriesForPopulationData(language)) { |
| PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory); |
| double litPop = pop.getLiteratePopulation(); |
| double total = territoryLiteratePopulation.getCount(territory); |
| if (litPop > total / 3) { |
| languageStatus.put(base, LanguageStatus.Lit1MandOneThird); |
| } |
| } |
| } |
| for (String territory : STANDARD_CODES.getAvailableCodes("territory")) { |
| Set<CurrencyDateInfo> cdateInfo = SDI.getCurrencyDateInfo(territory); |
| if (cdateInfo == null) { |
| continue; |
| } |
| for (CurrencyDateInfo dateInfo : cdateInfo) { |
| String currency = dateInfo.getCurrency(); |
| Date last = dateInfo.getEnd(); |
| Date old = currencyToLast.get(currency); |
| if (old == null || old.compareTo(last) < 0) { |
| currencyToLast.put(currency, last); |
| } |
| } |
| } |
| } |
| |
| static CompactDecimalFormat cdf = |
| CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactStyle.SHORT); |
| |
| static String isBigLanguage(String lang) { |
| Set<LanguageStatus> status = languageStatus.get(lang); |
| Double size = languageLiteratePopulation.getCount(lang); |
| String sizeString = size == null ? "?" : cdf.format(size); |
| String off = officialSomewhere.contains(lang) ? "o" : ""; |
| if (status == null || status.isEmpty()) { |
| return "P4-" + sizeString + off; |
| } |
| return status.iterator().next().name + "-" + sizeString + off; |
| } |
| |
| static final Date NOW = new Date(); |
| |
| private static final boolean DEBUG = false; |
| |
| static class TypeName implements Transform<String, String> { |
| private final int field; |
| private final Map<String, R2<List<String>, String>> dep; |
| |
| public TypeName(int field) { |
| this.field = field; |
| switch (field) { |
| case CLDRFile.LANGUAGE_NAME: |
| dep = SDI.getLocaleAliasInfo().get("language"); |
| break; |
| case CLDRFile.TERRITORY_NAME: |
| dep = SDI.getLocaleAliasInfo().get("territory"); |
| break; |
| case CLDRFile.SCRIPT_NAME: |
| dep = SDI.getLocaleAliasInfo().get("script"); |
| break; |
| default: |
| dep = null; |
| break; |
| } |
| } |
| |
| @Override |
| public String transform(String source) { |
| String result = ENGLISH.getName(field, source); |
| String extra = ""; |
| if (field == CLDRFile.LANGUAGE_NAME) { |
| String lang = isBigLanguage(source); |
| extra = lang == null ? "X" : lang; |
| } else if (field == CLDRFile.CURRENCY_NAME) { |
| Date last = currencyToLast.get(source); |
| extra = last == null ? "?" : last.compareTo(NOW) < 0 ? "old" : ""; |
| } |
| R2<List<String>, String> depValue = dep == null ? null : dep.get(source); |
| if (depValue != null) { |
| extra += extra.isEmpty() ? "" : "-"; |
| extra += depValue.get1(); |
| } |
| return result + (extra.isEmpty() ? "" : "\t" + extra); |
| } |
| } |
| |
| RegexLookup<Level> exceptions = |
| RegexLookup.of( |
| null, |
| new Transform<String, Level>() { |
| @Override |
| public Level transform(String source) { |
| return Level.fromLevel(Integer.parseInt(source)); |
| } |
| }, |
| null) |
| .loadFromFile(TestCoverageLevel.class, "TestCoverageLevel.txt"); |
| |
| public void TestExceptions() { |
| for (Map.Entry<Finder, Level> x : exceptions) { |
| logln(x.getKey().toString() + " => " + x.getValue()); |
| } |
| } |
| |
| public void TestNarrowCurrencies() { |
| String path = "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol[@alt=\"narrow\"]"; |
| String value = ENGLISH.getStringValue(path); |
| assertEquals("Narrow $", "$", value); |
| SupplementalDataInfo sdi = |
| SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); |
| Level level = sdi.getCoverageLevel(path, "en"); |
| assertEquals("Narrow $", Level.MODERATE, level); |
| } |
| |
| public void TestA() { |
| String path = "//ldml/characterLabels/characterLabel[@type=\"other\"]"; |
| SupplementalDataInfo sdi = |
| SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); |
| Level level = sdi.getCoverageLevel(path, "en"); |
| assertEquals("Quick Check for any attribute", Level.MODERN, level); |
| } |
| |
| public void TestCoverageCompleteness() { |
| /** |
| * Check that English paths are, except for known cases, at least modern coverage. We filter |
| * out the things we know about and have determined are OK to be in comprehensive. If we add |
| * a path that doesn't get its coverage set, this test should complain about it. |
| */ |
| final ImmutableSet<String> inactiveMetazones = |
| ImmutableSet.of( |
| "Greenland", // TODO: New metazone added for tz2023d update, |
| // In CLDR 45, we don't want to include this one in modern coverage because |
| // we don't open ST for translating display names for this metazone. |
| // After 45, we will include "Greenland" in modern coverage. |
| "Bering", |
| "Dominican", |
| "Shevchenko", |
| "Alaska_Hawaii", |
| "Yerevan", |
| "Africa_FarWestern", |
| "British", |
| "Sverdlovsk", |
| "Karachi", |
| "Malaya", |
| "Oral", |
| "Frunze", |
| "Dutch_Guiana", |
| "Irish", |
| "Uralsk", |
| "Tashkent", |
| "Kwajalein", |
| "Ashkhabad", |
| "Kizilorda", |
| "Kuybyshev", |
| "Baku", |
| "Dushanbe", |
| "Goose_Bay", |
| "Liberia", |
| "Samarkand", |
| "Tbilisi", |
| "Borneo", |
| "Greenland_Central", |
| "Dacca", |
| "Aktyubinsk", |
| "Turkey", |
| "Urumqi", |
| "Acre", |
| "Almaty", |
| "Anadyr", |
| "Aqtau", |
| "Aqtobe", |
| "Kamchatka", |
| "Macau", |
| "Qyzylorda", |
| "Samara", |
| "Casey", |
| "Guam", |
| "Lanka", |
| "North_Mariana"); |
| |
| final Pattern calendar100 = |
| PatternCache.get("(coptic|ethiopic-amete-alem|islamic-(rgsa|tbla|umalqura))"); |
| |
| // Warning: shorter strings must come AFTER longer ones. Can process with MinimizeRegex to |
| // reorder |
| final Pattern language100 = |
| PatternCache.get( |
| "(" // start |
| + "nds_NL|fa_AF|ro_MD|sr_ME|sw_CD" |
| // Length 4 |
| + "|root" |
| // Length 3 |
| + "|ace|ach|ada|ady|aeb|afh|agq|ain|akk|akz|ale|aln|alt|ang|ann|anp|apc|arc|arn|aro|arp|arq|ars|arw|ary|arz|asa|ase|atj|avk|awa" |
| + "|bal|ban|bar|bax|bbc|bbj|bej|bem|bew|bez|bfd|bfq|bgc|bgn|bho|bik|bin|bjn|bkm|bla|blo|blt|bpy|bqi|bra|brh|bss|bua|bug|bum|byn|byv" |
| + "|cad|car|cay|cch|ccp|cgg|chb|chg|chk|chm|chn|cho|chp|chy|cic|ckb|clc|cop|cps|crg|crh|crj|crk|crl|crm|crr|crs|csb|csw|cwd" |
| + "|dak|dar|dav|del|den|dgr|din|dje|doi|dtp|dua|dum|dyo|dyu|dzg" |
| + "|ebu|efi|egl|egy|eka|elx|enm|esu|ext|fan|fat|fit|fon|frc|frm|fro|frp|frr|frs|fur" |
| + "|gaa|gag|gan|gay|gba|gbz|gez|gil|glk|gmh|goh|gom|gon|gor|got|grb|grc|gsw|guc|gur|guz|gwi" |
| + "|hai|hak|haw|hax|hdn|hif|hil|hit|hnj|hsn|hup|hur|iba|ilo|inh|izh|jam|jbo|jgo|jmc|jpr|jrb|jut" |
| + "|kaa|kab|kac|kaj|kam|kaw|kbd|kbl|kcg|kde|ken|kfo|kgp|kha|kho|khq|khw|kiu|kln|kmb|koi|kos|kpe|krc|kri|krj|krl|kru|ksb|ksf|ksh|kum|kut|kwk|kxv" |
| + "|lad|lag|lah|lam|lez|lfn|lij|lil|liv|lkt|lmo|lol|lou|loz|lrc|ltg|lua|lui|lun|luo|lus|luy|lzh|lzz" |
| + "|mad|maf|mag|mai|mak|man|mas|mde|mdf|mdr|men|mer|mfe|mga|mgh|mgo|mic|min|mnc|mni|moe|moh|mos|mrj|mua|mus|mwl|mwr|mwv|mye|myv|mzn" |
| + "|nan|nap|naq|nds|new|nia|niu|njo|nmg|nog|non|nov|nqo|nso|nus|nwc|nym|nyn|nyo|nzi|oka|osa|ota" |
| + "|pag|pal|pam|pap|pau|pcd|pcm|pdc|pdt|peo|pfl|phn|pms|pnt|pon|pqm|prg|pro|quc|qug|raj|rap|rar|rgn|rif|rof|rom|rtm|rue|rug|rup|rwk" |
| + "|sad|sam|saq|sas|sat|saz|sba|sbp|sdc|sdh|see|seh|sei|sel|ses|sga|sgs|shi|shn|shu|sid|skr|slh|sli|sly|sma|smj|smn|sms|snk|sog|srn|srr|stq|str|suk|sus|sux|swb|syc|syr|szl" |
| + "|tce|tcy|tem|teo|ter|tet|tgx|tht|tig|tiv|tkl|tkr|tlh|tli|tly|tmh|tog|tok|tpi|tru|trv|trw|tsd|tsi|ttm|ttt|tum|tvl|tzm" |
| + "|udm|uga|umb|vai|vec|vep|vls|vmf|vmw|vot|vro|vun|wae|wal|war|was|wbp|wuu|xal|xmf|xnr|xog|yao|yap|yrl|zap|zbl|zea|zen|zgh|zun|zza" |
| + "|ike|ojg|ssy|pis|twq" |
| // Length 2 |
| + "|aa|ab|ae|ak|an|av|ay|ba|bi|bm|bo|ce|ch|cr|cu|cv|dv|dz|ee|eo|fj|gn|gv|ho|hz|ie|ii|ik|io|iu|kg|ki|kj|kl|kv|kw|lg|li|ln|lu" |
| + "|mg|mh|na|nb|nd|ng|no|nr|nv|oc|oj|om|os|pi|rn|rw|sc|se|sg|sh|sn|ss|tl|tn|ts|tw|ty|ve|vo|wa|yi|za" |
| // end |
| + ")"); |
| |
| /** |
| * Recommended scripts that are allowed for comprehensive coverage. Not-recommended scripts |
| * (according to ScriptMetadata) are filtered out automatically. |
| */ |
| final Pattern script100 = PatternCache.get("(Zinh)"); |
| |
| final Pattern keys100 = |
| PatternCache.get( |
| "(col(Alternate|Backwards|CaseFirst|CaseLevel|HiraganaQuaternary|" |
| + "Normalization|Numeric|Reorder|Strength)|kv|sd|mu|timezone|va|variableTop|x|d0|h0|i0|k0|m0|s0)"); |
| |
| final Pattern numberingSystem100 = |
| PatternCache.get( |
| "(" |
| + "finance|native|traditional|adlm|ahom|bali|bhks|brah|cakm|cham|cyrl|diak|" |
| + "gong|gonm|hanidays|hmng|hmnp|java|jpanyear|kali|kawi|lana(tham)?|lepc|limb|" |
| + "math(bold|dbl|mono|san[bs])|modi|mong|mroo|mtei|mymr(shan|tlng)|" |
| + "nagm|newa|nkoo|olck|osma|rohg|saur|segment|shrd|sin[dh]|sora|sund|" |
| + "takr|talu|tirh|tnsa|vaii|wara|wcho)"); |
| |
| final Pattern collation100 = |
| PatternCache.get( |
| "(" |
| + "big5han|compat|dictionary|emoji|eor|gb2312han|phonebook|phonetic|pinyin|searchjl|stroke|traditional|unihan|zhuyin)"); |
| |
| SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo(); |
| CLDRFile english = testInfo.getEnglish(); |
| |
| // Calculate date of the upcoming CLDR release, minus 5 years (deprecation policy) |
| final int versionNumber = Integer.valueOf((CLDRFile.GEN_VERSION).split("\\.")[0]); |
| Calendar cal = Calendar.getInstance(); |
| cal.set(versionNumber / 2 + versionNumber % 2 + 2001, 8 - (versionNumber % 2) * 6, 15); |
| Date cldrReleaseMinus5Years = cal.getTime(); |
| Set<String> modernCurrencies = |
| SDI.getCurrentCurrencies(SDI.getCurrencyTerritories(), cldrReleaseMinus5Years, NOW); |
| |
| Set<String> needsNumberSystem = new HashSet<>(); |
| DtdData dtdData = DtdData.getInstance(DtdType.ldml); |
| Element numbersElement = dtdData.getElementFromName().get("numbers"); |
| for (Element childOfNumbers : numbersElement.getChildren().keySet()) { |
| if (childOfNumbers.containsAttribute("numberSystem")) { |
| needsNumberSystem.add(childOfNumbers.name); |
| } |
| } |
| |
| for (String path : english.fullIterable()) { |
| logln("Testing path => " + path); |
| XPathParts xpp = XPathParts.getFrozenInstance(path); |
| if (path.endsWith("/alias") |
| || path.matches( |
| "//ldml/(identity|contextTransforms|layout|localeDisplayNames/transformNames)/.*")) { |
| continue; |
| } |
| if (sdi.isDeprecated(DtdType.ldml, path)) { |
| continue; |
| } |
| Level lvl = sdi.getCoverageLevel(path, "en"); |
| if (lvl == Level.UNDETERMINED) { |
| errln("Undetermined coverage value for path => " + path); |
| continue; |
| } |
| if (lvl.compareTo(Level.MODERN) <= 0) { |
| logln("Level OK [" + lvl.toString() + "] for path => " + path); |
| continue; |
| } |
| |
| if (path.startsWith("//ldml/numbers")) { |
| // Paths in numbering systems outside "latn" are specifically excluded. |
| String numberingSystem = xpp.findFirstAttributeValue("numberSystem"); |
| if (numberingSystem != null && !numberingSystem.equals("latn")) { |
| continue; |
| } |
| if (xpp.containsElement("currencySpacing") || xpp.containsElement("list")) { |
| continue; |
| } |
| if (xpp.containsElement("currency")) { |
| String currencyType = xpp.findAttributeValue("currency", "type"); |
| if (!modernCurrencies.contains(currencyType)) { |
| continue; // old currency or not tender, so we don't care |
| } |
| } |
| // Currently not collecting timeSeparator data in SurveyTool |
| if (xpp.containsElement("timeSeparator")) { |
| continue; |
| } |
| // Other paths in numbers without a numbering system are deprecated. |
| // if (numberingSystem == null) { |
| // continue; |
| // } |
| if (needsNumberSystem.contains(xpp.getElement(2))) { |
| continue; |
| } |
| } else if (xpp.containsElement("zone")) { |
| String zoneType = xpp.findAttributeValue("zone", "type"); |
| if ((zoneType.startsWith("Etc/GMT") || zoneType.equals("Etc/UTC")) |
| && path.endsWith("exemplarCity")) { |
| continue; |
| } |
| // We don't survey for short timezone names or at least some alts |
| if (path.contains("/short/") || path.contains("[@alt=\"formal\"]")) { |
| continue; |
| } |
| } else if (xpp.containsElement("metazone")) { |
| // We don't survey for short metazone names |
| if (path.contains("/short/")) { |
| continue; |
| } |
| String mzName = xpp.findAttributeValue("metazone", "type"); |
| // Skip inactive metazones. |
| if (inactiveMetazones.contains(mzName)) { |
| continue; |
| } |
| // Skip paths for daylight or generic mz strings where |
| // the mz doesn't use DST. |
| if ((path.endsWith("daylight") || path.endsWith("generic")) |
| && !LogicalGrouping.metazonesDSTSet.contains(mzName)) { |
| continue; |
| } |
| } else if (path.startsWith("//ldml/dates/fields")) { |
| if ("variant".equals(xpp.findAttributeValue("displayName", "alt"))) { |
| continue; |
| } |
| // relative day/week/month, etc. short or narrow |
| if (xpp.getElement(-1).equals("relative")) { |
| String fieldType = xpp.findAttributeValue("field", "type"); |
| if (fieldType.matches(".*-(short|narrow)|quarter")) { |
| continue; |
| } |
| } |
| } else if (xpp.containsElement("language")) { |
| // Comprehensive coverage is OK for some languages. |
| String languageType = xpp.findAttributeValue("language", "type"); |
| if (language100.matcher(languageType).matches()) { |
| continue; |
| } |
| } else if (xpp.containsElement("script")) { |
| // Skip user defined script codes and alt=short |
| String scriptType = xpp.findAttributeValue("script", "type"); |
| if (scriptType.startsWith("Q") |
| || "short".equals(xpp.findAttributeValue("script", "alt"))) { |
| continue; |
| } |
| ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(scriptType); |
| if (scriptInfo == null |
| || scriptInfo.idUsage != ScriptMetadata.IdUsage.RECOMMENDED) { |
| continue; |
| } |
| if (script100.matcher(scriptType).matches()) { |
| continue; |
| } |
| } else if (xpp.containsElement("territory")) { |
| String territoryType = xpp.findAttributeValue("territory", "type"); |
| if (territoryType.equals("CQ")) { // Exceptionally reserved by ISO-3166 |
| continue; |
| } |
| } else if (xpp.containsElement("key")) { |
| // Comprehensive coverage is OK for some key/types. |
| String keyType = xpp.findAttributeValue("key", "type"); |
| if (keys100.matcher(keyType).matches()) { |
| continue; |
| } |
| } else if (xpp.containsElement("type")) { |
| if ("short".equals(xpp.findAttributeValue("type", "alt"))) { |
| continue; |
| } |
| // Comprehensive coverage is OK for some key/types. |
| String keyType = xpp.findAttributeValue("type", "key"); |
| if (keys100.matcher(keyType).matches()) { |
| continue; |
| } |
| if (keyType.equals("numbers")) { |
| String ns = xpp.findAttributeValue("type", "type"); |
| if (numberingSystem100.matcher(ns).matches()) { |
| continue; |
| } |
| } |
| if (keyType.equals("collation")) { |
| String ct = xpp.findAttributeValue("type", "type"); |
| if (collation100.matcher(ct).matches()) { |
| continue; |
| } |
| } |
| if (keyType.equals("calendar")) { |
| String ct = xpp.findAttributeValue("type", "type"); |
| if (calendar100.matcher(ct).matches()) { |
| continue; |
| } |
| } |
| } else if (xpp.containsElement("variant")) { |
| // All variant names are comprehensive coverage |
| continue; |
| } else if (path.startsWith("//ldml/dates/calendars")) { |
| String calType = xpp.findAttributeValue("calendar", "type"); |
| if (!calType.matches("(gregorian|generic)")) { |
| continue; |
| } |
| // So far we are generating datetimeSkeleton mechanically, no coverage |
| if (xpp.containsElement("datetimeSkeleton")) { |
| continue; |
| } |
| // The alt="ascii" time patterns are hopefully short-lived. We do not survey |
| // for them, they can be generated mechanically from the non-alt patterns. |
| // CLDR-16606 |
| if (path.contains("[@alt=\"ascii\"]")) { |
| continue; |
| } |
| String element = xpp.getElement(-1); |
| // Skip things that shouldn't normally exist in the generic calendar |
| // days, dayPeriods, quarters, and months |
| if (calType.equals("generic")) { |
| if (element.matches("(day(Period)?|month|quarter|era|appendItem)")) { |
| continue; |
| } |
| if (xpp.containsElement("intervalFormatItem")) { |
| String intervalFormatID = |
| xpp.findAttributeValue("intervalFormatItem", "id"); |
| // "Time" related, so shouldn't be in generic calendar. |
| if (intervalFormatID.matches("(h|H).*")) { |
| continue; |
| } |
| } |
| if (xpp.containsElement("dateFormatItem")) { |
| String dateFormatID = xpp.findAttributeValue("dateFormatItem", "id"); |
| // "Time" related, so shouldn't be in generic calendar. |
| if (dateFormatID.matches("E?(h|H|m).*")) { |
| continue; |
| } |
| } |
| if (xpp.containsElement("timeFormat")) { |
| continue; |
| } |
| } else { // Gregorian calendar |
| if (xpp.containsElement("eraNarrow")) { |
| continue; |
| } |
| if (element.equals("appendItem")) { |
| String request = xpp.findAttributeValue("appendItem", "request"); |
| if (!request.equals("Timezone")) { |
| continue; |
| } |
| } else if (element.equals("dayPeriod")) { |
| if ("variant".equals(xpp.findAttributeValue("dayPeriod", "alt"))) { |
| continue; |
| } |
| } else if (element.equals("dateFormatItem")) { |
| // ldml/dates/calendars/calendar[@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[@id='%dateFormatItems'] |
| assertEquals(path, Level.BASIC, lvl); |
| continue; |
| } |
| } |
| } else if (path.startsWith("//ldml/units")) { |
| // Skip paths for narrow unit fields. |
| if ("narrow".equals(xpp.findAttributeValue("unitLength", "type")) |
| || path.endsWith("/compoundUnitPattern1")) { |
| continue; |
| } |
| } else if (xpp.contains("posix")) { |
| continue; |
| } |
| |
| errln("Comprehensive & no exception for path =>\t" + path); |
| } |
| } |
| |
| public static class TargetsAndSublocales { |
| public final CoverageVariableInfo cvi; |
| public Set<String> scripts; |
| public Set<String> regions; |
| |
| public TargetsAndSublocales(String localeLanguage) { |
| cvi = SDI.getCoverageVariableInfo(localeLanguage); |
| scripts = new TreeSet<>(); |
| regions = new TreeSet<>(); |
| } |
| |
| public boolean addScript(String localeScript) { |
| return scripts.add(localeScript); |
| } |
| |
| public boolean addRegion(String localeRegion) { |
| return regions.add(localeRegion); |
| } |
| } |
| |
| public void TestCoverageVariableInfo() { |
| /** |
| * Compare the targetScripts and targetTerritories for a language to what we actually have |
| * in locales |
| */ |
| Map<String, TargetsAndSublocales> langToTargetsAndSublocales = new TreeMap<>(); |
| org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); |
| for (CLDRLocale locale : factory.getAvailableCLDRLocales()) { |
| String language = locale.getLanguage(); |
| if (language.length() == 0 || language.equals("root")) { |
| continue; |
| } |
| TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language); |
| if (targetsAndSublocales == null) { |
| targetsAndSublocales = new TargetsAndSublocales(language); |
| langToTargetsAndSublocales.put(language, targetsAndSublocales); |
| } |
| String script = locale.getScript(); |
| if (script.length() > 0) { |
| targetsAndSublocales.addScript(script); |
| } |
| String region = locale.getCountry(); |
| if (region.length() > 0 |
| && region.length() < 3) { // do not want numeric codes like 001, 419 |
| targetsAndSublocales.addRegion(region); |
| } |
| } |
| |
| for (String language : langToTargetsAndSublocales.keySet()) { |
| TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language); |
| if (targetsAndSublocales == null) { |
| continue; |
| } |
| Set<String> targetScripts = new TreeSet<>(targetsAndSublocales.cvi.targetScripts); |
| Set<String> localeScripts = targetsAndSublocales.scripts; |
| localeScripts.removeAll(targetScripts); |
| if (localeScripts.size() > 0) { |
| errln( |
| "Missing scripts for language: " |
| + language |
| + ", target scripts: " |
| + targetScripts |
| + ", but locales also have: " |
| + localeScripts); |
| } |
| Set<String> targetRegions = new TreeSet<>(targetsAndSublocales.cvi.targetTerritories); |
| Set<String> localeRegions = targetsAndSublocales.regions; |
| localeRegions.removeAll(targetRegions); |
| if (localeRegions.size() > 0) { |
| errln( |
| "Missing regions for language: " |
| + language |
| + ", target regions: " |
| + targetRegions |
| + ", but locales also have: " |
| + localeRegions); |
| } |
| } |
| } |
| |
| public void testBreakingLogicalGrouping() { |
| checkBreakingLogicalGrouping("en"); |
| checkBreakingLogicalGrouping("ar"); |
| checkBreakingLogicalGrouping("de"); |
| checkBreakingLogicalGrouping("pl"); |
| } |
| |
| private void checkBreakingLogicalGrouping(String localeId) { |
| SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo(); |
| CLDRFile cldrFile = testInfo.getCldrFactory().make(localeId, true); |
| HashSet<String> seen = new HashSet<>(); |
| Multimap<Level, String> levelToPaths = TreeMultimap.create(); |
| int count = 0; |
| for (String path : cldrFile.fullIterable()) { |
| if (seen.contains(path)) { |
| continue; |
| } |
| Set<String> grouping = LogicalGrouping.getPaths(cldrFile, path); |
| seen.add(path); |
| if (grouping == null) { |
| continue; |
| } |
| seen.addAll(grouping); |
| levelToPaths.clear(); |
| for (String groupingPath : grouping) { |
| if (LogicalGrouping.isOptional(cldrFile, groupingPath)) { |
| continue; |
| } |
| Level level = sdi.getCoverageLevel(groupingPath, localeId); |
| levelToPaths.put(level, groupingPath); |
| } |
| if (levelToPaths.keySet().size() <= 1) { |
| continue; |
| } |
| // we have a failure |
| for (Entry<Level, Collection<String>> entry : levelToPaths.asMap().entrySet()) { |
| errln( |
| localeId |
| + " (" |
| + count |
| + ") Broken Logical Grouping: " |
| + entry.getKey() |
| + " => " |
| + entry.getValue()); |
| } |
| ++count; |
| } |
| } |
| |
| public void testLogicalGroupingSamples() { |
| getLogger().fine(GrammarInfo.getGrammarLocales().toString()); |
| String[][] test = { |
| { |
| "de", "SINGLETON", "//ldml/localeDisplayNames/localeDisplayPattern/localePattern", |
| }, |
| { |
| "de", |
| "METAZONE", |
| "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/generic", |
| "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/standard", |
| "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/daylight", |
| }, |
| { |
| "de", |
| "DAYS", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sun\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"mon\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"tue\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"wed\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"thu\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"fri\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sat\"]", |
| }, |
| { |
| "nl", |
| "DAY_PERIODS", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"afternoon1\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"evening1\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"night1\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"midnight\"]", |
| }, |
| { |
| "de", |
| "QUARTERS", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"1\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"2\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"3\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"4\"]", |
| }, |
| { |
| "de", |
| "MONTHS", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"2\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"3\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"4\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"5\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"6\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"7\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"8\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"9\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"10\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"11\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"12\"]", |
| }, |
| { |
| "de", |
| "RELATIVE", |
| "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"-1\"]", |
| "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"0\"]", |
| "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"1\"]", |
| }, |
| { |
| "de", |
| "DECIMAL_FORMAT_LENGTH", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", |
| }, |
| { |
| "cs", |
| "COUNT", |
| "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"one\"]", |
| "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"few\"]", |
| "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"many\"]", |
| "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"other\"]", |
| }, |
| { |
| "de", |
| "COUNT", |
| "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"one\"]", |
| "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"other\"]", |
| }, |
| { |
| "de", |
| "COUNT_CASE", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"]", |
| }, |
| { |
| "hi", |
| "COUNT_CASE_GENDER", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"oblique\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"oblique\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"oblique\"]", |
| "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"oblique\"]" |
| } |
| }; |
| Set<PathType> seenPt = new TreeSet<>(Arrays.asList(PathType.values())); |
| for (String[] row : test) { |
| String locale = row[0]; |
| PathType expectedPathType = PathType.valueOf(row[1]); |
| CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true); |
| List<String> paths = Arrays.asList(row); |
| paths = paths.subList(2, paths.size()); |
| Set<String> expected = new TreeSet<>(paths); |
| Set<Multimap<String, String>> seen = new LinkedHashSet<>(); |
| for (String path : expected) { |
| Set<String> grouping = new TreeSet<>(LogicalGrouping.getPaths(cldrFile, path)); |
| final Multimap<String, String> deltaValue = delta(expected, grouping); |
| if (seen.add(deltaValue)) { |
| assertEquals( |
| "Logical group for " + locale + ", " + path, |
| ImmutableListMultimap.of(), |
| deltaValue); |
| } |
| PathType actualPathType = PathType.getPathTypeFromPath(path); |
| assertEquals("PathType", expectedPathType, actualPathType); |
| } |
| seenPt.remove(expectedPathType); |
| } |
| assertEquals("PathTypes tested", Collections.emptySet(), seenPt); |
| } |
| |
| private Multimap<String, String> delta(Set<String> expected, Set<String> grouping) { |
| if (expected.equals(grouping)) { |
| return ImmutableListMultimap.of(); |
| } |
| Multimap<String, String> result = LinkedHashMultimap.create(); |
| TreeSet<String> aMinusB = new TreeSet<>(expected); |
| aMinusB.removeAll(grouping); |
| result.putAll("expected-actual", aMinusB); |
| TreeSet<String> bMinusA = new TreeSet<>(grouping); |
| bMinusA.removeAll(expected); |
| result.putAll("actual-expected", bMinusA); |
| return result; |
| } |
| |
| static class CoverageStatus { |
| |
| private Level level; |
| private boolean inRoot; |
| private boolean inId; |
| private Level languageLevel; |
| private String displayName; |
| |
| public CoverageStatus( |
| Level level, |
| boolean inRoot, |
| boolean inId, |
| Level languageLevel, |
| String displayName) { |
| this.level = level; |
| this.inRoot = inRoot; |
| this.inId = inId; |
| this.languageLevel = languageLevel == null ? Level.UNDETERMINED : languageLevel; |
| this.displayName = displayName; |
| } |
| |
| @Override |
| public String toString() { |
| return (inRoot ? "root" : "x") |
| + "\t" |
| + (inId ? "ids" : "x") |
| + "\t" |
| + stringForm(languageLevel) |
| + "\t" |
| + stringForm(level) |
| + "\t" |
| + displayName; |
| } |
| |
| private String stringForm(Level level2) { |
| if (level == null) { |
| return "Ï…nd"; |
| } |
| switch (level2) { |
| case UNDETERMINED: |
| return "Ï…nd"; |
| case COMPREHENSIVE: |
| return "ϲomp"; |
| default: |
| return level2.toString(); |
| } |
| } |
| } |
| |
| public void testLSR() { |
| SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo(); |
| org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); |
| CLDRFile root = factory.make(LocaleNames.ROOT, true); |
| CoverageLevel2 coverageLevel = |
| CoverageLevel2.getInstance(supplementalData, "qtz"); // non-existent locale |
| |
| Set<String> langsRoot = new TreeSet<>(); |
| Set<String> scriptsRoot = new TreeSet<>(); |
| Set<String> regionsRoot = new TreeSet<>(); |
| |
| // Get root LSR codes |
| |
| for (String path : root) { |
| if (!path.startsWith("//ldml/localeDisplayNames/")) { |
| continue; |
| } |
| XPathParts parts = XPathParts.getFrozenInstance(path); |
| String code = parts.getAttributeValue(3, "type"); |
| if (code == null || code.contains("_")) { |
| continue; |
| } |
| switch (parts.getElement(3)) { |
| case "language": |
| langsRoot.add(code); |
| break; |
| case "script": |
| scriptsRoot.add(code); |
| break; |
| case "territory": |
| regionsRoot.add(code); |
| break; |
| } |
| } |
| langsRoot = ImmutableSet.copyOf(langsRoot); |
| scriptsRoot = ImmutableSet.copyOf(scriptsRoot); |
| regionsRoot = ImmutableSet.copyOf(regionsRoot); |
| |
| // get CLDR locale IDs' codes |
| |
| Map<String, Level> langs = new TreeMap<>(); |
| Map<String, Level> scripts = new TreeMap<>(); |
| Map<String, Level> regions = new TreeMap<>(); |
| LikelySubtags likely = new LikelySubtags(); |
| |
| LanguageTagParser ltp = new LanguageTagParser(); |
| for (String locale : factory.getAvailable()) { |
| Level languageLevel = STANDARD_CODES.getLocaleCoverageLevel(Organization.cldr, locale); |
| if (languageLevel == null || languageLevel == Level.UNDETERMINED) { |
| languageLevel = Level.CORE; |
| } |
| ltp.set(locale); |
| likely.maximize(ltp); |
| addBestLevel(langs, ltp.getLanguage(), languageLevel); |
| addBestLevel(scripts, ltp.getScript(), languageLevel); |
| addBestLevel(regions, ltp.getRegion(), languageLevel); |
| } |
| regions.remove(""); |
| scripts.remove(""); |
| |
| // get the data |
| |
| Map<String, CoverageStatus> data = new TreeMap<>(); |
| |
| ImmutableMap<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeToInfo = |
| ImmutableMap.of( |
| CLDRFile.LANGUAGE_NAME, |
| Row.of("language", langs, langsRoot, Level.MODERN), |
| CLDRFile.SCRIPT_NAME, |
| Row.of("script", scripts, scriptsRoot, Level.MODERATE), |
| CLDRFile.TERRITORY_NAME, |
| Row.of("region", regions, regionsRoot, Level.MODERATE)); |
| |
| for (Entry<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo : |
| typeToInfo.entrySet()) { |
| int type = typeAndInfo.getKey(); |
| String name = typeAndInfo.getValue().get0(); |
| Map<String, Level> idPartMap = typeAndInfo.getValue().get1(); |
| Set<String> setRoot = typeAndInfo.getValue().get2(); |
| Level targetLevel = typeAndInfo.getValue().get3(); |
| for (String code : Sets.union(idPartMap.keySet(), setRoot)) { |
| String displayName = testInfo.getEnglish().getName(type, code); |
| String path = CLDRFile.getKey(type, code); |
| Level level = coverageLevel.getLevel(path); |
| data.put( |
| name + "\t" + code, |
| new CoverageStatus( |
| level, |
| setRoot.contains(code), |
| idPartMap.containsKey(code), |
| idPartMap.get(code), |
| displayName)); |
| } |
| } |
| if (SHOW_LSR_DATA) { |
| |
| System.out.println( |
| "\nType\tCode\tIn Root\tIn CLDR Locales\tCLDR TargeLevel\tRoot Path Level\tCombinations"); |
| for (Entry<String, CoverageStatus> entry : data.entrySet()) { |
| System.out.println(entry.getKey() + "\t" + entry.getValue()); |
| } |
| System.out.println(); |
| for (Entry<String, CoverageStatus> entry : data.entrySet()) { |
| final String key = entry.getKey(); |
| if (!key.startsWith("language")) { |
| continue; |
| } |
| final CoverageStatus value = entry.getValue(); |
| if (value.inId) { |
| continue; |
| } |
| String[] parts = key.split("\t"); |
| PopulationData population = SDI.getBaseLanguagePopulationData(parts[1]); |
| if (population == null) { |
| System.out.println(key + "\t" + value.displayName + "\t" + value + "\t-1\t-1"); |
| } else { |
| System.out.println( |
| key |
| + "\t" |
| + value.displayName |
| + "\t" |
| + value |
| + "\t" |
| + population.getPopulation() |
| + "\t" |
| + population.getLiteratePopulation()); |
| } |
| } |
| } |
| |
| Set<String> ids = new TreeSet<>(); |
| Set<String> missing = new TreeSet<>(); |
| for (Entry<String, CoverageStatus> entry : data.entrySet()) { |
| final String key = entry.getKey(); |
| if (!key.startsWith("language")) { |
| continue; |
| } |
| final CoverageStatus value = entry.getValue(); |
| if (value.inId) { |
| String[] parts = key.split("\t"); |
| ids.add(parts[1]); |
| if (!value.inRoot) { |
| missing.add(parts[1]); |
| } |
| } |
| } |
| if (!assertEquals( |
| "Language subtags that are in a CLDR locale's ID are in root (" |
| + missing.size() |
| + ")", |
| "", |
| Joiner.on(' ').join(missing))) { |
| warnln( |
| "Full set for resetting $language in attributeValueValidity.xml (" |
| + ids.size() |
| + "):" |
| + breakLines(ids, "\n ")); |
| } |
| } |
| |
| private String breakLines(Set<String> ids, String indent) { |
| StringBuilder result = new StringBuilder(); |
| int lastFirstChar = 0; |
| for (String id : ids) { |
| int firstChar = id.codePointAt(0); |
| result.append(firstChar == lastFirstChar ? " " : indent); |
| result.append(id); |
| lastFirstChar = firstChar; |
| } |
| return result.toString(); |
| } |
| |
| private void addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level) { |
| if (level != Level.UNDETERMINED) { |
| int debug = 0; |
| } |
| Level old = codeToBestLevel.get(code); |
| if (old == null) { |
| codeToBestLevel.put(code, level); |
| } else if (level.compareTo(old) > 0) { |
| codeToBestLevel.put(code, level); |
| } else if (level != old) { |
| int debug = 0; |
| } |
| } |
| |
| public void TestEnglishCoverage() { |
| Output<String> pathWhereFound = new Output<>(); |
| Output<String> localeWhereFound = new Output<>(); |
| Set<Row.R5<String, String, Boolean, Boolean, Level>> inherited = new TreeSet<>(); |
| for (String path : ENGLISH) { |
| String value = ENGLISH.getStringValueWithBailey(path, pathWhereFound, localeWhereFound); |
| final boolean samePath = path.equals(pathWhereFound.value); |
| final boolean sameLocale = "en".equals(localeWhereFound.value); |
| if (!samePath) { |
| Level level = SDI.getCoverageLevel(path, "en"); |
| if (level.compareTo(Level.MODERN) <= 0) { |
| inherited.add(Row.of(path, value, samePath, sameLocale, level)); |
| } |
| } |
| } |
| if (!assertEquals("English has sideways inheritance:", 0, inherited.size())) { |
| System.out.println("Check the following, then use in modify_config.txt\n"); |
| String pattern = "locale=en ; action=add ; new_path=%s ; new_value=%s"; |
| for (Row.R5<String, String, Boolean, Boolean, Level> row : inherited) { |
| System.out.println(String.format(pattern, row.get0(), row.get1())); |
| if (DEBUG) { |
| System.out.println( |
| String.format( |
| "%s\t%s\t%s\t%s\t%s", |
| row.get0(), row.get1(), row.get2(), row.get3(), row.get4())); |
| } |
| } |
| } |
| } |
| } |