// tools/cldr-code/src/test/java/org/unicode/cldr/unittest/LikelySubtagsTest.java - platform/external/cldr - Git at Google

 package org.unicode.cldr.unittest;

 import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Sets;
 import com.google.common.collect.TreeMultimap;
 import com.ibm.icu.dev.test.TestFmwk;
 import com.ibm.icu.dev.util.UnicodeMap;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.VersionInfo;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import org.unicode.cldr.draft.ScriptMetadata;
 import org.unicode.cldr.draft.ScriptMetadata.Info;
 import org.unicode.cldr.tool.LikelySubtags;
 import org.unicode.cldr.util.CLDRConfig;
 import org.unicode.cldr.util.CLDRFile;
 import org.unicode.cldr.util.CLDRFile.ExemplarType;
 import org.unicode.cldr.util.CLDRFile.WinningChoice;
 import org.unicode.cldr.util.CLDRLocale;
 import org.unicode.cldr.util.CalculatedCoverageLevels;
 import org.unicode.cldr.util.ChainedMap;
 import org.unicode.cldr.util.ChainedMap.M3;
 import org.unicode.cldr.util.CldrUtility;
 import org.unicode.cldr.util.Containment;
 import org.unicode.cldr.util.Factory;
 import org.unicode.cldr.util.LanguageTagParser;
 import org.unicode.cldr.util.Level;
 import org.unicode.cldr.util.ScriptToExemplars;
 import org.unicode.cldr.util.StandardCodes;
 import org.unicode.cldr.util.StandardCodes.LstrType;
 import org.unicode.cldr.util.SupplementalDataInfo;
 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
 import org.unicode.cldr.util.Validity;
 import org.unicode.cldr.util.Validity.Status;

 public class LikelySubtagsTest extends TestFmwk {

     // Shared Validity instance; the tests below use it for status-to-code lookups.
     private static final Validity VALIDITY = Validity.getInstance();
     // When true, TestCompleteness prints the collected language/script/region sets.
     private boolean DEBUG = false;
     // True when the system property SHOW_EXEMPLARS is defined (any value, e.g. -DSHOW_EXEMPLARS);
     // enables the detailed exemplar output in the exemplar tests.
     private static boolean SHOW_EXEMPLARS = System.getProperty("SHOW_EXEMPLARS") != null;
     private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance();
     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
             CLDR_CONFIG.getSupplementalDataInfo();
     // Likely-subtags mapping from the supplemental data: key is the source tag, value is its
     // expansion.
     static final Map<String, String> likely = SUPPLEMENTAL_DATA_INFO.getLikelySubtags();
     // LikelySubtags instance used for maximize() calls.
     static final LikelySubtags LIKELY = new LikelySubtags();

     /** Command-line entry point: runs this test class with the given test-framework arguments. */
     public static void main(String[] args) {
         final LikelySubtagsTest test = new LikelySubtagsTest();
         test.run(args);
     }

     /**
      * Collects the language, script and region subtags that occur in the likely-subtags data
      * (both keys and values), together with the language/script/region combinations that the
      * completeness test walks.
      */
     static class Tags {
         /** Every language subtag seen in a likely-subtags key or value. */
         final Set<String> languages = new TreeSet<>();
         /** Every script subtag seen (including the empty string for "no script"). */
         final Set<String> scripts = new TreeSet<>();
         /** Every region seen that is empty or accepted by {@code StandardCodes.isCountry}. */
         final Set<String> regions = new TreeSet<>();
         /** Keys of the form und + script + region. */
         final Set<String> scriptRegion = new TreeSet<>();
         /** Keys with a non-und language and a script, but no region. */
         final Set<String> languageScript = new TreeSet<>();
         /** Keys with a non-und language and a region, but no script. */
         final Set<String> languageRegion = new TreeSet<>();
         /** Keys with a non-und language, a script and a region. */
         final Set<String> all = new TreeSet<>();
         /** language → script → region combinations to test; the Boolean value is always TRUE. */
         final ChainedMap.M4<String, String, String, Boolean> languageToScriptToRegions =
                 ChainedMap.of(
                         new TreeMap<String, Object>(),
                         new TreeMap<String, Object>(),
                         new TreeMap<String, Object>(),
                         Boolean.class);
         /** language → region pairs seen in the data; the Boolean value is always TRUE. */
         final ChainedMap.M3<String, String, Boolean> languageToRegions =
                 ChainedMap.of(
                         new TreeMap<String, Object>(),
                         new TreeMap<String, Object>(),
                         Boolean.class);

         /**
          * Loads every key and value of the likely-subtags map, then, for each language other
          * than "und", adds combinations with one script and one region that were not seen with
          * that language.
          *
          * @throws IllegalArgumentException if no unused non-empty script or region can be found
          *     for some language
          */
         public Tags() {
             final LanguageTagParser ltp = new LanguageTagParser();
             for (Entry<String, String> entry : likely.entrySet()) {
                 add(ltp.set(entry.getKey()), true);
                 add(ltp.set(entry.getValue()), false);
             }
             // add unfamiliar script, unfamiliar region
             for (String lang : languageToScriptToRegions.keySet()) {
                 if (lang.equals("und")) {
                     continue;
                 }
                 M3<String, String, Boolean> scriptToRegion = languageToScriptToRegions.get(lang);
                 final Set<String> scriptsFor = scriptToRegion.keySet();
                 final Set<String> regionsFor = languageToRegions.get(lang).keySet();

                 // Must be computed before the puts below, which add to this language's entries.
                 String firstScriptNotIn = getNonEmptyNotIn(scripts, scriptsFor);
                 String firstRegionNotIn = getNonEmptyNotIn(regions, regionsFor);

                 languageToScriptToRegions.put(
                         lang, firstScriptNotIn, firstRegionNotIn, Boolean.TRUE);
                 // clone for safety before iterating
                 for (String script : new HashSet<>(scriptsFor)) {
                     languageToScriptToRegions.put(lang, script, firstRegionNotIn, Boolean.TRUE);
                 }
                 for (String region : new HashSet<>(regionsFor)) {
                     languageToScriptToRegions.put(lang, firstScriptNotIn, region, Boolean.TRUE);
                 }
             }

             // System.out.println("all: " + all);
             // System.out.println("scriptRegion: " + scriptRegion);
             // System.out.println("languageScript: " + languageScript);
             // System.out.println("languageRegion: " + languageRegion);
         }

         /**
          * Returns the first element of {@code a}, in iteration order, that is not contained in
          * {@code b} and whose string form is not empty.
          *
          * @throws IllegalArgumentException if there is no such element
          */
         private static <T> T getNonEmptyNotIn(Iterable<T> a, Set<T> b) {
             for (T x : a) {
                 if (!b.contains(x) && !x.toString().isEmpty()) {
                     return x;
                 }
             }
             // This runs during static initialization of the enclosing test class, so say what
             // was being searched instead of failing with an empty message.
             throw new IllegalArgumentException(
                     "No non-empty element of " + a + " is absent from " + b);
         }

         /**
          * Records the fields of the tag currently held by {@code ltp}.
          *
          * @param ltp parser already set to the tag to record
          * @param source true when the tag is a key of the likely-subtags map; only keys are
          *     classified into {@link #all}, {@link #scriptRegion}, {@link #languageScript} and
          *     {@link #languageRegion}
          */
         void add(LanguageTagParser ltp, boolean source) {
             String sourceLanguage = ltp.getLanguage();
             String sourceScript = ltp.getScript();
             String sourceRegion = ltp.getRegion();
             languageToScriptToRegions.put(sourceLanguage, sourceScript, sourceRegion, Boolean.TRUE);
             languageToScriptToRegions.put(sourceLanguage, sourceScript, "", Boolean.TRUE);
             languageToScriptToRegions.put(sourceLanguage, "", "", Boolean.TRUE);
             languageToRegions.put(sourceLanguage, "", Boolean.TRUE);
             // Only regions accepted by isCountry are combined with the empty script.
             if (StandardCodes.isCountry(sourceRegion)) {
                 languageToScriptToRegions.put(sourceLanguage, "", sourceRegion, Boolean.TRUE);
                 languageToRegions.put(sourceLanguage, sourceRegion, Boolean.TRUE);
             }

             // capture all cases of 2 items
             if (source) {
                 if (!sourceScript.isEmpty() && !sourceRegion.isEmpty()) {
                     if (!sourceLanguage.equals("und")) {
                         all.add(ltp.toString());
                     } else {
                         scriptRegion.add(ltp.toString());
                     }
                 } else if (!sourceLanguage.equals("und")) {
                     if (!sourceScript.isEmpty()) {
                         languageScript.add(ltp.toString());
                     } else if (!sourceRegion.isEmpty()) {
                         languageRegion.add(ltp.toString());
                     }
                 }
             }
             languages.add(sourceLanguage);
             scripts.add(sourceScript);
             if (StandardCodes.isCountry(sourceRegion) || sourceRegion.isEmpty()) {
                 regions.add(sourceRegion);
             }
         }
     }

     // Subtag sets and test combinations collected once from the likely-subtags data.
     static final Tags TAGS = new Tags();

     // Parsers reused by checkAdding: maxLtp holds the maximized tag, sourceLtp the tag being
     // modified and tested.
     final LanguageTagParser maxLtp = new LanguageTagParser();
     final LanguageTagParser sourceLtp = new LanguageTagParser();

     /**
      * Checks that if {@code source} maximizes to some tag, then filling any of the missing
      * fields of {@code source} from that maximized tag still maximizes to the same tag.
      *
      * <p>Example: {@code und_AF => fa_Arab_AF}, therefore the following must also hold:
      *
      * <ul>
      *   <li>{@code und_Arab_AF => fa_Arab_AF}
      *   <li>{@code fa_AF => fa_Arab_AF}
      *   <li>{@code fa_Arab_AF => fa_Arab_AF}
      * </ul>
      *
      * @param source the locale tag to check
      * @return {@code false} only when {@code source} cannot be maximized and contains no
      *     {@code "_"}, in which case the caller skips the rest of that language; {@code true}
      *     otherwise
      */
     public boolean checkAdding(String source) {
         String max = LIKELY.maximize(source);
         if (!assertNotEquals("Maximize " + source, null, max)) {
             return source.contains("_");
         }
         sourceLtp.set(source);
         // Regions that are not countries are not tested further.
         if (!sourceLtp.getRegion().isEmpty() && !StandardCodes.isCountry(sourceLtp.getRegion())) {
             return true;
         }
         maxLtp.set(max);
         // Bits of i select which fields to fill in: 1 = language, 2 = script, 4 = region.
         for (int i = 1; i < 8; ++i) {
             // Start every combination from the unmodified source. Resetting here rather than at
             // the end of the body guarantees that a 'continue' taken after a field was already
             // filled in cannot leak that change into the next combination.
             sourceLtp.set(source);
             if ((i & 1) != 0) {
                 if (!sourceLtp.getLanguage().equals("und")) {
                     continue;
                 }
                 sourceLtp.setLanguage(maxLtp.getLanguage());
             }
             if ((i & 2) != 0) {
                 if (!sourceLtp.getScript().isEmpty()) {
                     continue;
                 }
                 sourceLtp.setScript(maxLtp.getScript());
             }
             if ((i & 4) != 0) {
                 if (!sourceLtp.getRegion().isEmpty()) {
                     continue;
                 }
                 sourceLtp.setRegion(maxLtp.getRegion());
             }
             String test = sourceLtp.toString();
             final String maximize = LIKELY.maximize(test);
             if (!max.equals(maximize)) {
                 // max(source) = max, max(test) ≠ max
                 // To debug, step into LIKELY.maximize(test) here.
                 assertEquals(
                         String.format(
                                 "checkAdding: max(%s)->%s, however max(%s)->", source, max, test),
                         max,
                         maximize);
             }
         }
         return true;
     }

     /**
      * Runs {@link #checkAdding(String)} on every language/script/region combination collected
      * in {@code TAGS.languageToScriptToRegions}, except the bare tag "und". When a check returns
      * false, the check is repeated once (as a debugging hook) and the remaining combinations of
      * that language are skipped.
      */
     public void TestCompleteness() {
         final LanguageTagParser parser = new LanguageTagParser();
         if (DEBUG) {
             for (Set<String> subtags : Arrays.asList(TAGS.languages, TAGS.scripts, TAGS.regions)) {
                 System.out.println(subtags.size() + "\t" + subtags);
             }
         }
         nextLanguage:
         for (Entry<String, Map<String, Map<String, Boolean>>> byLanguage :
                 TAGS.languageToScriptToRegions) {
             parser.set(byLanguage.getKey()); // clears script, region
             for (Entry<String, Map<String, Boolean>> byScript :
                     byLanguage.getValue().entrySet()) {
                 parser.setScript(byScript.getKey());
                 for (String region : byScript.getValue().keySet()) {
                     parser.setRegion(region);
                     final String candidate = parser.toString();
                     if (candidate.equals("und") || checkAdding(candidate)) {
                         continue;
                     }
                     checkAdding(candidate); // for debugging
                     continue nextLanguage;
                 }
             }
         }
     }

     // Script codes that the script tests below do not require to have a likely language.
     static Set<String> exceptions =
             new HashSet<>(
                     Arrays.asList(
                             "Zyyy", "Zinh", "Zzzz", "Brai",
                             "Cpmn")); // scripts with no default language

     /**
      * Verifies that maximizing never changes a field that was already specified: for every
      * likely-subtags entry, a non-"und" language, a non-empty script, and a non-empty leaf region
      * of the key must reappear unchanged in the value.
      */
     public void TestStability() {
         final LanguageTagParser parser = new LanguageTagParser();
         for (Entry<String, String> mapping : likely.entrySet()) {
             // Read the key's fields first; the same parser is reused for the value below.
             parser.set(mapping.getKey());
             final String keyLanguage = parser.getLanguage();
             final String fromLanguage = keyLanguage.equals("und") ? "" : keyLanguage;
             final String fromScript = parser.getScript();
             final String fromRegion = parser.getRegion();

             parser.set(mapping.getValue());
             final String toLanguage = parser.getLanguage();
             final String toScript = parser.getScript();
             final String toRegion = parser.getRegion();

             if (!fromLanguage.isEmpty()) {
                 assertEquals("language", fromLanguage, toLanguage);
             }
             if (!fromScript.isEmpty()) {
                 assertEquals("script", fromScript, toScript);
             }
             // Non-leaf (containing) regions may legitimately expand to a region inside them.
             if (!fromRegion.isEmpty() && Containment.isLeaf(fromRegion)) {
                 assertEquals("region", fromRegion, toRegion);
             }
         }
     }

     /**
      * Walks all Unicode code points script by script and reports an error for any script that
      * has no {@link ScriptMetadata} entry, or whose likely language is "und" without being
      * listed in {@code exceptions}. Afterwards it warns about metadata scripts (other than the
      * known combination scripts) for which no character was encountered.
      */
     public void TestForMissingScriptMetadata() {
         TreeSet<String> metadataScripts = new TreeSet<>(ScriptMetadata.getScripts());
         UnicodeSet current = new UnicodeSet(0, 0x10FFFF);
         UnicodeSet toRemove = new UnicodeSet();

         while (!current.isEmpty()) {
             int ch = current.charAt(0);
             int script = UScript.getScript(ch);
             String shortName = UScript.getShortName(script);

             // Remove all code points of this script before any check that may 'continue'.
             // Otherwise a script without metadata would leave 'current' unchanged and the loop
             // would re-read the same code point forever.
             toRemove.applyIntPropertyValue(UProperty.SCRIPT, script);
             current.removeAll(toRemove);
             metadataScripts.remove(shortName);

             Info i = ScriptMetadata.getInfo(shortName);
             if (i == null) {
                 errln("Script Metadata is missing: " + shortName);
                 continue;
             }
             if (i.likelyLanguage.equals("und") && !exceptions.contains(shortName)) {
                 errln("Script has no likely language: " + shortName);
             }
         }
         // remove "combo" scripts
         metadataScripts.removeAll(Arrays.asList("Hans", "Hant", "Hanb", "Jamo", "Jpan", "Kore"));
         if (!metadataScripts.isEmpty()) {
             // Warning, not error, so that we can add scripts to the script metadata
             // and later update to the Unicode version that has characters for those scripts.
             warnln("Script Metadata for characters not in Unicode: " + metadataScripts);
         }
     }

     /**
      * For every base language available from the CLDR factory (names containing "_" and "root"
      * are skipped), checks that likely subtags has an entry for it and that English has a name
      * for it. A missing English name is tolerated, via a logged known issue, when the language's
      * effective coverage level is unknown or below BASIC.
      */
     public void TestMissingInfoForLanguage() {
         CLDRFile english = CLDR_CONFIG.getEnglish().getUnresolved();

         CalculatedCoverageLevels ccl = CalculatedCoverageLevels.getInstance();

         for (String language : CLDR_CONFIG.getCldrFactory().getAvailableLanguages()) {
             if (language.contains("_") || language.equals("root")) {
                 continue;
             }
             String likelyExpansion = likely.get(language);
             if (likelyExpansion == null) {
                 errln("Missing likely subtags for: " + language);
             } else {
                 // Log the expansion of this language, not the entire likely-subtags map.
                 logln("Likely subtags for " + language + ":\t " + likelyExpansion);
             }
             String path = CLDRFile.getKey(CLDRFile.LANGUAGE_NAME, language);
             String englishName = english.getStringValue(path);
             if (englishName == null) {
                 Level covLevel = ccl.getEffectiveCoverageLevel(language);
                 if (covLevel == null || !covLevel.isAtLeast(Level.BASIC)) {
                     // https://unicode-org.atlassian.net/browse/CLDR-15663
                     if (logKnownIssue(
                             "CLDR-15663",
                             "English translation should not be required for sub-basic language name")) {
                         continue; // skip error
                     }
                 }
                 errln("Missing English translation for: " + language + " which is at " + covLevel);
             }
         }
     }

     /**
      * For every good available territory code, logs whether likely subtags can expand
      * {@code und_<region>} (missing entries are only logged, not reported as errors) and reports
      * an error when English has no name for the region.
      */
     public void TestMissingInfoForRegion() {
         CLDRFile english = CLDR_CONFIG.getEnglish();

         for (String region : StandardCodes.make().getGoodAvailableCodes("territory")) {
             String likelyExpansion = likely.get("und_" + region);
             if (likelyExpansion == null) {
                 if (SUPPLEMENTAL_DATA_INFO.getContained(region) == null) { // not container
                     String likelyTag = LikelySubtags.maximize("und_" + region, likely);
                     if (likelyTag == null) { //  || !likelyTag.startsWith("en_Latn_")
                         logln(
                                 "Missing likely subtags for region: "
                                         + region
                                         + "\t"
                                         + english.getName("territory", region));
                     }
                 } else { // container
                     logln(
                             "Missing likely subtags for macroregion (fix to exclude regions having 'en'): "
                                     + region
                                     + "\t"
                                     + english.getName("territory", region));
                 }
             } else {
                 // Log the expansion of this region, not the entire likely-subtags map.
                 logln("Likely subtags for region: " + region + ":\t " + likelyExpansion);
             }
             String path = CLDRFile.getKey(CLDRFile.TERRITORY_NAME, region);
             String englishName = english.getStringValue(path);
             if (englishName == null) {
                 errln("Missing English translation for: " + region);
             }
         }
     }

     // Typically historical scripts that don't need to be in likely subtags; a missing und_<script>
     // entry is not reported for these.

     static final Set<String> KNOWN_SCRIPTS_WITHOUT_LIKELY_SUBTAGS =
             ImmutableSet.of("Hatr", "Cpmn", "Ougr");

     /**
      * Compares the likely-subtags entry for each script ({@code und_<script>}) with the script
      * metadata. A missing entry is an error when the script's age is not newer than ICU's
      * Unicode version and a warning otherwise; an entry that does not start with the metadata's
      * {@code <likelyLanguage>_<script>_} is an error unless it is one of the accepted
      * expansions. Scripts in {@code exceptions}, Latn and Dsrt are not checked.
      */
     public void TestMissingInfoForScript() {
         final VersionInfo unicodeVersionOfIcu = UCharacter.getUnicodeVersion();
         final Set<String> acceptedExpansions =
                 new HashSet<>(
                         Arrays.asList("zh_Hans_CN", "hnj_Hmnp_US", "hnj_Hmng_LA", "iu_Cans_CA"));

         for (String script : new TreeSet<>(ScriptMetadata.getScripts())) {
             // we minimize away und_X, when the code puts in en...US
             if (exceptions.contains(script) || script.equals("Latn") || script.equals("Dsrt")) {
                 continue;
             }
             final Info metadata = ScriptMetadata.getInfo(script);
             final String likelyLanguage = metadata.likelyLanguage;
             final String originCountry = metadata.originCountry;
             final String undScript = "und_" + script;
             final String langScript = likelyLanguage + "_" + script + "_";
             final String likelyExpansion = likely.get(undScript);

             if (likelyExpansion == null) {
                 if (KNOWN_SCRIPTS_WITHOUT_LIKELY_SUBTAGS.contains(script)) {
                     continue;
                 }
                 final String msg =
                         "likelySubtags.xml missing language for script (und_"
                                 + script
                                 + "). Script Metadata suggests that it should be something like:\t "
                                 + showOverride(script, originCountry, langScript);
                 final boolean scriptIsInIcuUnicode =
                         metadata.age.compareTo(unicodeVersionOfIcu) <= 0;
                 if (scriptIsInIcuUnicode) {
                     // Error: Missing data for a script in ICU's Unicode version.
                     errln(msg);
                 } else {
                     // Warning: Missing data for a script in a future Unicode version.
                     warnln(msg);
                 }
                 continue;
             }

             final boolean expansionAccepted =
                     acceptedExpansions.contains(likelyExpansion)
                             || likelyExpansion.startsWith(langScript);
             if (expansionAccepted) {
                 logln("OK: " + undScript + " => " + likelyExpansion);
                 continue;
             }
             errln(
                     "likelySubtags.xml has wrong language for script (und_"
                             + script
                             + "). Should not be "
                             + likelyExpansion
                             + ", but Script Metadata suggests something like:\t "
                             + showOverride(script, originCountry, langScript));
         }
         // Notes carried over from the original author:
         // und_Bopo => zh_Bopo_TW und_Copt => cop_Copt_EG // fix 002 und_Dsrt => en_Dsrt_US // fix US
     }

     /**
      * Formats a suggested likely-subtags override as a brace-delimited pair followed by a comma,
      * e.g. {@code {"und_Copt", "cop_Copt_EG"},}.
      *
      * @param script the script code; it is appended to {@code und_} to form the first element
      * @param originCountry the region appended to {@code langScript} to form the second element
      * @param langScript the {@code <language>_<script>_} prefix of the second element
      * @return the formatted pair
      */
     public String showOverride(String script, String originCountry, String langScript) {
         return String.format("{\"und_%s\", \"%s%s\"},", script, langScript, originCountry);
     }

     /**
      * Test two issues:
      *
      * <ul>
      *   <li>That the script of the locale's exemplars matches the script derived from the locale's
      *       identifier.
      *   <li>That the union of the exemplar sets (main+aux) for all locales with the script matches
      *       what is in ltp.getResolvedScript()
      * </ul>
      *
      * Written as one test, to avoid the overhead of iterating over all locales twice.
      */
     public void testGetResolvedScriptVsExemplars() {
         Factory factory = CLDR_CONFIG.getCldrFactory();
         LanguageTagParser ltp = new LanguageTagParser();
         // Per resolved script: the main / auxiliary exemplar sets of every locale in that script.
         Multimap<String, UnicodeSet> scriptToMains = TreeMultimap.create();
         Multimap<String, UnicodeSet> scriptToAuxes = TreeMultimap.create();
         // Accumulates every unexpected exemplar character found by checkSet, across all locales.
         UnicodeSet collectedBad = new UnicodeSet();
         for (String locale : factory.getAvailable()) {
             if ("root".equals(locale)) {
                 continue;
             }
             // NOTE(review): the 'true' argument presumably requests a resolved file — confirm.
             CLDRFile cldrFile = factory.make(locale, true);
             UnicodeSet main = cldrFile.getRawExemplarSet(ExemplarType.main, WinningChoice.WINNING);
             main = checkSet("main", locale, main, collectedBad);
             UnicodeSet aux =
                     cldrFile.getRawExemplarSet(ExemplarType.auxiliary, WinningChoice.WINNING);
             aux = checkSet("aux", locale, aux, collectedBad);
             // Take the script of the first main exemplar whose script code is above
             // UScript.INHERITED.
             String script = null;
             int uScript = 0;
             for (String s : main) {
                 uScript = UScript.getScript(s.codePointAt(0));
                 if (uScript > UScript.INHERITED) {
                     script = UScript.getShortName(uScript);
                     break;
                 }
             }
             if (script == null) {
                 errln("No script for " + locale);
                 continue;
             }
             String ltpScript = ltp.set(locale).getResolvedScript();
             // Map the character-level script to the script code expected for the locale: Han
             // becomes Jpan for ja, and the explicit script (or a Hant/Hans default) for yue/zh;
             // Hangul becomes Kore for ko.
             switch (uScript) {
                 case UScript.HAN:
                     switch (ltp.getLanguage()) {
                         case "ja":
                             script = "Jpan";
                             break;
                         case "yue":
                             script = ltp.getScript();
                             if (script.isEmpty()) {
                                 script = "Hant";
                             }
                             break;
                         case "zh":
                             script = ltp.getScript();
                             if (script.isEmpty()) {
                                 script = "Hans";
                             }
                             break;
                     }
                     break;
                 case UScript.HANGUL:
                     switch (ltp.getLanguage()) {
                         case "ko":
                             script = "Kore";
                             break;
                     }
             }
             if (!assertEquals(locale, script, ltpScript)) {
                 ltp.getResolvedScript(); // for debugging
             }
             // The sets are frozen before being stored, so the collected values cannot change.
             scriptToMains.put(ltpScript, main.freeze());
             if (!aux.isEmpty()) {
                 scriptToAuxes.put(ltpScript, aux.freeze());
             }
         }

         if (!collectedBad.isEmpty()) {
             warnln(
                     "Locales have "
                             + collectedBad.size()
                             + " unexpected characters in main and/or aux:\t"
                             + collectedBad.toPattern(false)
                             + "\n Use -DSHOW_EXEMPLARS for details");
         }

         // now check that ScriptToExemplars.getExemplars matches the data

         Set<String> problemScripts = new LinkedHashSet<>();
         Map<String, UnicodeSet> expected = new TreeMap<>();
         for (Entry<String, Collection<UnicodeSet>> entry : scriptToMains.asMap().entrySet()) {
             String script = entry.getKey();
             Collection<UnicodeSet> mains = entry.getValue();
             Collection<UnicodeSet> auxes = scriptToAuxes.get(script);

             UnicodeSet flattened;
             // Scripts with at most one main set and at most one aux set are skipped.
             if (mains.size() <= 1 && auxes.size() <= 1) {
                 continue;
             } else {
                 UnicodeMap<Integer> counts = new UnicodeMap<>();
                 getCounts(mains, counts);
                 flattened = getUncommon(counts, mains.size());
                 // When the mains alone yield fewer than 32 entries, the aux sets are counted in
                 // as well and the result is recomputed (still against the number of mains).
                 if (counts.size() < 32) {
                     getCounts(auxes, counts);
                     flattened = getUncommon(counts, mains.size());
                 }
             }
             expected.put(script, flattened.freeze());
         }
         for (Entry<String, UnicodeSet> entry : expected.entrySet()) {
             String script = entry.getKey();
             UnicodeSet flattened = entry.getValue();

             // now compare to what we get from the cached file, to make sure the latter is up to
             // date

             if (!assertEquals(
                     script,
                     flattened.toPattern(false),
                     ScriptToExemplars.getExemplars(script).toPattern(false))) {
                 problemScripts.add(script);
             }
         }

         if (!problemScripts.isEmpty()) {
             warnln(
                     "Adjust the data in scriptToExemplars.txt. Use -DSHOW_EXEMPLARS to get a fresh copy, or reset to expected value for: "
                             + problemScripts);
             // With -DSHOW_EXEMPLARS, print every non-empty expected set as
             // "script ;<TAB>size ;<TAB>pattern".
             if (SHOW_EXEMPLARS) {
                 for (Entry<String, UnicodeSet> entry : expected.entrySet()) {
                     String script = entry.getKey();
                     UnicodeSet flattened = entry.getValue();
                     if (!flattened.isEmpty()) {
                         System.out.println(
                                 script
                                         + " ;\t"
                                         + flattened.size()
                                         + " ;\t"
                                         + flattened.toPattern(false));
                     }
                 }
             }
         }
     }

     // Characters that checkSet accepts in main/auxiliary exemplars: letters, marks, format
     // characters, and the middle dot. Anything else is reported as unexpected.
     static final UnicodeSet MAIN_AUX_EXPECTED = new UnicodeSet("[\\p{L}\\p{M}\\p{Cf}·]").freeze();

     /**
      * Separates the unexpected entries of an exemplar set from the expected ones.
      *
      * @param title label of the set ("main" or "aux"), used only in the warning text
      * @param locale locale the set belongs to, used only in the warning text
      * @param main the exemplar set to check; it is not modified
      * @param collected receives every entry that is not fully covered by {@code
      *     MAIN_AUX_EXPECTED}
      * @return a flattened copy of {@code main} without the unexpected entries
      */
     private UnicodeSet checkSet(
             String title, String locale, UnicodeSet main, UnicodeSet collected) {
         final UnicodeSet unexpected = new UnicodeSet();
         for (String item : main) {
             if (MAIN_AUX_EXPECTED.containsAll(item)) {
                 continue;
             }
             unexpected.add(item);
         }
         final boolean foundUnexpected = !unexpected.isEmpty();
         // Per-locale detail is only shown on request; the caller summarizes via 'collected'.
         if (foundUnexpected && SHOW_EXEMPLARS) {
             warnln(
                     "\t"
                             + title
                             + "\tLocale\t"
                             + locale
                             + "\thas "
                             + unexpected.size()
                             + " unexpected exemplar characters:\t"
                             + unexpected.toPattern(false));
         }
         if (foundUnexpected) {
             collected.addAll(unexpected);
         }
         final UnicodeSet cleaned = new UnicodeSet(main);
         cleaned.removeAll(unexpected);
         return CldrUtility.flatten(cleaned);
     }

     /**
      * Returns the flattened key set of {@code counts} without the entries whose count equals
      * {@code size} (those are common to all locales). Flattening is done against the whole key
      * set.
      *
      * @param counts per-entry occurrence counts
      * @param size the count that marks an entry as common to all
      * @return a frozen set of the entries whose count differs from {@code size}
      */
     private UnicodeSet getUncommon(UnicodeMap<Integer> counts, int size) {
         final UnicodeSet uncommon = new UnicodeSet();
         // we flatten against the whole set
         for (String item : CldrUtility.flatten(counts.keySet())) {
             final int seen = counts.get(item);
             if (seen == size) {
                 continue;
             }
             uncommon.add(item);
         }
         return uncommon.freeze();
     }

     /**
      * Adds one to the count of every entry of every set in {@code usets}; entries not yet present
      * in {@code counts} start at 1.
      *
      * @param usets the sets whose entries are counted
      * @param counts the running tally, updated in place
      */
     private void getCounts(Collection<UnicodeSet> usets, UnicodeMap<Integer> counts) {
         for (UnicodeSet exemplarSet : usets) {
             for (String item : exemplarSet) {
                 final Integer previous = counts.get(item);
                 counts.put(item, previous == null ? 1 : previous + 1);
             }
         }
     }

     /**
      * Checks that likely subtags contains an {@code und_<script>} entry for every script that
      * appears in an available CLDR locale (for Latn the bare {@code und} entry is required
      * instead; Dsrt locales are skipped), and an {@code und_<region>} entry for every regular
      * region and every region of an available CLDR locale other than 001.
      *
      * <p>For each missing region entry, a suggested {@code <likelySubtag>} element is built from
      * the language with the largest literate population in that region; the suggestions, if
      * any, are printed to standard output.
      */
     public void testUndAllScriptsAndRegions() {
         Set<String> regions = new TreeSet<>();
         Set<String> scripts = new TreeSet<>();
         Set<String> regularCountries =
                 VALIDITY.getStatusToCodes(LstrType.region).get(Status.regular);
         // Macroregions are currently not required. To require them, use
         // Validity.getInstance().getStatusToCodes(LstrType.region).get(Status.macroregion).
         Set<String> macroRegions = Set.of();

         regions.addAll(Sets.union(regularCountries, macroRegions));

         // for Scripts, just test the ones in CLDR
         for (String localeString : CLDR_CONFIG.getCldrFactory().getAvailable()) {
             if (localeString.equals("root")) {
                 continue;
             }
             CLDRLocale cLocale = CLDRLocale.getInstance(localeString);
             final String script = cLocale.getScript();
             if (script.equals("Dsrt")) {
                 continue; // toy script
             }
             final String country = cLocale.getCountry();
             if (!country.isEmpty() && !country.equals("001")) {
                 regions.add(country);
             }
             if (!script.isEmpty()) {
                 scripts.add(script);
             }
         }

         for (String script : scripts) {
             // For Latn only the bare "und" entry is required.
             // NOTE(review): presumably because "und" already expands to a Latin-script tag —
             // confirm against the data.
             String requiredKey = script.equals("Latn") ? "und" : "und_" + script;
             assertTrue("contains und_" + script, likely.containsKey(requiredKey));
         }

         LanguageTagParser ltp = new LanguageTagParser();
         Set<String> possibleFixes = new TreeSet<>();
         for (String region : regions) {
             final String undRegion = "und_" + region;
             if (region.equals("150") && likely.containsKey("und")) {
                 continue; // und_150 is not required when "und" itself is present
             }
             if (assertTrue("contains und_" + region, likely.containsKey(undRegion))) {
                 continue;
             }

             // The entry is missing: suggest one based on the language with the largest literate
             // population in the region.
             Set<String> languages =
                     SUPPLEMENTAL_DATA_INFO.getLanguagesForTerritoryWithPopulationData(region);
             double biggest = -1;
             String biggestLang = null;
             for (String language : languages) {
                 PopulationData popData =
                         SUPPLEMENTAL_DATA_INFO.getLanguageAndTerritoryPopulationData(
                                 language, region);
                 if (popData.getLiteratePopulation() > biggest) {
                     biggest = popData.getLiteratePopulation();
                     biggestLang = language;
                 }
             }
             if (biggestLang == null) {
                 continue;
             }
             ltp.set(biggestLang);
             if (ltp.getScript().isEmpty()) {
                 String biggestMax = likely.get(biggestLang);
                 if (biggestMax == null) {
                     // No expansion is known for this language, so no complete tag can be
                     // suggested; do not hand null to the parser.
                     continue;
                 }
                 ltp.set(biggestMax);
             }
             ltp.setRegion(region);
             possibleFixes.add("<likelySubtag from=\"" + undRegion + "\" to=\"" + ltp + "\"/>");
         }
         // Only print when there is something to suggest.
         if (!possibleFixes.isEmpty()) {
             System.out.println("\t\t" + Joiner.on("\n\t\t").join(possibleFixes));
         }
     }

     /**
      * Checks that the language, script, and region of every value in the {@code likely} map have
      * validity status {@code regular}.
      *
      * <p>Subtags that are not {@code regular} are grouped by the status found for them (looked up
      * as a language first, then as a script, then as a region) and reported with {@code warnln}.
      * Subtags for which no status is found at all are reported separately, together with the
      * values they came from.
      */
     public void testToAttributeValidityStatus() {
         Set<String> okLanguages = VALIDITY.getStatusToCodes(LstrType.language).get(Status.regular);
         Set<String> okScripts = VALIDITY.getStatusToCodes(LstrType.script).get(Status.regular);
         Set<String> okRegions = VALIDITY.getStatusToCodes(LstrType.region).get(Status.regular);
         Multimap<String, String> badFieldsToLocales = TreeMultimap.create();
         // Language codes that are tolerated even though they are not "regular".
         Set<String> knownExceptions = Set.of("in", "iw", "ji", "jw", "mo", "tl");
         for (String s : likely.values()) {
             CLDRLocale cLocale = CLDRLocale.getInstance(s);
             final String language = cLocale.getLanguage();
             final String script = cLocale.getScript();
             final String region = cLocale.getCountry();
             if (!okLanguages.contains(language)) {
                 if (knownExceptions.contains(language)) {
                     // The whole value is skipped: its script and region are not checked either.
                     continue;
                 }
                 badFieldsToLocales.put(language, s);
             }
             if (!okScripts.contains(script)) {
                 badFieldsToLocales.put(script, s);
             }
             if (!okRegions.contains(region)) {
                 badFieldsToLocales.put(region, s);
             }
         }
         if (badFieldsToLocales.isEmpty()) {
             return;
         }

         Multimap<Status, String> statusToExamples = TreeMultimap.create();
         for (String field : badFieldsToLocales.keySet()) {
             Status status = VALIDITY.getCodeToStatus(LstrType.language).get(field);
             if (status == null) {
                 status = VALIDITY.getCodeToStatus(LstrType.script).get(field);
             }
             if (status == null) {
                 status = VALIDITY.getCodeToStatus(LstrType.region).get(field);
             }
             if (status == null) {
                 // A naturally-ordered TreeMultimap rejects null keys, so a subtag that is
                 // unknown to all three tables (for example an empty script) must be reported
                 // directly rather than inserted.
                 warnln(
                         "No validity status for \""
                                 + field
                                 + "\" in "
                                 + badFieldsToLocales.get(field));
                 continue;
             }
             statusToExamples.put(status, field);
         }
         for (Entry<Status, Collection<String>> entry : statusToExamples.asMap().entrySet()) {
             warnln("Bad status=" + entry.getKey() + " for " + entry.getValue());
         }
     }

     /**
      * Test whether any of the mapping lines in likelySubtags.xml are superfluous. <br>
      * For example, with the following mappings, #2 and #3 are superfluous, since they would be
      * produced by the algorithm anyway.
      *
      * <ol>
      *   <li>ll => ll_Sss1_R1
      *   <li>ll_Sss2 => ll_Sss2_R1
      *   <li>ll_R2 => ll_Sss1_R2
      * </ol>
      *
      * On the other hand, the following are not:
      *
      * <ol>
      *   <li>ll_Sss2 => ll_Sss2_R3
      *   <li>ll_R2 => ll_Sss3_R2
      * </ol>
      *
      * <p>How it works: the mappings are grouped by the language of their key ({@code und} keys
      * are ignored). For each language with more than one mapping, a set of probe tags is built
      * from every script and region seen in that language's keys and values, plus the empty
      * script/region and the extra values {@code Egyp} and {@code AQ}. A key containing {@code _}
      * is considered omittable if removing it leaves the result of {@code LikelySubtags.maximize}
      * unchanged for every probe. At most one omittable key is reported per language.
      */
     public void testSuperfluous() {
         Map<String, String> origins = SUPPLEMENTAL_DATA_INFO.getLikelyOrigins();

         // collect all items with same language
         LanguageTagParser ltp = new LanguageTagParser();
         TreeMap<String, TreeMap<String, String>> langToLikelySubset = new TreeMap<>();
         for (Entry<String, String> entry : likely.entrySet()) {
             String lang = ltp.set(entry.getKey()).getLanguage();
             if (lang.equals("und")) {
                 continue;
             }
             langToLikelySubset
                     .computeIfAbsent(lang, k -> new TreeMap<>())
                     .put(entry.getKey(), entry.getValue());
         }
         boolean first = true;

         for (Entry<String, TreeMap<String, String>> langAndMap : langToLikelySubset.entrySet()) {
             String lang0 = langAndMap.getKey();
             Map<String, String> goldenMap = ImmutableMap.copyOf(langAndMap.getValue());
             if (goldenMap.size() == 1) {
                 // a single mapping has nothing it could be redundant with
                 continue;
             }

             // get test sets and build probe data

             Set<String> scripts = new TreeSet<>();
             scripts.add("Egyp");
             scripts.add("");
             Set<String> regions = new TreeSet<>();
             regions.add("AQ");
             regions.add("");
             for (String key : Sets.union(goldenMap.keySet(), new TreeSet<>(goldenMap.values()))) {
                 scripts.add(ltp.set(key).getScript());
                 regions.add(ltp.getRegion());
             }
             scripts = ImmutableSet.copyOf(scripts);
             regions = ImmutableSet.copyOf(regions);

             TreeSet<String> probeData = new TreeSet<>();
             ltp.setLanguage(lang0); // clear;
             for (String script : scripts) {
                 ltp.setScript(script); // clear;
                 for (String region : regions) {
                     ltp.setRegion(region);
                     probeData.add(ltp.toString());
                 }
             }

             // The expected result for a probe depends only on the full (golden) map, not on the
             // key being omitted, so compute it once per probe instead of once per probe per key.
             // A TreeMap is used because it keeps the probe order and permits null values.
             TreeMap<String, String> probeToExpected = new TreeMap<>();
             for (String probe : probeData) {
                 probeToExpected.put(probe, LikelySubtags.maximize(probe, goldenMap));
             }

             // see if the omission of a <key,value> makes no difference

             String omittableKey = null;

             for (String keyToTryOmitting : goldenMap.keySet()) {
                 if (!keyToTryOmitting.contains("_")) {
                     // never try to omit the bare-language mapping
                     continue;
                 }
                 TreeMap<String, String> mapWithOmittedKey = new TreeMap<>(goldenMap);
                 mapWithOmittedKey.remove(keyToTryOmitting);

                 boolean makesADifference = false;
                 for (Entry<String, String> probeAndExpected : probeToExpected.entrySet()) {
                     String actual =
                             LikelySubtags.maximize(probeAndExpected.getKey(), mapWithOmittedKey);
                     if (!Objects.equal(probeAndExpected.getValue(), actual)) {
                         makesADifference = true;
                         break;
                     }
                 }
                 if (!makesADifference) {
                     omittableKey = keyToTryOmitting;
                     break;
                 }
             }

             // show the value that doesn't make a difference
             // NOTE: there may be more than one, but it is sufficient to find one.
             if (omittableKey != null) {
                 final String origin = origins.get(omittableKey);
                 if (origin != null) { // only check the non-sil for now
                     logKnownIssue("CLDR-17084", "Remove superfluous lines in likelySubtags.txt");
                     continue;
                 }
                 if (first) {
                     warnln("\tMaps\tKey to omit\tvalue\torigin");
                     first = false;
                 }
                 // assertFalse(..., true) always fails: it is used here to report the error row.
                 // origin is necessarily null at this point, so the origin column is left empty.
                 assertFalse(
                         "\t"
                                 + goldenMap
                                 + "\t"
                                 + omittableKey
                                 + "\t"
                                 + goldenMap.get(omittableKey)
                                 + "\t"
                                 + "\t",
                         true);
             }
         }
     }
 }