| package org.unicode.cldr.unittest; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.EnumSet; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashSet; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| |
| import javax.xml.xpath.XPathException; |
| |
| import org.unicode.cldr.test.ExampleGenerator; |
| import org.unicode.cldr.util.AttributeValueValidity; |
| import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern; |
| import org.unicode.cldr.util.CLDRConfig; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRPaths; |
| import org.unicode.cldr.util.Iso639Data; |
| import org.unicode.cldr.util.Iso639Data.Scope; |
| import org.unicode.cldr.util.Iso639Data.Type; |
| import org.unicode.cldr.util.LanguageTagCanonicalizer; |
| import org.unicode.cldr.util.LanguageTagParser; |
| import org.unicode.cldr.util.LanguageTagParser.Format; |
| import org.unicode.cldr.util.SimpleXMLSource; |
| import org.unicode.cldr.util.StandardCodes.CodeType; |
| import org.unicode.cldr.util.SupplementalDataInfo; |
| import org.unicode.cldr.util.XPathExpressionParser; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.Node; |
| |
| import com.google.common.collect.ImmutableSet; |
| import com.ibm.icu.dev.util.CollectionUtilities; |
| import com.ibm.icu.impl.Relation; |
| import com.ibm.icu.impl.Row; |
| import com.ibm.icu.impl.Row.R2; |
| |
| public class TestLocale extends TestFmwkPlus { |
| static CLDRConfig testInfo = CLDRConfig.getInstance(); |
| private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = testInfo.getSupplementalDataInfo(); |
| |
| public static void main(String[] args) { |
| new TestLocale().run(args); |
| } |
| |
| static Set<Type> ALLOWED_LANGUAGE_TYPES = EnumSet.of(Type.Ancient, |
| Type.Living, Type.Constructed, Type.Historical, Type.Extinct, Type.Special); |
| static Set<Scope> ALLOWED_LANGUAGE_SCOPES = EnumSet.of(Scope.Individual, |
| Scope.Macrolanguage, Scope.Special); // , Special, Collection, PrivateUse, Unknown |
| static Set<String> ALLOWED_SCRIPTS = testInfo.getStandardCodes() |
| .getGoodAvailableCodes(CodeType.script); |
| static Set<String> ALLOWED_REGIONS = testInfo.getStandardCodes() |
| .getGoodAvailableCodes(CodeType.territory); |
| |
| /** |
| * XPath expression that will find all alias tags |
| */ |
| static String XPATH_ALIAS_STRING = "//alias"; |
| |
| public void TestLanguageRegions() { |
| Set<String> missingLanguageRegion = new LinkedHashSet<String>(); |
| // TODO This should be derived from metadata: https://unicode.org/cldr/trac/ticket/11224 |
| Set<String> knownMultiScriptLanguages = new HashSet<String>(Arrays.asList("az", "ff", "bs", "pa", "shi", "sr", "vai", "uz", "yue", "zh")); |
| Set<String> available = testInfo.getCldrFactory().getAvailable(); |
| LanguageTagParser ltp = new LanguageTagParser(); |
| Set<String> defaultContents = SUPPLEMENTAL_DATA_INFO |
| .getDefaultContentLocales(); |
| for (String locale : available) { |
| String base = ltp.set(locale).getLanguage(); |
| String script = ltp.getScript(); |
| String region = ltp.getRegion(); |
| if (script.isEmpty()) { |
| continue; |
| } |
| ltp.setRegion(""); |
| String baseScript = ltp.toString(); |
| if (!knownMultiScriptLanguages.contains(base)) { |
| assertFalse("Should NOT have " + locale, |
| defaultContents.contains(baseScript)); |
| } |
| if (region.isEmpty()) { |
| continue; |
| } |
| ltp.setScript(""); |
| ltp.setRegion(region); |
| String baseRegion = ltp.toString(); |
| if (knownMultiScriptLanguages.contains(base)) { |
| continue; |
| } |
| if (!missingLanguageRegion.contains(baseRegion) |
| && !assertTrue("Should have " + baseRegion, |
| available.contains(baseRegion))) { |
| missingLanguageRegion.add(baseRegion); |
| } |
| } |
| } |
| |
| /** |
| * Determine whether the file should be checked for aliases; this is |
| * currently not done for Keyboard definitions or DTD's |
| * |
| * @param f |
| * the file to check |
| * @return |
| */ |
| protected boolean shouldCheckForAliases(File f) { |
| if (!f.canRead()) { |
| return false; |
| } |
| String absPath = f.getAbsolutePath(); |
| return absPath.endsWith("xml") && !absPath.contains("dtd") |
| && !absPath.contains("keyboard") |
| && !absPath.contains("Keyboard"); |
| } |
| |
| /** |
| * Check a single file for aliases, on a content level, the only check that |
| * is done is that the one for readability. |
| * |
| * @param localeName |
| * - the localename |
| * @param file |
| * - the file to check |
| * @param localesWithAliases |
| * - a set of locale strings the files of which contain aliases |
| */ |
| private void checkForAliases(final String localeName, File file, |
| final Set<String> localesWithAliases) { |
| try { |
| if (file.canRead()) { |
| XPathExpressionParser parser = new XPathExpressionParser(file); |
| parser.iterateThroughNodeSet(XPATH_ALIAS_STRING, |
| new XPathExpressionParser.NodeHandlingInterface() { |
| |
| // Handle gets called for every node of the node set |
| @Override |
| public void handle(Node result) { |
| if (result instanceof Element) { |
| Element el = (Element) result; |
| // this node likely has an attribute source |
| if (el.hasAttributes()) { |
| String sourceAttr = el |
| .getAttribute("source"); |
| if (sourceAttr != null |
| && !sourceAttr.isEmpty()) { |
| localesWithAliases.add(localeName); |
| } |
| } |
| } |
| } |
| }); |
| } |
| } catch (IOException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } catch (XPathException e) { |
| // TODO Auto-generated catch block |
| e.printStackTrace(); |
| } |
| } |
| |
| /** |
| * Tests the validity of the file names and of the English localeDisplayName |
| * types. Also tests for aliases outside root |
| */ |
| public void TestLocalePartsValidity() { |
| LanguageTagParser ltp = new LanguageTagParser(); |
| final Set<String> localesWithAliases = new HashSet<>(); |
| for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith( |
| ".xml")) { |
| String parent = file.getParent(); |
| if (parent.contains("transform") |
| || parent.contains("bcp47") |
| || parent.contains("supplemental") |
| || parent.contains("validity")) { |
| continue; |
| } |
| String localeName = file.getName(); |
| localeName = localeName.substring(0, localeName.length() - 4); // remove |
| // .xml |
| if (localeName.equals("root") || localeName.equals("_platform")) { |
| continue; |
| } |
| String fileString = file.toString(); |
| checkLocale(fileString, localeName, ltp); |
| // check for aliases |
| if (shouldCheckForAliases(file)) { |
| checkForAliases(localeName, file, localesWithAliases); |
| } |
| } |
| // we ran through all of them |
| if (!localesWithAliases.isEmpty()) { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("\r\n"); |
| sb.append("The following locales have aliases, but must not: "); |
| Iterator<String> lIter = localesWithAliases.iterator(); |
| while (lIter.hasNext()) { |
| sb.append(lIter.next()); |
| sb.append(" "); |
| } |
| System.out.println(sb.toString()); |
| } |
| // now check English-resolved |
| CLDRFile english = testInfo.getEnglish(); |
| for (String xpath : english) { |
| if (!xpath.startsWith("//ldml/localeDisplayNames/")) { |
| continue; |
| } |
| switch (CLDRFile.getNameType(xpath)) { |
| case 0: |
| checkLocale("English xpath", CLDRFile.getCode(xpath), ltp); |
| break; |
| case 1: |
| checkScript("English xpath", CLDRFile.getCode(xpath)); |
| break; |
| case 2: |
| checkRegion("English xpath", CLDRFile.getCode(xpath)); |
| break; |
| } |
| } |
| } |
| |
| public void checkLocale(String fileString, String localeName, |
| LanguageTagParser ltp) { |
| ltp.set(localeName); |
| checkLanguage(fileString, ltp.getLanguage()); |
| checkScript(fileString, ltp.getScript()); |
| checkRegion(fileString, ltp.getRegion()); |
| } |
| |
| public void checkRegion(String file, String region) { |
| if (!region.isEmpty() && !region.equals("AN") |
| && !region.equals("XA") && !region.equals("XB")) { |
| assertRelation("Region ok? " + region + " in " + file, true, |
| ALLOWED_REGIONS, TestFmwkPlus.CONTAINS, region); |
| } |
| } |
| |
| final MatcherPattern SCRIPT_NON_UNICODE = AttributeValueValidity.getMatcherPattern("$scriptNonUnicode"); |
| |
| public void checkScript(String file, String script) { |
| if (!script.isEmpty()) { |
| if (!ALLOWED_SCRIPTS.contains(script) && SCRIPT_NON_UNICODE.matches(script, null)) { |
| return; |
| } |
| assertRelation("Script ok? " + script + " in " + file, true, |
| ALLOWED_SCRIPTS, TestFmwkPlus.CONTAINS, script); |
| } |
| } |
| |
| public void checkLanguage(String file, String language) { |
| if (!language.equals("root")) { |
| Scope scope = Iso639Data.getScope(language); |
| if (assertRelation("Language ok? " + language + " in " + file, |
| true, ALLOWED_LANGUAGE_SCOPES, TestFmwkPlus.CONTAINS, scope)) { |
| Type type = Iso639Data.getType(language); |
| assertRelation("Language ok? " + language + " in " + file, |
| true, ALLOWED_LANGUAGE_TYPES, TestFmwkPlus.CONTAINS, |
| type); |
| } |
| } |
| } |
| |
| public void TestConsistency() { |
| LanguageTagParser ltp = new LanguageTagParser(); |
| SupplementalDataInfo supplementalDataInfo = SUPPLEMENTAL_DATA_INFO; |
| Set<String> defaultContentLocales = supplementalDataInfo |
| .getDefaultContentLocales(); |
| Map<String, String> likelySubtags = supplementalDataInfo |
| .getLikelySubtags(); |
| |
| for (String locale : testInfo.getCldrFactory().getAvailable()) { |
| if (locale.equals("root")) { |
| continue; |
| } |
| ltp.set(locale); |
| boolean isDefaultContent = defaultContentLocales.contains(locale); |
| boolean hasScript = !ltp.getScript().isEmpty(); |
| boolean hasRegion = !ltp.getRegion().isEmpty(); |
| String language = ltp.getLanguage(); |
| String maximized = likelySubtags.get(language); |
| boolean hasLikelySubtag = maximized != null; |
| |
| // verify that the parent locales are consistent with the default |
| // locales, for scripts |
| // that is, if zh-Hant has a parent of root, then it is not the |
| // default content locale, and vice versa |
| |
| if (hasScript && !hasRegion) { |
| boolean parentIsRoot = "root".equals(supplementalDataInfo |
| .getExplicitParentLocale(locale)); |
| if (parentIsRoot == isDefaultContent) { |
| errln("Inconsistency between parentLocales and defaultContents: " |
| + locale |
| + (parentIsRoot ? " +" : " -") |
| + "parentIsRoot" |
| + (isDefaultContent ? " +" : " -") |
| + "isDefaultContent"); |
| } |
| |
| // we'd better have a separate likelySubtag |
| if (parentIsRoot && !hasLikelySubtag) { |
| errln("Missing likely subtags for: " + locale + " " |
| + TestInheritance.suggestLikelySubtagFor(locale)); |
| } |
| } |
| |
| // verify that likelySubtags has all the languages |
| |
| if (!hasScript && !hasRegion) { |
| if (!hasLikelySubtag) { |
| errln("Missing likely subtags for: " + locale + " " |
| + TestInheritance.suggestLikelySubtagFor(locale)); |
| } |
| } |
| } |
| } |
| |
| public void TestCanonicalizer() { |
| LanguageTagCanonicalizer canonicalizer = new LanguageTagCanonicalizer(); |
| String[][] tests = { { "iw", "he" }, { "no-YU", "nb_RS" }, |
| { "no", "nb" }, { "eng-833", "en_IM" }, { "mo", "ro" }, |
| { "mo_Cyrl", "ro_Cyrl" }, { "mo_US", "ro_US" }, |
| { "mo_Cyrl_US", "ro_Cyrl_US" }, { "sh", "sr_Latn" }, |
| { "sh_US", "sr_Latn_US" }, { "sh_Cyrl", "sr" }, |
| { "sh_Cyrl_US", "sr_US" }, { "hy_SU", "hy" }, |
| { "hy_AM", "hy" }, { "en_SU", "en_RU" }, |
| { "rO-cYrl-aQ", "ro_Cyrl_AQ" }, }; |
| for (String[] pair : tests) { |
| String actual = canonicalizer.transform(pair[0]); |
| assertEquals("Canonical", pair[1], actual); |
| } |
| } |
| |
| public void TestBrackets() { |
| String[][] tests = { |
| { |
| "language", |
| "en", |
| "Anglish (abc)", |
| "en", |
| "Anglish [abc]", |
| "〖?Anglish [abc]?❬ (U.S. [ghi])❭〗〖?Anglish [abc]?❬ (Latine [def])❭〗〖?Anglish [abc]?❬ (Latine [def], U.S. [ghi])❭〗〖❬Langue: ❭?Anglish (abc)?〗" }, |
| { |
| "script", |
| "Latn", |
| "Latine (def)", |
| "en_Latn", |
| "Anglish [abc] (Latine [def])", |
| "〖❬Anglish [abc] (❭?Latine [def]?❬)❭〗〖❬Anglish [abc] (❭?Latine [def]?❬, U.S. [ghi])❭〗〖❬Scripte: ❭?Latine (def)?〗" }, |
| { |
| "territory", |
| "US", |
| "U.S. (ghi)", |
| "en_Latn_US", |
| "Anglish [abc] (Latine [def], U.S. [ghi])", |
| "〖❬Anglish [abc] (❭?U.S. [ghi]?❬)❭〗〖❬Anglish [abc] (Latine [def], ❭?U.S. [ghi]?❬)❭〗〖❬Territorie: ❭?U.S. (ghi)?〗" }, |
| { null, null, null, "en_US", "Anglish [abc] (U.S. [ghi])", null }, |
| { "variant", "FOOBAR", "foo (jkl)", "en_foobar", "Anglish [abc] (foo [jkl])", null }, |
| { "key", "co", "sort (mno)", "en_foobar@co=FOO", "Anglish [abc] (foo [jkl], sort [mno]=foo)", null }, |
| { "key|type", "co|fii", "sortfii (mno)", "en_foobar@co=FII", "Anglish [abc] (foo [jkl], sortfii [mno])", null }, }; |
| // load up a dummy source |
| SimpleXMLSource dxs = new SimpleXMLSource("xx"); |
| for (String[] row : tests) { |
| if (row[0] == null) { |
| continue; |
| } |
| int typeCode = CLDRFile.typeNameToCode(row[0]); |
| String path = CLDRFile.getKey(typeCode, row[1]); |
| dxs.putValueAtDPath(path, row[2]); |
| } |
| // create a cldrfile from it and test |
| SimpleXMLSource root = new SimpleXMLSource("root"); |
| root.putValueAtDPath( |
| "//ldml/localeDisplayNames/localeDisplayPattern/localePattern", |
| "{0} ({1})"); |
| root.putValueAtDPath( |
| "//ldml/localeDisplayNames/localeDisplayPattern/localeSeparator", |
| "{0}, {1}"); |
| root.putValueAtDPath( |
| "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"language\"]", |
| "Langue: {0}"); |
| root.putValueAtDPath( |
| "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"script\"]", |
| "Scripte: {0}"); |
| root.putValueAtDPath( |
| "//ldml/localeDisplayNames/codePatterns/codePattern[@type=\"territory\"]", |
| "Territorie: {0}"); |
| CLDRFile f = new CLDRFile(dxs, root); |
| ExampleGenerator eg = new ExampleGenerator(f, testInfo.getEnglish(), |
| CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); |
| for (String[] row : tests) { |
| if (row[0] != null) { |
| int typeCode = CLDRFile.typeNameToCode(row[0]); |
| String standAlone = f.getName(typeCode, row[1]); |
| if (!assertEquals("stand-alone " + row[3], row[2], standAlone)) { |
| typeCode = CLDRFile.typeNameToCode(row[0]); |
| standAlone = f.getName(typeCode, row[1]); |
| } |
| ; |
| if (row[5] != null) { |
| String path = CLDRFile.getKey(typeCode, row[1]); |
| String example = eg |
| .getExampleHtml(path, "?" + row[2] + "?"); |
| assertEquals("example " + row[3], row[5], |
| ExampleGenerator.simplify(example)); |
| } |
| } |
| String displayName = f.getName(row[3], true, "{0}={1}", |
| "{0} ({1})", "{0}, {1}"); |
| assertEquals("locale " + row[3], row[4], displayName); |
| } |
| } |
| |
| public void TestLocaleNamePattern() { |
| assertEquals("Locale name", "Chinese", |
| testInfo.getEnglish().getName("zh")); |
| assertEquals("Locale name", "Chinese (United States)", testInfo |
| .getEnglish().getName("zh-US")); |
| assertEquals("Locale name", "Chinese (Arabic, United States)", testInfo |
| .getEnglish().getName("zh-Arab-US")); |
| CLDRFile japanese = testInfo.getCLDRFile("ja", true); |
| assertEquals("Locale name", "中国語", japanese.getName("zh")); |
| assertEquals("Locale name", "中国語 (アメリカ合衆国)", japanese.getName("zh-US")); |
| assertEquals("Locale name", "中国語 (アラビア文字\u3001アメリカ合衆国)", |
| japanese.getName("zh-Arab-US")); |
| } |
| |
| public void TestExtendedLanguage() { |
| assertEquals("Extended language translation", "Simplified Chinese", |
| testInfo.getEnglish().getName("zh_Hans")); |
| assertEquals("Extended language translation", |
| "Simplified Chinese (Singapore)", testInfo.getEnglish() |
| .getName("zh_Hans_SG")); |
| assertEquals("Extended language translation", "American English", |
| testInfo.getEnglish().getName("en-US")); |
| assertEquals("Extended language translation", |
| "American English (Arabic)", |
| testInfo.getEnglish().getName("en-Arab-US")); |
| } |
| |
| public void testAllVariants() { |
| Relation<String, String> extensionToKeys = SUPPLEMENTAL_DATA_INFO.getBcp47Extension2Keys(); |
| Relation<String, String> keyToValues = SUPPLEMENTAL_DATA_INFO.getBcp47Keys(); |
| Map<R2<String, String>, String> extKeyToDeprecated = SUPPLEMENTAL_DATA_INFO.getBcp47Deprecated(); |
| Map<String, String> keyToValueType = SUPPLEMENTAL_DATA_INFO.getBcp47ValueType(); |
| LanguageTagParser ltp = new LanguageTagParser(); |
| String lastKey = ""; |
| CLDRFile english = testInfo.getEnglish(); |
| |
| String extName = english.getKeyName("t"); // special case where we need name |
| assertNotNull("Name of extension: " + "t", extName); |
| |
| Set<String> allowedNoKeyValueNameSet = ImmutableSet.of("cu", "tz"); |
| |
| main: |
| for (Entry<String, String> entry : extensionToKeys.entrySet()) { |
| String extension = entry.getKey(); |
| String key = entry.getValue(); |
| |
| String dep = extKeyToDeprecated.get(Row.of(key, "")); |
| if ("true".equals(dep)) { |
| logln("# Deprecated: " + Row.of(extension, key)); |
| // # MULTIPLE: [u, vt, CODEPOINTS] |
| continue; |
| } |
| |
| boolean allowedNoKeyValueName = allowedNoKeyValueNameSet.contains(key); |
| |
| String kname = english.getKeyName(key); |
| assertNotNull("Name of key: " + key, kname); |
| |
| //System.out.println("\n#Key: " + key + (kname == null ? "" : " (" + kname + ")")); |
| |
| // if (extension.equals("t")) { |
| // System.out.println("skipping -t- for now: " + key); |
| // continue; |
| // } |
| boolean isMultiple = "multiple".equals(keyToValueType.get(key)); // single | multiple | incremental | any |
| |
| Set<String> values = keyToValues.get(key); |
| String lastValue = null; |
| int count = 0; |
| for (String value : values) { |
| |
| dep = extKeyToDeprecated.get(Row.of(key, value)); |
| if ("true".equals(dep)) { |
| logln("# Deprecated: " + Row.of(extension, key)); |
| // # MULTIPLE: [u, vt, CODEPOINTS] |
| continue; |
| } |
| |
| boolean specialValue = value.equals(value.toUpperCase(Locale.ROOT)); |
| |
| String kvname = english.getKeyValueName(key, value); |
| if (!allowedNoKeyValueName && !specialValue) { |
| assertNotNull("Name of <" + key + "," + value + ">", kvname); |
| } else { |
| // logln("Name of <" + key + "," + value + ">" + " = " + kvname); |
| } |
| |
| //System.out.println("\n#Value: " + value + (kname == null ? "" : " (" + kvname + ")")); |
| |
| |
| String gorp = key.equals(lastKey) ? "" : |
| (key.equals("t") ? "-u-ca-persian" : "-t-hi") |
| + "-a-AA-v-VV-y-YY-x-foobar"; |
| |
| lastKey = key; |
| if (++count > 4) { |
| continue; |
| } |
| |
| if (specialValue) { |
| Set<String> valuesSet; |
| switch (value) { |
| case "PRIVATE_USE": // [t, x0, PRIVATE_USE] |
| valuesSet = ImmutableSet.of("foobar2"); |
| continue main; |
| case "REORDER_CODE": // [u, kr, REORDER_CODE] |
| valuesSet = ImmutableSet.of("digit", "sample"); |
| break; |
| case "RG_KEY_VALUE": // [u, rg, RG_KEY_VALUE] |
| valuesSet = ImmutableSet.of("usca"); |
| break; |
| case "SUBDIVISION_CODE": // [u, sd, SUBDIVISION_CODE] |
| valuesSet = ImmutableSet.of("usca", "gbsct", "frnor"); |
| break; |
| default: |
| throw new IllegalArgumentException(); |
| } |
| showItem(ltp, extension, key, gorp, valuesSet.toArray(new String[valuesSet.size()])); |
| |
| continue; |
| } |
| showItem(ltp, extension, key, gorp, value); |
| if (isMultiple) { |
| if (lastValue != null) { |
| showItem(ltp, extension, key, gorp, value, lastValue); |
| } else { |
| lastValue = value; |
| } |
| } |
| } |
| } |
| } |
| |
| private void showItem(LanguageTagParser ltp, String extension, String key, String gorp, String... values) { |
| |
| String locale = "en-GB-" + extension + (extension.equals("t") ? "-hi" : "") |
| + "-" + key + "-" + CollectionUtilities.join(values, "-") + gorp; |
| ltp.set(locale); |
| |
| logln(ltp.toString(Format.bcp47) |
| + " == " + ltp.toString(Format.icu) |
| + "\n\t\tstructure:\t" + ltp.toString(Format.structure)); |
| try { |
| String name = testInfo.getEnglish().getName(locale); |
| logln("\tname:\t" + name); |
| } catch (Exception e) { |
| errln("Name for " + locale + "; " + e.getMessage()); |
| e.printStackTrace(); |
| } |
| } |
| } |