// blob: e2d6dc774e98327bcc044555b95ab4197eb37b55 [file] [log] [blame]
package org.unicode.cldr.unittest;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Row.R4;
import com.ibm.icu.util.LocaleMatcher;
import com.ibm.icu.util.LocaleMatcher.LanguageMatcherData;
import com.ibm.icu.util.LocalePriorityList;
import com.ibm.icu.util.ULocale;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.LocaleNames;
import org.unicode.cldr.util.Pair;
/**
 * Checks the "written_new" language-match rows exposed by the CLDR supplemental data and
 * exercises {@link LocaleMatcher} against the shared {@code LocaleMatcherTest} matcher data.
 *
 * <p>Several tests bail out early while a known issue is logged for them.
 */
public class LanguageInfoTest extends TestFmwk {
    static CLDRConfig testInfo = CLDRConfig.getInstance();
    static LanguageMatcherData data = LocaleMatcherTest.LANGUAGE_MATCHER_DATA;

    /** Only written by the disabled {@code init()} below; nothing in this class fills it now. */
    static Map<ULocale, ULocale> FALLBACKS = new LinkedHashMap<>();

    /** Locale built from {@code LocaleNames.MUL}; listed first in {@link #testFallbacks()}. */
    static final ULocale MUL = new ULocale(LocaleNames.MUL);

    // Disabled setup, kept for reference: it filled the matcher data from the "written"
    // supplemental rows and recorded every row with a score of 10 in FALLBACKS.
    //
    // @Override
    // protected void init() throws Exception {
    // super.init();
    // SupplementalDataInfo supp = testInfo.getSupplementalDataInfo();
    // List<R4<String, String, Integer, Boolean>> languageData = supp
    // .getLanguageMatcherData("written");
    // for (R4<String, String, Integer, Boolean> item : languageData) {
    // data.addDistance(item.get0().replace('_', '-'), item.get1()
    // .replace('_', '-'), item.get2(), item.get3());
    // logln(item.get0() + "\t" + getName(item.get0()) + "\t"
    // + item.get1() + "\t" + getName(item.get1()) + "\t"
    // + item.get2() + "\t" + item.get3());
    // if (item.get2() == 10) {
    // FALLBACKS.put(new ULocale(item.get0()),
    // new ULocale(item.get1()));
    // }
    // }
    // data.freeze();
    // }

    /**
     * Walks every "written_new" row, asserting that the desired and supported codes contain the
     * same number of underscores and that no (desired, supported) pair occurs twice. Each new
     * row is logged as a tab-separated line.
     */
    public void testGetData() {
        final Set<Pair<String, String>> seenPairs = new HashSet<>();
        for (R4<String, String, Integer, Boolean> row :
                testInfo.getSupplementalDataInfo().getLanguageMatcherData("written_new")) {
            final String desired = row.get0();
            final String supported = row.get1();
            final Integer score = row.get2();
            final Boolean oneway = row.get3();
            // Bounds check on the score is currently switched off:
            // assertTrue("check bounds", foo.get2() >= 0 && foo.get2() <= 100);

            final int desiredSeparators = count('_', desired);
            final int supportedSeparators = count('_', supported);
            assertEquals("Same number of fields", desiredSeparators, supportedSeparators);

            final Pair<String, String> key = Pair.of(desired, supported);
            // Set.add reports false when the pair was already recorded.
            if (!seenPairs.add(key)) {
                errln("Duplicate entry for " + key);
                continue;
            }

            final String desiredName = getName(desired);
            final String supportedName = getName(supported);
            logln(
                    score
                            + "\t"
                            + desired
                            + "\t"
                            + desiredName
                            + "\t"
                            + supported
                            + "\t"
                            + supportedName
                            + "\t"
                            + oneway);
        }
    }

    /**
     * Counts how many times a character occurs in a string.
     *
     * @param c the character to look for
     * @param string the text to scan
     * @return the number of positions in {@code string} holding {@code c}
     */
    private int count(char c, String string) {
        int occurrences = 0;
        final int length = string.length();
        for (int i = 0; i < length; i++) {
            if (string.charAt(i) == c) {
                occurrences++;
            }
        }
        return occurrences;
    }

    /**
     * Produces a display label for a language-match code.
     *
     * @param item a code taken from a language-match row
     * @return {@code "n/a"} when the code contains {@code *}, the code itself when it contains
     *     {@code $}, otherwise the name returned by the English CLDR file for that code
     */
    public static String getName(String item) {
        if (item.contains("*")) {
            return "n/a";
        }
        if (item.contains("$")) {
            return item;
        }
        return testInfo.getEnglish().getName(item);
    }

    /** Command-line entry point: runs this test class with the given arguments. */
    public static void main(String[] args) {
        final LanguageInfoTest test = new LanguageInfoTest();
        test.run(args);
    }

    /**
     * Matches a handful of locales against the supported list French, UK English, English.
     * Returns without testing while CLDR-14166 is logged as a known issue.
     */
    public void testBasics() {
        if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
            return;
        }
        final LocalePriorityList supportedLocales =
                LocalePriorityList.add(ULocale.FRENCH)
                        .add(ULocale.UK)
                        .add(ULocale.ENGLISH)
                        .build();
        final LocaleMatcher matcher = new LocaleMatcher(supportedLocales, data);
        logln(matcher.toString());

        final ULocale forUk = matcher.getBestMatch(ULocale.UK);
        assertEquals("UK in FR, UK, EN", ULocale.UK, forUk);

        final ULocale forUs = matcher.getBestMatch(ULocale.US);
        assertEquals("US in FR, UK, EN", ULocale.ENGLISH, forUs);

        final ULocale forFrance = matcher.getBestMatch(ULocale.FRANCE);
        assertEquals("FR in FR, UK, EN", ULocale.FRENCH, forFrance);

        final ULocale forJapan = matcher.getBestMatch(ULocale.JAPAN);
        assertEquals("JA in FR, UK, EN", ULocale.FRENCH, forJapan);
    }

    /**
     * Checks which of zh_CN / zh_TW is chosen for several Chinese locale identifiers when the
     * supported list is "zh_CN, zh_TW, iw". Returns without testing while CLDR-14166 is logged as
     * a known issue.
     */
    public void TestChinese() {
        // An earlier known-issue guard is disabled:
        // if (logKnownIssue("Cldrbug:8169",
        // "Problems with language matcher TestChinese.")) {
        // return;
        // }
        if (logKnownIssue("CLDR-14166", "Skip until CLDR updated for new ICU4J LocaleMatcher")) {
            return;
        }
        final LocalePriorityList supportedLocales =
                LocalePriorityList.add("zh_CN, zh_TW, iw").build();
        final LocaleMatcher matcher = new LocaleMatcher(supportedLocales, data);
        final ULocale taiwanChinese = new ULocale("zh_TW");
        final ULocale chinaChinese = new ULocale("zh_CN");
        final String message = "zh_CN, zh_TW, iw;";

        // Identifiers expected to resolve to zh_TW.
        for (String desired : new String[] {"zh_Hant_HK", "zh_Hant_TW", "zh_Hant", "zh_TW"}) {
            assertEquals(message, taiwanChinese, matcher.getBestMatch(desired));
        }
        // Identifiers expected to resolve to zh_CN.
        for (String desired : new String[] {"zh_Hans_CN", "zh_CN", "zh"}) {
            assertEquals(message, chinaChinese, matcher.getBestMatch(desired));
        }
    }

    /**
     * For every one-way "written_new" row without a wildcard in its desired code, expects a
     * matcher over [MUL, supported] to pick the row's supported locale for the desired locale.
     * A mismatch additionally emits a warning with a sample {@code languageMatch} element.
     * Returns without testing while CLDR-17189 is logged as a known issue.
     */
    public void testFallbacks() {
        if (logKnownIssue("CLDR-17189", "waiting on LocaleMatcherData update")) {
            return;
        }
        for (R4<String, String, Integer, Boolean> row :
                testInfo.getSupplementalDataInfo().getLanguageMatcherData("written_new")) {
            final String desiredCode = row.get0();
            if (desiredCode.contains("*")) {
                continue;
            }
            // The known-issue call is only reached for tlh, thanks to short-circuiting.
            if (desiredCode.equals("tlh")
                    && logKnownIssue("cldrbug:8919", "Hack until tlh has likely subtags")) {
                continue;
            }
            final ULocale desired = new ULocale(desiredCode);
            final ULocale supported = new ULocale(row.get1());
            final Integer score = row.get2();
            final Boolean oneway = row.get3();
            if (!oneway) {
                continue;
            }

            // MUL goes first in the supported list so that a passing match shows the fallback
            // is strong enough to beat the default.
            @SuppressWarnings("deprecation")
            final LocaleMatcher matcher =
                    new LocaleMatcher(LocalePriorityList.add(MUL).add(supported).build(), data);
            ULocale bestMatch = matcher.getBestMatch(desired);

            final boolean matched =
                    assertEquals("fallback for " + desired + ", " + score, supported, bestMatch);
            if (matched) {
                continue;
            }

            final ULocale maximized = ULocale.addLikelySubtags(desired);
            final String hint =
                    "Might be missing something like\n"
                            + "<languageMatch desired=\""
                            + desired.getLanguage()
                            + "_"
                            + maximized.getScript()
                            + "\" supported=\"en_Latn\" percent=\"90\" oneway=\"true\" />";
            warnln(hint);
            bestMatch = matcher.getBestMatch(desired); // for debugging
        }
    }
}