// blob: 2bd546da1cee9fbe9d75e04e6d091de1274b9d3c [file] [log] [blame]
package org.unicode.cldr.unittest;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.Annotations.AnnotationSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Emoji;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.XListFormatter;
import org.unicode.cldr.util.XListFormatter.ListTypeLength;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.dev.util.UnicodeMap.EntryRange;
import com.ibm.icu.text.UnicodeSet;
public class TestAnnotations extends TestFmwkPlus {
// Debug switch: when false, TestList returns immediately and TestUniqueness skips its "Missing" report.
private static final boolean SHOW_LIST = false;
// Debug switch that is not referenced anywhere in the visible part of this file.
private static final boolean SHOW_ENGLISH = false;
/**
 * Command-line entry point: builds a {@code TestAnnotations} instance and
 * passes the arguments straight through to its {@code run} method.
 *
 * @param args command-line arguments, forwarded unchanged
 */
public static void main(String[] args) {
    final TestAnnotations suite = new TestAnnotations();
    suite.run(args);
}
/**
 * Kind of expectation a {@code TestBasic} row places on a character's keyword set.
 * The constant names are parsed from the test table with {@code valueOf}.
 */
enum Containment {
    /** The keyword set shares at least one element with the listed keywords. */
    contains,
    /** The keyword set is empty. */
    empty,
    /** The keyword set shares no element with the listed keywords. */
    not_contains
}
/**
 * Checks the keyword annotations of a few characters against a small expectation table.
 *
 * Each row is { locale, UnicodeSet pattern, Containment name, keyword... }:
 * <ul>
 * <li>{@code contains}: fails only when none of the listed keywords is present;</li>
 * <li>{@code not_contains}: fails when any of the listed keywords is present;</li>
 * <li>{@code empty}: the keyword set must be empty.</li>
 * </ul>
 * A character with no annotation entry, or with a null keyword set, is treated as
 * having an empty keyword set.
 */
public void TestBasic() {
    String[][] tests = {
        { "en", "[\u2650]", "contains", "sagitarius", "zodiac" },
        { "en", "[\u0020]", "empty" },
        { "en", "[\u2651]", "not_contains", "foobar" },
    };
    for (String[] test : tests) {
        UnicodeMap<Annotations> data = Annotations.getData(test[0]);
        UnicodeSet us = new UnicodeSet(test[1]);
        Containment contains = Containment.valueOf(test[2]);
        // Everything after the first three columns is an expected keyword.
        Set<String> annotations = new LinkedHashSet<>(Arrays.asList(test).subList(3, test.length));
        for (String s : us) {
            // Guard the map lookup itself: a missing entry must not turn into a NullPointerException.
            Annotations entry = data.get(s);
            Set<String> set = entry == null ? null : entry.getKeywords();
            if (set == null) {
                set = Collections.emptySet();
            }
            switch (contains) {
            case contains:
                if (Collections.disjoint(set, annotations)) {
                    // Report exactly which expected keywords were not found.
                    Set<String> missing = new LinkedHashSet<>(annotations);
                    missing.removeAll(set);
                    assertEquals("Missing items", Collections.emptySet(), missing);
                }
                break;
            case not_contains:
                if (!Collections.disjoint(set, annotations)) {
                    // Report exactly which forbidden keywords were found.
                    Set<String> extra = new LinkedHashSet<>(annotations);
                    extra.retainAll(set);
                    assertEquals("Extra items", Collections.emptySet(), extra);
                }
                break;
            case empty:
                assertEquals("mismatch", Collections.emptySet(), set);
                break;
            }
        }
    }
}
/**
 * Debug listing: when {@code SHOW_LIST} is enabled and the run is verbose, logs every
 * entry range of the annotation data for each available annotation locale.
 * Performs no assertions.
 */
public void TestList() {
    if (!SHOW_LIST || !isVerbose()) {
        return;
    }
    for (String locale : Annotations.getAvailable()) {
        UnicodeMap<Annotations> localeData = Annotations.getData(locale);
        for (EntryRange<Annotations> range : localeData.entryRanges()) {
            logln(range.toString());
        }
    }
}
/**
 * Verifies the English short name and keyword set of each emoji in the table below.
 *
 * Each row is { emoji, expected short name, expected keywords }. If any assertion
 * fails, a candidate replacement table built from the actual data is printed to
 * standard output so it can be reviewed and pasted back in.
 */
public void TestNames() {
    AnnotationSet eng = Annotations.getDataSet("en");
    String[][] tests = { // the expected value for keywords can use , as well as |.
        {"👨🏻", "man: light skin tone", "adult | man | light skin tone"},
        {"👱‍♂️", "man: blond hair", "blond, blond-haired man, hair, man, man: blond hair"},
        {"👱🏻‍♂️", "man: light skin tone, blond hair", "blond, blond-haired man, hair, man, man: blond hair, light skin tone, blond hair"},
        {"👨‍🦰", "man: red hair", "adult | man | red hair"},
        { "👨🏻‍🦰", "man: light skin tone, red hair", "adult | man | light skin tone| red hair"},
        { "🇪🇺", "flag: European Union", "flag" },
        { "#️⃣", "keycap: #", "keycap" },
        { "9️⃣", "keycap: 9", "keycap" },
        { "💏", "kiss", "couple | kiss" },
        { "👩‍❤️‍💋‍👩", "kiss: woman, woman", "couple | kiss | woman" },
        { "💑", "couple with heart", "couple | couple with heart | love" },
        { "👩‍❤️‍👩", "couple with heart: woman, woman", "couple | couple with heart | love | woman" },
        { "👪", "family", "family" },
        { "👩‍👩‍👧", "family: woman, woman, girl", "family | woman | girl" },
        { "👦🏻", "boy: light skin tone", "boy | young | light skin tone" },
        { "👩🏿", "woman: dark skin tone", "adult | woman | dark skin tone" },
        { "👨‍⚖", "man judge", "justice | man | man judge | scales" },
        { "👨🏿‍⚖", "man judge: dark skin tone", "justice | man | man judge | scales | dark skin tone" },
        { "👩‍⚖", "woman judge", "judge | scales | woman" },
        { "👩🏼‍⚖", "woman judge: medium-light skin tone", "judge | scales | woman | medium-light skin tone" },
        { "👮", "police officer", "cop | officer | police" },
        { "👮🏿", "police officer: dark skin tone", "cop | officer | police | dark skin tone" },
        { "👮‍♂️", "man police officer", "cop | man | officer | police" },
        { "👮🏼‍♂️", "man police officer: medium-light skin tone", "cop | man | officer | police | medium-light skin tone" },
        { "👮‍♀️", "woman police officer", "cop | officer | police | woman" },
        { "👮🏿‍♀️", "woman police officer: dark skin tone", "cop | officer | police | woman | dark skin tone" },
        { "🚴", "person biking", "bicycle | biking | cyclist | person biking" },
        { "🚴🏿", "person biking: dark skin tone", "bicycle | biking | cyclist | person biking | dark skin tone" },
        { "🚴‍♂️", "man biking", "bicycle | biking | cyclist | man" },
        { "🚴🏿‍♂️", "man biking: dark skin tone", "bicycle | biking | cyclist | man | dark skin tone" },
        { "🚴‍♀️", "woman biking", "bicycle | biking | cyclist | woman" },
        { "🚴🏿‍♀️", "woman biking: dark skin tone", "bicycle | biking | cyclist | woman | dark skin tone" },
    };
    // Expected keywords may be separated by either '|' or ','; surrounding blanks are trimmed.
    final Splitter keywordSplitter = Splitter.on(CharMatcher.anyOf("|,")).trimResults();
    boolean allPassed = true;
    for (String[] row : tests) {
        final String emoji = row[0];
        final Set<String> expectedKeywords = new HashSet<>(keywordSplitter.splitToList(row[2]));
        final String actualName = eng.getShortName(emoji);
        final Set<String> actualKeywords = eng.getKeywords(emoji);
        // Both assertions always run, so every mismatch in a row is reported.
        final boolean nameMatches = assertEquals("short name for " + emoji, row[1], actualName);
        final boolean keywordsMatch = assertEquals("keywords for " + emoji, expectedKeywords, actualKeywords);
        allPassed = allPassed && nameMatches && keywordsMatch;
    }
    if (allPassed) {
        return;
    }
    // Something failed: print the table rows as the current data would have them.
    System.out.println("Possible replacement, but check");
    for (String[] row : tests) {
        final String emoji = row[0];
        final String actualName = eng.getShortName(emoji);
        final Set<String> actualKeywords = eng.getKeywords(emoji);
        StringBuilder line = new StringBuilder();
        line.append("{\"").append(emoji);
        line.append("\",\"").append(actualName);
        line.append("\",\"").append(CollectionUtilities.join(actualKeywords, " | "));
        line.append("\"},");
        System.out.println(line.toString());
    }
}
// comment this out, since we now have console check for this.
/**
 * Reports an error for every short name that is shared by more than one RGI emoji
 * within a single annotation locale ("en" plus all available locales, minus "root").
 *
 * When verbose, the collisions are also printed with their English names. When
 * {@code SHOW_LIST} is enabled, the emoji that English has but a locale lacks are
 * printed as tab-separated rows ending in a GOOGLETRANSLATE spreadsheet formula.
 *
 * Note: an earlier {@code logKnownIssue("cldrbug:10104", ...)} guard, a coverage-based
 * locale filter, and the removal of "sr_Latn" are no longer applied.
 */
public void TestUniqueness() {
    Set<String> locales = new TreeSet<>();
    locales.add("en");
    locales.addAll(Annotations.getAvailable());
    locales.remove("root");

    // Keyed by "locale<TAB>name"; values are the emoji that collide on that name.
    Multimap<String, String> localeToNameToEmoji = TreeMultimap.create();
    UnicodeMap<Annotations> english = Annotations.getData("en");
    AnnotationSet englishSet = Annotations.getDataSet("en");
    UnicodeSet englishKeys = getCurrent(english.keySet());
    Map<String, UnicodeSet> localeToMissing = new TreeMap<>();

    for (String locale : locales) {
        logln("uniqueness: " + locale);
        AnnotationSet data = Annotations.getDataSet(locale);

        UnicodeSet missing = new UnicodeSet(englishKeys).removeAll(data.keySet()).freeze();
        localeToMissing.put(locale, missing);

        // Group this locale's emoji by short name; a fresh multimap per locale.
        Multimap<String, String> nameToEmoji = TreeMultimap.create();
        for (String emoji : Emoji.getAllRgi()) {
            String name = data.getShortName(emoji);
            if (name != null) {
                nameToEmoji.put(name, emoji);
            }
        }

        for (String name : nameToEmoji.keySet()) {
            Collection<String> emojis = nameToEmoji.get(name);
            if (emojis.size() <= 1) {
                continue;
            }
            errln("Duplicate name in " + locale + ": “" + name + "” for "
                + CollectionUtilities.join(emojis, " & "));
            localeToNameToEmoji.putAll(locale + "\t" + name, emojis);
        }
    }

    if (isVerbose() && !localeToNameToEmoji.isEmpty()) {
        System.out.println("\nCollisions");
        for (Entry<String, String> collision : localeToNameToEmoji.entries()) {
            String localeAndName = collision.getKey();
            String emoji = collision.getValue();
            System.out.println(localeAndName + "\t" + englishSet.getShortName(emoji) + "\t" + emoji);
        }
    }

    if (SHOW_LIST && !localeToMissing.isEmpty()) {
        System.out.println("\nMissing");
        // Row number used in the generated formula, e.g. =GOOGLETRANSLATE(C2,"en",A2);
        // numbering starts at 2 (presumably row 1 is a header row -- TODO confirm).
        int sheetRow = 2;
        for (Entry<String, UnicodeSet> missingEntry : localeToMissing.entrySet()) {
            String locale = missingEntry.getKey();
            for (String emoji : missingEntry.getValue()) {
                System.out.println(locale
                    + "\t" + emoji
                    + "\t" + englishSet.getShortName(emoji)
                    + "\t=GOOGLETRANSLATE(C" + sheetRow + ",\"en\",A" + sheetRow + ")");
                ++sheetRow;
            }
        }
    }
}
/**
 * Filters a set of keys down to those made up entirely of characters in the
 * UnicodeSet {@code [:age=9.0:]}.
 *
 * @param keySet the strings to filter; it is only iterated, never modified
 * @return a frozen set holding each string of {@code keySet} whose characters are
 *         all contained in {@code [:age=9.0:]}
 */
private UnicodeSet getCurrent(UnicodeSet keySet) {
    final UnicodeSet allowedCharacters = new UnicodeSet("[:age=9.0:]");
    final UnicodeSet filtered = new UnicodeSet();
    for (String key : keySet) {
        if (!allowedCharacters.containsAll(key)) {
            continue;
        }
        filtered.add(key);
    }
    return filtered.freeze();
}
/**
 * Checks that the "tts" annotation paths present in the {@code en} and {@code root}
 * annotation files are exactly the paths returned by {@code Emoji.getNamePaths()}:
 * nothing extra in the file and nothing missing from it.
 *
 * Also checks that {@code Emoji.getNonConstructed()} contains "®".
 */
public void testAnnotationPaths() {
    assertTrue("Emoji.getNonConstructed() contains ®", Emoji.getNonConstructed().contains("®"));
    Factory factoryAnnotations = SimpleFactory.make(CLDRPaths.ANNOTATIONS_DIRECTORY, ".*");
    // Matches paths such as //ldml/annotations/annotation[@cp="🧜"][@type="tts"];
    // compiled once and reused for every locale.
    final Pattern ttsAnnotationPath = Pattern.compile("//ldml/annotations/annotation.*tts.*");
    for (String locale : Arrays.asList("en", "root")) {
        CLDRFile annotationsFile = factoryAnnotations.make(locale, false);
        Set<String> annotationPaths = annotationsFile.getPaths("//ldml/anno",
            ttsAnnotationPath.matcher(""), new TreeSet<>());
        Set<String> annotationPathsExpected = Emoji.getNamePaths();
        // Both set differences must be empty for the two path sets to be equal.
        checkAMinusBIsC("(" + locale + ".xml - Emoji.getNamePaths)", annotationPaths, annotationPathsExpected, Collections.<String> emptySet());
        checkAMinusBIsC("(Emoji.getNamePaths - " + locale + ".xml)", annotationPathsExpected, annotationPaths, Collections.<String> emptySet());
    }
}
/**
 * Compares the set of {@code //ldml/annotations/} paths across annotation files:
 * <ul>
 * <li>every root path must also be present in {@code en};</li>
 * <li>every path of every available locale must also be present in root;</li>
 * <li>no path of any locale may contain U+FE0F (only the first offender per locale
 *     is reported).</li>
 * </ul>
 * For each violation of the first two rules, a line describing the add or delete
 * that would fix it is printed to standard output.
 */
public void testSuperfluousAnnotationPaths() {
    final String annotationsPrefix = "//ldml/annotations/";
    Factory factoryAnnotations = SimpleFactory.make(CLDRPaths.ANNOTATIONS_DIRECTORY, ".*");

    ImmutableSet<String> rootPaths = ImmutableSortedSet.copyOf(
        factoryAnnotations.make("root", false).iterator(annotationsPrefix));
    CLDRFile englishAnnotations = factoryAnnotations.make("en", false);
    ImmutableSet<String> englishPaths = ImmutableSortedSet.copyOf(
        englishAnnotations.iterator(annotationsPrefix));

    // Paths that root has but English lacks.
    Set<String> missingFromEnglish = setDifference(rootPaths, englishPaths);
    assertTrue("en contains root", missingFromEnglish.isEmpty());
    for (String path : missingFromEnglish) {
        System.out.println("locale=en; action=add; path=" + path + "; value=XXX");
    }

    // Paths that some locale has but root lacks, accumulated over all locales.
    Set<String> allSuperfluous = new TreeSet<>();
    for (String locale : factoryAnnotations.getAvailable()) {
        CLDRFile localeAnnotations = factoryAnnotations.make(locale, false);
        ImmutableSet<String> currentPaths = ImmutableSortedSet.copyOf(
            localeAnnotations.iterator(annotationsPrefix));
        Set<String> superfluous = setDifference(currentPaths, rootPaths);
        assertTrue("root contains " + locale, superfluous.isEmpty());
        allSuperfluous.addAll(superfluous);

        for (String path : currentPaths) {
            if (path.contains("\uFE0F")) {
                errln("Contains FE0F: " + path);
                break; // one report per locale is enough
            }
        }
    }

    // get items to fix
    for (String path : allSuperfluous) {
        System.out.println("locale=/.*/; action=delete; path=" + path);
    }
}
/**
 * Computes the set difference {@code a - b} without modifying either argument.
 *
 * @param a the set to take elements from
 * @param b the elements to exclude
 * @return a new mutable set with the elements of {@code a} that are not in
 *         {@code b}, in the iteration order of {@code a}
 */
private Set<String> setDifference(ImmutableSet<String> a, ImmutableSet<String> b) {
    final Set<String> onlyInA = new LinkedHashSet<>(a);
    onlyInA.removeAll(b);
    return onlyInA;
}
/**
 * Asserts that the set difference {@code a - b} equals {@code c}.
 *
 * @param title message passed to the assertion
 * @param a     the set to take elements from; not modified
 * @param b     the elements to exclude
 * @param c     the expected difference
 */
private void checkAMinusBIsC(String title, Set<String> a, Set<String> b, Set<String> c) {
    // Work on a sorted copy so that the caller's set is left untouched.
    final Set<String> difference = new TreeSet<>(a);
    difference.removeAll(b);
    assertEquals(title, c, difference);
}
/**
 * Formats the code points of a short string with {@code XListFormatter} for several
 * list type/length combinations and compares the result with the expected text.
 *
 * Each row is { locale, ListTypeLength, source string, expected formatted list }.
 */
public void testListFormatter() {
    Object[][] tests = {
        {"en", ListTypeLength.NORMAL, "ABC", "A, B, and C"},
        {"en", ListTypeLength.AND_SHORT, "ABC", "A, B, & C"},
        {"en", ListTypeLength.AND_NARROW, "ABC", "A, B, C"},
        {"en", ListTypeLength.OR_WIDE, "ABC", "A, B, or C"}
    };
    Factory factory = CLDRConfig.getInstance().getCldrFactory();
    for (Object[] row : tests) {
        // Unpack the row into typed locals first.
        final String localeId = (String) row[0];
        final ListTypeLength listTypeLength = (ListTypeLength) row[1];
        final String source = (String) row[2];
        final String expected = (String) row[3];

        CLDRFile cldrFile = factory.make(localeId, true);
        XListFormatter formatter = new XListFormatter(cldrFile, listTypeLength);
        String actual = formatter.formatCodePoints(source);
        assertEquals(localeId + ", " + listTypeLength + ", " + source, expected, actual);
    }
}
}