blob: c50e0b704149442757a4b1edc92b6002b1419d3f [file] [log] [blame]
package org.unicode.cldr.unittest;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.unicode.cldr.tool.ToolConstants;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.LanguageTagCanonicalizer;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.Validity.Status;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.text.UnicodeSet;
public class TestValidity extends TestFmwkPlus {
private boolean DEBUG = false;
public static void main(String[] args) {
new TestValidity().run(args);
}
Validity validity = Validity.getInstance(CLDRPaths.COMMON_DIRECTORY);
public void TestBasicValidity() {
Object[][] tests = {
{ LstrType.language, Validity.Status.regular, true, "aa", "en" },
{ LstrType.language, null, false, "eng" }, // null means never found under any status
{ LstrType.language, null, false, "root" },
{ LstrType.language, Validity.Status.special, true, "mul" },
{ LstrType.language, Validity.Status.deprecated, true, "aju" },
{ LstrType.language, Validity.Status.private_use, true, "qaa" },
{ LstrType.language, Validity.Status.unknown, true, "und" },
{ LstrType.script, Validity.Status.regular, true, "Zyyy" },
{ LstrType.script, Validity.Status.special, true, "Zsye" },
{ LstrType.script, Validity.Status.regular, true, "Zyyy" },
{ LstrType.script, Validity.Status.unknown, true, "Zzzz" },
{ LstrType.region, Validity.Status.deprecated, true, "QU" },
{ LstrType.region, Validity.Status.macroregion, true, "EU" },
{ LstrType.region, Validity.Status.regular, true, "XK" },
{ LstrType.region, Validity.Status.macroregion, true, "001" },
{ LstrType.region, Validity.Status.private_use, true, "AA" },
{ LstrType.region, Validity.Status.unknown, true, "ZZ" },
{ LstrType.subdivision, Validity.Status.unknown, true, "kzzzzz" },
{ LstrType.subdivision, Validity.Status.regular, true, "usca" },
{ LstrType.subdivision, Validity.Status.deprecated, true, "albr" },
{ LstrType.currency, Validity.Status.regular, true, "USD" },
{ LstrType.currency, Validity.Status.unknown, true, "XXX" },
{ LstrType.currency, Validity.Status.deprecated, true, "ADP" },
{ LstrType.unit, Validity.Status.regular, true, "area-acre"},
};
for (Object[] test : tests) {
LstrType lstr = (LstrType) test[0];
Validity.Status subtypeRaw = (Validity.Status) test[1];
Boolean desired = (Boolean) test[2];
for (int i = 3; i < test.length; ++i) {
String code = (String) test[i];
List<Status> subtypes = subtypeRaw == null ? Arrays.asList(Status.values()) : Collections.singletonList(subtypeRaw);
for (Status subtype : subtypes) {
Set<String> actual = validity.getStatusToCodes(lstr).get(subtype);
assertRelation("Validity", desired, CldrUtility.ifNull(actual,Collections.EMPTY_SET), TestFmwkPlus.CONTAINS, code);
}
}
}
if (isVerbose()) {
for (LstrType lstrType : LstrType.values()) {
logln(lstrType.toString());
final Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(lstrType);
for (Entry<Validity.Status, Set<String>> entry2 : statusToCodes.entrySet()) {
logln("\t" + entry2.getKey());
logln("\t\t" + entry2.getValue());
}
}
}
}
static final Set<String> ALLOWED_UNDELETIONS = ImmutableSet.of("NL-BQ1", "NL-BQ2", "NL-BQ3", "NO-21", "NO-22");
static final Set<String> ALLOWED_MISSING = ImmutableSet.of("root");
public void TestCompatibility() {
// Only run the rest in exhaustive mode, since it requires CLDR_ARCHIVE_DIRECTORY
if (getInclusion() <= 5) {
return;
}
final String oldCommon = CLDRPaths.ARCHIVE_DIRECTORY + "cldr-" + ToolConstants.PREVIOUS_CHART_VERSION + "/common/";
Validity oldValidity = Validity.getInstance(oldCommon);
for (LstrType type : LstrType.values()) {
final Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(type);
if (statusToCodes == null) {
logln("validity data unavailable: " + type);
continue;
}
for (Entry<Status, Set<String>> e2 : statusToCodes.entrySet()) {
Status oldStatus = e2.getKey();
for (String code : e2.getValue()) {
Status newStatus = getNewStatus(type, code);
if (oldStatus == newStatus) {
continue;
}
if (oldStatus == Status.regular && newStatus != Status.deprecated) {
errln(type + ":" + code + ":" + oldStatus + " — regular item changed, and didn't become deprecated");
}
if (newStatus == null && !ALLOWED_MISSING.contains(code)) {
errln(type + ":" + code + ":" + oldStatus + " — missing in new data");
}
if (oldStatus == Status.deprecated && !ALLOWED_UNDELETIONS.contains(code)) {
errln(type + ":" + code + ":" + oldStatus + " => " + newStatus
+ " // add to exception list if really un-deprecated");
} else {
logln(type + ":" + code + " was " + oldStatus + " => " + newStatus);
}
}
}
}
}
private Status getNewStatus(LstrType type, String code) {
Map<Status, Set<String>> info = validity.getStatusToCodes(type);
for (Entry<Status, Set<String>> e : info.entrySet()) {
if (e.getValue().contains(code)) {
return e.getKey();
}
}
return null;
}
public void TestBothDirections() {
for (LstrType type : LstrType.values()) {
Map<Status, Set<String>> statusToCodes = validity.getStatusToCodes(type);
Map<String, Status> codeToStatus = validity.getCodeToStatus(type);
assertEquals("null at same time", statusToCodes == null, codeToStatus == null);
if (statusToCodes == null) {
logln("validity data unavailable: " + type);
continue;
}
for (Entry<Status, Set<String>> entry : statusToCodes.entrySet()) {
Status status = entry.getKey();
for (String code : entry.getValue()) {
assertEquals("Forward works", status, codeToStatus.get(code));
}
}
for (Entry<String, Status> entry : codeToStatus.entrySet()) {
final String code = entry.getKey();
final Status status = entry.getValue();
assertTrue("Reverse works: " + status, statusToCodes.get(status).contains(code));
}
}
}
public void TestUnits() {
Splitter HYPHEN_SPLITTER = Splitter.on('-');
UnicodeSet allowed = new UnicodeSet("[a-z0-9A-Z]").freeze();
Validity validity = Validity.getInstance(CLDRPaths.COMMON_DIRECTORY);
Map<String,String> shortened = ImmutableMap.<String,String>builder()
.put("acceleration", "accel")
.put("revolution", "revol")
.put("centimeter", "cmeter")
.put("kilometer", "kmeter")
.put("milligram", "mgram")
.put("deciliter", "dliter")
.put("millimole", "mmole")
.put("consumption", "consumpt")
.put("100kilometers", "100km")
.put("microsecond", "microsec")
.put("millisecond", "millisec")
.put("nanosecond", "nanosec")
.put("milliampere", "milliamp")
.put("foodcalorie", "foodcal")
.put("kilocalorie", "kilocal")
.put("kilojoule", "kjoule")
.put("frequency", "freq")
.put("gigahertz", "gigahertz")
.put("kilohertz", "khertz")
.put("megahertz", "megahertz")
.put("astronomical", "astro")
.put("decimeter", "dmeter")
.put("micrometer", "micmeter")
.put("scandinavian", "scand")
.put("millimeter", "mmeter")
.put("nanometer", "nanomete")
.put("picometer", "pmeter")
.put("microgram", "migram")
.put("horsepower", "horsep")
.put("milliwatt", "mwatt")
.put("hectopascal", "hpascal")
.put("temperature", "temp")
.put("fahrenheit", "fahren")
.put("centiliter", "cliter")
.put("hectoliter", "hliter")
.put("megaliter", "megliter")
.put("milliliter", "mliter")
.put("tablespoon", "tblspoon")
.build();
for (Entry<LstrType, Map<Status, Set<String>>> e1 : validity.getData().entrySet()) {
LstrType lstrType = e1.getKey();
for (Entry<Status, Set<String>> e2 : e1.getValue().entrySet()) {
Status status = e2.getKey();
for (String code : e2.getValue()) {
StringBuilder fixed = new StringBuilder();
for (String subcode : HYPHEN_SPLITTER.split(code)) {
if (fixed.length() > 0) {
fixed.append('-');
}
if (!allowed.containsAll(subcode)) {
errln("subcode has illegal character: " + subcode + ", in " + code);
} else if (subcode.length() > 8) {
fixed.append(shorten(subcode, shortened));
} else {
fixed.append(subcode);
}
}
String fixedCode = fixed.toString();
if (!fixedCode.equals(code)) {
warnln("code has overlong subcode: " + code + " should be " + fixedCode);
}
}
}
}
if (DEBUG) {
for (Entry<String, String> e : shortened.entrySet()) {
System.out.println('"' + e.getKey() + "\", \"" + e.getValue() + "\",");
}
}
}
private String shorten(String subcode, Map<String, String> shortened) {
String result = shortened.get(subcode);
if (result != null) return result;
switch (subcode) {
case "temperature": result = "temp"; break;
case "acceleration": result = "accel"; break;
case "frequency": result = "freq"; break;
default: result = subcode.substring(0, 8); break;
}
shortened.put(subcode, result);
return result;
}
public void TestLanguageTagParser() {
String[][] tests = {
{"en-cyrl_ru_variant2_variant1", "en_Cyrl_RU_VARIANT1_VARIANT2", "en-Cyrl-RU-variant1-variant2"},
{"EN-U-CO-PHONEBK-EM-EMOJI-T_RU", "en_t_ru_u_co_phonebk_em_emoji", "en-t-ru-u-co-phonebk-em-emoji"},
};
LanguageTagParser ltp = new LanguageTagParser();
for (String[] test : tests) {
String source = test[0];
String expectedLanguageSubtagParserIcu = test[1];
String expectedLanguageSubtagParserBCP = test[2];
ltp.set(source);
String actualLanguageSubtagParserIcu = ltp.toString();
assertEquals("Language subtag (ICU) for " + source, expectedLanguageSubtagParserIcu, actualLanguageSubtagParserIcu);
String actualLanguageSubtagParserBCP = ltp.toString(LanguageTagParser.OutputOption.BCP47);
assertEquals("Language subtag (BCP47) for " + source, expectedLanguageSubtagParserBCP, actualLanguageSubtagParserBCP);
}
}
public void TestLanguageTagCanonicalizer() {
String[][] tests = {
{ "de-fonipa", "de_FONIPA" },
{ "el-1901-polytoni-aaland", "el_AX_1901_POLYTON" },
{ "en-POLYTONI-WHATEVER-ANYTHING-AALAND", "en_AX_ANYTHING_POLYTON_WHATEVER" },
{ "eng-840", "en" },
{ "sh_ba", "sr_Latn_BA" },
{ "iw-arab-010", "he_Arab_AQ" },
{ "und", "und" },
{ "und_us", "und_US" },
{ "und_su", "und_RU" },
};
LanguageTagCanonicalizer canon = new LanguageTagCanonicalizer();
for (String[] inputExpected : tests) {
assertEquals("Canonicalize", inputExpected[1], canon.transform(inputExpected[0]));
}
}
}