// blob: 9cba84712d8da567aed0aadf9ec7117cd456638f [file] [log] [blame]
package org.unicode.cldr.unittest;
import java.io.File;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.util.Builder;
import org.unicode.cldr.util.Builder.MBuilder;
import org.unicode.cldr.util.CLDRTransforms;
import org.unicode.cldr.util.CLDRTransforms.Direction;
import org.unicode.cldr.util.CLDRTransforms.MyHandler;
import org.unicode.cldr.util.CLDRTransforms.ParsedTransformID;
import org.unicode.cldr.util.CLDRTransforms.Visibility;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.XMLFileReader;
import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.text.Transliterator;
import com.ibm.icu.util.ULocale;
/**
 * Exploratory test that derives BCP 47 style names of the form
 * {@code <target>-t-<source>[-m0-<mechanism>]} for the transform IDs found in
 * the CLDR transform directory and for the IDs registered with ICU's
 * {@link Transliterator}. It prints the resulting old-name to new-name mapping
 * and every name part that could not be converted.
 */
public class TestBcp47Transforms extends TestFmwk {

    /** Prefix of a name part that could not be converted; callers test for it with {@code contains}. */
    private static final String UNRESOLVED_MARKER = "?";

    public static void main(String[] args) {
        new TestBcp47Transforms().run(args);
    }

    /**
     * Prints the subtypes of the BCP 47 "t" extension keys, then the mapping
     * from existing transform IDs to their derived BCP 47 names, then the
     * contents of {@link #MISSING}.
     */
    public void TestNames() {
        printTransformExtensionKeys();

        Map<String, String> old2newName = new TreeMap<String, String>();
        addCldrTransformNames(old2newName);
        addIcuTransformNames(old2newName);

        for (Entry<String, String> entry : old2newName.entrySet()) {
            System.out.println(entry);
        }
        System.out.println("Missing");
        for (Entry<String, Set<R2<Type, String>>> entry : MISSING
                .keyValuesSet()) {
            System.out.println(entry);
        }
    }

    /** Prints key, subtype and description for every key of the "t" extension. */
    private void printTransformExtensionKeys() {
        SupplementalDataInfo suppData = SupplementalDataInfo.getInstance();
        Relation<String, String> extensionToKeys = suppData
                .getBcp47Extension2Keys();
        // extension="t" name="m0"
        Set<String> keys = extensionToKeys.getAll("t");
        Relation<String, String> keyToSubtypes = suppData.getBcp47Keys();
        Map<R2<String, String>, String> descriptions = suppData
                .getBcp47Descriptions();
        for (String key : keys) {
            for (String subtype : keyToSubtypes.getAll(key)) {
                String description = descriptions.get(Row.of(key, subtype));
                System.out.println(key + ", " + subtype + ", " + description);
            }
        }
    }

    /**
     * Adds a mapping for every non-internal transform described by an
     * {@code .xml} file in {@link CLDRTransforms#TRANSFORM_DIR}. Transforms
     * whose direction is {@link Direction#both} also contribute the reverse
     * (target to source) mapping.
     */
    private void addCldrTransformNames(Map<String, String> old2newName) {
        String[] files = new File(CLDRTransforms.TRANSFORM_DIR).list();
        if (files == null) {
            // File.list() returns null when the path is not a directory or
            // cannot be read; report that instead of failing with an NPE.
            errln("Cannot list transform directory: "
                    + CLDRTransforms.TRANSFORM_DIR);
            return;
        }
        for (String file : files) {
            if (!file.endsWith(".xml")) {
                continue;
            }
            ParsedTransformID directionInfo = new ParsedTransformID();
            // Called for its side effect of filling in directionInfo; the
            // returned rules are not needed here.
            getIcuRulesFromXmlFile(CLDRTransforms.TRANSFORM_DIR, file,
                    directionInfo);
            if (directionInfo.getVisibility() == Visibility.internal) {
                continue;
            }
            String source = directionInfo.source;
            String target = directionInfo.target;
            String variant = directionInfo.variant;

            putIfResolved(old2newName, directionInfo.toString(),
                    getStandard0(source, target, variant));

            if (directionInfo.getDirection() == Direction.both) {
                // The file also defines the reverse transform, so swap source
                // and target. The key uses the same source-target[/variant]
                // form as the names built from ICU's registry.
                putIfResolved(old2newName,
                        transformId(target, source, variant),
                        getStandard0(target, source, variant));
            }
        }
    }

    /**
     * Adds a mapping for every source/target/variant combination registered
     * with {@link Transliterator} that is not already present in the map.
     */
    private void addIcuTransformNames(Map<String, String> old2newName) {
        for (String source : Collections.list(Transliterator
                .getAvailableSources())) {
            for (String target : Collections.list(Transliterator
                    .getAvailableTargets(source))) {
                for (String rawVariant : Collections.list(Transliterator
                        .getAvailableVariants(source, target))) {
                    // An empty variant is treated as "no variant".
                    String variant = rawVariant.isEmpty() ? null : rawVariant;
                    String name = transformId(source, target, variant);
                    if (!old2newName.containsKey(name)) {
                        putIfResolved(old2newName, name,
                                getStandard0(source, target, variant));
                    }
                }
            }
        }
    }

    /** Stores the mapping unless the derived name contains an unresolved part. */
    private static void putIfResolved(Map<String, String> old2newName,
            String oldName, String standard) {
        if (!standard.contains(UNRESOLVED_MARKER)) {
            old2newName.put(oldName, standard);
        }
    }

    /** Builds {@code source-target}, followed by {@code /variant} when variant is not null. */
    private static String transformId(String source, String target,
            String variant) {
        return source + "-" + target + (variant == null ? "" : "/" + variant);
    }

    /** The role a name plays within a transform ID. */
    enum Type {
        source, target, mechanism
    }

    /**
     * Derives the BCP 47 style name for a transform.
     *
     * @param source  source name of the transform
     * @param target  target name of the transform
     * @param variant variant name, or null if there is none
     * @return {@code <target>-t-<source>}, followed by {@code -m0-<mechanism>}
     *         when a mechanism was derived from the variant; any part that
     *         could not be converted starts with "?"
     */
    private String getStandard0(String source, String target, String variant) {
        // The id is only used to label entries recorded in MISSING.
        String id = transformId(source, target, variant);
        String newSource = getStandard(Type.source, source, id);
        String newTarget = getStandard(Type.target, target, id);
        String newMechanism = getStandard(Type.mechanism, variant, id);
        return newTarget + "-t-" + newSource
                + (newMechanism == null ? "" : "-m0-" + newMechanism);
    }

    /** Not referenced within this class. */
    static ULocale.Builder ubuilder = new ULocale.Builder();

    /**
     * Name parts that could not be converted, keyed by the lower-cased name;
     * each value is the set of (type, transform id) pairs it occurred in.
     */
    static Relation<String, Row.R2<Type, String>> MISSING = Relation
            .<String, Row.R2<Type, String>> of(
                    new TreeMap<String, Set<Row.R2<Type, String>>>(),
                    TreeSet.class);

    static StandardCodes sc = StandardCodes.make();

    /** Lower-cased name to code; filled in by the static initializer below. */
    static Map<String, String> SPECIAL_CASES;

    /** Language codes as returned by {@code sc.getAvailableCodes("language")}. */
    static Set<String> languages = sc.getAvailableCodes("language");

    /** Lower-cased script codes. */
    static Set<String> scripts = new HashSet<String>();

    /** Lower-cased territory codes. */
    static Set<String> regions = new HashSet<String>();

    static {
        MBuilder<String, String, HashMap<String, String>> builder = Builder
                .with(new HashMap<String, String>());
        // add language names
        for (String s : languages) {
            final String data = sc.getData("language", s);
            add(builder, s, data);
        }
        // add script names. They override (eg Latin => und-Latn)
        for (String s : sc.getAvailableCodes("script")) {
            scripts.add(s.toLowerCase(Locale.ENGLISH));
            final String data = sc.getData("script", s);
            add(builder, "und-" + s, data);
        }
        for (String s : sc.getAvailableCodes("territory")) {
            regions.add(s.toLowerCase(Locale.ENGLISH));
        }
        // real special cases
        builder.put("any", "und").put("simplified", "Hans")
                .put("traditional", "Hant").put("ipa", "und-fonipa")
                .put("xsampa", "und-fonxsamp").put("japanesekana", "und-Hrkt");
        /*
         * NOTE(review): presumably the (type, name) pairs that still have no
         * mapping: source fullwidth, source jamo, target accents, target
         * ascii, target halfwidth, target jamo, target numericpinyin, target
         * publishing.
         */
        SPECIAL_CASES = builder.freeze();
    }

    /**
     * Registers {@code code} under each name contained in {@code names}.
     *
     * @param builder receives one entry per name
     * @param code    the value stored for every name
     * @param names   one or more names separated by "▪"; lower-cased before
     *                being used as keys
     */
    public static void add(
            MBuilder<String, String, HashMap<String, String>> builder,
            String code, String names) {
        // split() yields the whole string as the only element when the
        // separator is absent, so no separate single-name case is needed.
        for (String name : names.toLowerCase(Locale.ENGLISH).split("▪")) {
            builder.put(name, code);
        }
    }

    /**
     * Converts one part of a transform ID.
     *
     * @param type   the role of the part
     * @param source the name to convert; may be null
     * @param id     the transform ID, used only to label {@link #MISSING} entries
     * @return for a mechanism: null if the name is null, the lower-cased name
     *         if it is "bgn" or "ungegn"; for a source or target: the special
     *         case mapping, {@code und-<script>} for a script name, or a
     *         language tag built from known codes. In every other case the
     *         lower-cased name prefixed with "?".
     */
    private String getStandard(Type type, String source, String id) {
        String name = source == null ? null : source
                .toLowerCase(Locale.ENGLISH);
        if (type == Type.mechanism) {
            if (name == null) {
                return null;
            }
            if (name.equals("bgn") || name.equals("ungegn")) {
                return name;
            }
            return recordMissing(type, name, id);
        }
        if (name == null) {
            // MISSING is backed by a TreeMap, which cannot hold a null key, so
            // just flag the part as unresolved.
            return UNRESOLVED_MARKER + name;
        }
        String special = SPECIAL_CASES.get(name);
        if (special != null) {
            return special;
        }
        try {
            int scriptCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT,
                    name);
            return "und-" + UScript.getShortName(scriptCode);
        } catch (IllegalArgumentException ignored) {
            // Not a script name; fall through and try it as a locale.
        }
        String languageTag = toKnownLanguageTag(name);
        if (languageTag != null) {
            return languageTag;
        }
        // we failed
        return recordMissing(type, name, id);
    }

    /**
     * Parses {@code localeId} as a locale and returns its language tag if the
     * language is a known code and the script and region are each either
     * empty or known; otherwise returns null.
     */
    private static String toKnownLanguageTag(String localeId) {
        try {
            ULocale ulocale = new ULocale(localeId);
            // hack for now
            if (!languages.contains(ulocale.getLanguage())) {
                return null;
            }
            String script = ulocale.getScript();
            if (!script.isEmpty()
                    && !scripts.contains(script.toLowerCase(Locale.ENGLISH))) {
                return null;
            }
            String region = ulocale.getCountry();
            if (!region.isEmpty()
                    && !regions.contains(region.toLowerCase(Locale.ENGLISH))) {
                return null;
            }
            return ulocale.toLanguageTag();
        } catch (RuntimeException ignored) {
            // Best effort: a name that cannot be handled as a locale is
            // treated as unknown.
            return null;
        }
    }

    /** Records the name in {@link #MISSING} and returns it prefixed with "?". */
    private static String recordMissing(Type type, String name, String id) {
        MISSING.put(name, Row.of(type, id));
        return UNRESOLVED_MARKER + name;
    }

    /**
     * Reads a CLDR transform XML file through a {@link MyHandler}.
     *
     * @param dir           directory path; concatenated directly with the file
     *                      name, so it must end with a path separator
     * @param cldrFileName  name of the XML file inside {@code dir}
     * @param directionInfo passed to the handler together with the file name
     * @return the value of the handler's {@code getRules()} after reading
     */
    public String getIcuRulesFromXmlFile(String dir, String cldrFileName,
            ParsedTransformID directionInfo) {
        final MyHandler myHandler = new CLDRTransforms.MyHandler(cldrFileName,
                directionInfo);
        XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
        xfr.read(dir + cldrFileName, XMLFileReader.CONTENT_HANDLER
                | XMLFileReader.ERROR_HANDLER, true);
        return myHandler.getRules();
    }
}