| /* |
| ********************************************************************** |
| * Copyright (c) 2002-2011, International Business Machines |
| * Corporation and others. All Rights Reserved. |
| ********************************************************************** |
| * Author: Mark Davis |
| ********************************************************************** |
| */ |
| package org.unicode.cldr.tool; |
| |
| import java.io.File; |
| import java.io.PrintWriter; |
| import java.text.ParseException; |
| import java.text.ParsePosition; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.Date; |
| import java.util.Iterator; |
| import java.util.LinkedHashMap; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| |
| import org.unicode.cldr.draft.FileUtilities; |
| import org.unicode.cldr.util.Builder; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRFile.DraftStatus; |
| import org.unicode.cldr.util.CLDRPaths; |
| import org.unicode.cldr.util.CldrUtility; |
| import org.unicode.cldr.util.Factory; |
| import org.unicode.cldr.util.ICUServiceBuilder; |
| import org.unicode.cldr.util.LanguageTagParser; |
| import org.unicode.cldr.util.Log; |
| import org.unicode.cldr.util.PatternCache; |
| import org.unicode.cldr.util.SortedBag; |
| import org.unicode.cldr.util.TimezoneFormatter; |
| import org.unicode.cldr.util.TransliteratorUtilities; |
| import org.w3c.dom.NamedNodeMap; |
| import org.w3c.dom.Node; |
| |
| import com.ibm.icu.dev.tool.UOption; |
| import com.ibm.icu.dev.util.UnicodeMap; |
| import com.ibm.icu.impl.Relation; |
| import com.ibm.icu.lang.UCharacter; |
| import com.ibm.icu.text.Collator; |
| import com.ibm.icu.text.DateFormat; |
| import com.ibm.icu.text.DecimalFormat; |
| import com.ibm.icu.text.Normalizer; |
| import com.ibm.icu.text.RuleBasedCollator; |
| import com.ibm.icu.text.SimpleDateFormat; |
| import com.ibm.icu.text.UTF16; |
| import com.ibm.icu.text.UnicodeSet; |
| import com.ibm.icu.text.UnicodeSetIterator; |
| import com.ibm.icu.util.ULocale; |
| |
| //import org.unicode.cldr.tool.GenerateCldrDateTimeTests; |
| |
| /** |
| * Generated tests for CLDR. |
| * |
| * @author medavis |
| */ |
| |
| public class GenerateCldrTests { |
| |
| protected static final boolean METAZONES_WORK = false; |
| |
| // static private PrintWriter log; |
| PrintWriter out; |
| |
| private static final int HELP1 = 0, HELP2 = 1, SOURCEDIR = 2, DESTDIR = 3, |
| LOGDIR = 4, MATCH = 5, NOT_RESOLVED = 6, LANGUAGES = 7, |
| SHOW = 8; |
| |
| private static final UOption[] options = { |
| UOption.HELP_H(), |
| UOption.HELP_QUESTION_MARK(), |
| UOption.SOURCEDIR().setDefault(CLDRPaths.COMMON_DIRECTORY), |
| UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "/test/"), |
| UOption.create("log", 'l', UOption.REQUIRES_ARG).setDefault(CLDRPaths.GEN_DIRECTORY), |
| UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"), |
| UOption.create("notresolved", 'n', UOption.NO_ARG), |
| UOption.create("languages", 'g', UOption.NO_ARG), |
| // "C:\\ICU4J\\icu4j\\src\\com\\ibm\\icu\\dev\\tool\\cldr\\"), |
| UOption.create("show", 's', UOption.NO_ARG), }; |
| |
| private static final String VERSION = CLDRFile.GEN_VERSION; |
| |
| GenerateCldrCollationTests cldrCollations; |
| |
| static String logDir = null, destDir = null; |
| |
| public static boolean hasLocalizedLanguageFor(String locale, |
| String otherLocale) { |
| String lang = new LanguageTagParser().set(otherLocale).getLanguage(); |
| String localizedVersion = english.getName(locale); |
| return !lang.equals(localizedVersion); |
| } |
| |
| public static boolean hasLocalizedCountryFor(String locale, |
| String otherLocale) { |
| String country = new LanguageTagParser().set(otherLocale).getRegion(); |
| if (country.equals("")) |
| return true; |
| String localizedVersion = english.getName(CLDRFile.TERRITORY_NAME, locale); |
| return !country.equals(localizedVersion); |
| } |
| |
| public static void main(String[] args) throws Exception { |
| double deltaTime = System.currentTimeMillis(); |
| UOption.parseArgs(args, options); |
| Log.setLog(options[LOGDIR].value, "log.txt"); |
| // log = FileUtilities.openUTF8Writer(options[LOGDIR].value, "log.txt"); |
| try { |
| if (options[LANGUAGES].doesOccur) { |
| GenerateStatistics.generateSize( |
| options[GenerateCldrTests.SOURCEDIR].value + "main/", |
| options[GenerateCldrTests.LOGDIR].value, |
| options[MATCH].value, true); |
| return; |
| } |
| // compareAvailable(); |
| |
| // if (true) return; |
| // System.out.println(createCaseClosure(new |
| // UnicodeSet("[a{bc}{def}{oss}]"))); |
| // System.out.println(createCaseClosure(new |
| // UnicodeSet("[a-z\u00c3\u0178{aa}]"))); |
| GenerateCldrTests t = new GenerateCldrTests(); |
| // t.generate(new ULocale("hu"), null); |
| t.generate(options[MATCH].value); |
| } finally { |
| Log.close(); |
| deltaTime = System.currentTimeMillis() - deltaTime; |
| System.out.println("Elapsed: " + deltaTime / 1000.0 + " seconds"); |
| System.out.println("Done"); |
| } |
| } |
| |
| /* |
| * private static void compareAvailable() { String[] cols = |
| * Collator.getAvailableULocales(); Locale[] alocs = |
| * NumberFormat.getAvailableLocales(); Set sCols = filter(cols); Set sLocs = |
| * filter(alocs); Set oldSLocs = new TreeSet(sCols); sLocs.removeAll(sCols); |
| * log.println("main - collation"); showLocales(sLocs); |
| * sCols.removeAll(oldSLocs); log.println(); |
| * log.println("collation - main"); showLocales(sCols); } |
| */ |
| |
| /** |
| * |
| */ |
| /* |
| * private static void checkLocaleNames() {Stringe[] locales = |
| * String.getAvailableLocales(); for (int i = 0; i < locales.length; ++i) { |
| * if (!hasLocalizedCountryFor(String.ENGLISH, locales[i]) || |
| * !hasLocalizedLanguageFor(String.ENGLISH, locales[i]) || |
| * !hasLocalizedCountryFor(locales[i], locales[i]) || |
| * !hasLocalizedLanguageFor(locales[i], locales[i])) { |
| * Log.getLog().print("FAILURE\t"); } else { |
| * Log.getLog().print(" \t"); } Log.logln(locales[i] + "\t" + |
| * locales[i].getDisplayName(String.ENGLISH) + "\t" + |
| * locales[i].getDisplayName(locales[i])); } } |
| */ |
| /** |
| * @param sLocs |
| */ |
| private static void showLocales(Set<String> sLocs) { |
| for (Iterator<String> it = sLocs.iterator(); it.hasNext();) { |
| String s = it.next(); |
| Log.logln(s + "\t" + ULocale.getDisplayLanguage(s, "en")); |
| } |
| } |
| |
| /** |
| * @param cols |
| * @return |
| */ |
| private static Set<String> filter(Object[] cols) { |
| Set<String> result = new TreeSet<String>(); |
| for (int i = 0; i < cols.length; ++i) { |
| String s = cols[i].toString(); |
| if (s.indexOf('_') >= 0) |
| continue; |
| result.add(s); |
| } |
| return result; |
| } |
| |
| Set<String> addULocales(Object[] objects, Set<String> target) { |
| for (int i = 0; i < objects.length; ++i) { |
| target.add(objects[i].toString()); |
| } |
| return target; |
| } |
| |
| LanguageTagParser ltp = new LanguageTagParser(); |
| |
| private void addLocale(String locale) { |
| String lang; |
| try { |
| lang = ltp.set(locale).getLanguageScript(); |
| // lang = locale.getLanguage(); |
| if (lang.length() == 0 || lang.equals("root")) |
| return; // skip root |
| } catch (RuntimeException e) { |
| return; // illegal locale name, must be supplemental |
| } |
| // ULocale parent = new ULocale(lang); |
| // System.out.println(item + ", " + parent); |
| parentToLocales.put(lang, locale); |
| /* |
| * RuleBasedCollator col = cldrCollations.getInstance(item); if (col == |
| * null) { System.out.println("No collator for: " + item); } String |
| * rules = col.getRules(); // |
| * ((RuleBasedCollator)Collator.getInstance(item)).getRules(); |
| * rulesToLocales.add(rules, item); localesToRules.put(item, rules); |
| */ |
| } |
| |
| Set<String> collationLocales = new TreeSet<String>(); // =ULocaleComparator |
| // addULocales(Collator.getAvailableULocales(), |
| // new |
| // TreeSet(ULocaleComparator)); |
| |
| // Set numberLocales = addULocales(NumberFormat.getAvailableLocales(), new |
| // TreeSet(ULocaleComparator)); |
| // Set dateLocales = addULocales(DateFormat.getAvailableLocales(), new |
| // TreeSet(ULocaleComparator)); |
| Set<String> allLocales = new TreeSet<String>(); // ULocaleComparator |
| |
| // Map localesToRules = new HashMap(); |
| |
| // Relation rulesToLocales = new Relation(new TreeMap(ULocaleComparator),TreeSet.class); |
| |
| Relation<String, String> parentToLocales = Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); |
| |
| /* |
| * void getLocaleList() { collationLocales = new TreeSet(ULocaleComparator); |
| * collationLocales.addAll(cldrCollations.getAvailableSet()); |
| * |
| * collationLocales = addULocales(new String[] { // HACK "ga", "nl", "pt", |
| * "de@collation=phonebook", "es@collation=traditional", |
| * "hi@collation=direct", "zh@collation=pinyin", "zh@collation=stroke", |
| * "zh@collation=traditional", }, collationLocales); |
| * |
| * allLocales.addAll(collationLocales); allLocales.addAll(numberLocales); |
| * allLocales.addAll(dateLocales); // HACK // get all collations with same |
| * rules |
| * |
| * for (Iterator it = allLocales.iterator(); it.hasNext();) { |
| * addLocale((ULocale) it.next()); } |
| * |
| * String[] others = new String[] { "de@collation=phonebook", |
| * "es@collation=traditional", "hi@collation=direct", "zh@collation=pinyin", |
| * "zh@collation=stroke", "zh@collation=traditional", }; for (int i = 0; i < |
| * others.length; ++i) { addLocale(new ULocale(others[i])); } |
| * |
| * } |
| */ |
| |
| // GenerateCldrDateTimeTests cldrOthers; |
| Factory mainCldrFactory; |
| |
| ICUServiceBuilder icuServiceBuilder; |
| |
| private static CLDRFile english; |
| |
| // static Transform<String,ULocale> TO_LOCALE = new Transform<String,ULocale>(){ |
| // public ULocale transform(String source) { |
| // return new ULocale(source); |
| // } |
| // }; |
| |
| void generate(String pat) throws Exception { |
| mainCldrFactory = Factory.make(options[SOURCEDIR].value + "main" |
| + File.separator, pat); |
| english = mainCldrFactory.make("en", true); |
| Factory collationCldrFactory = Factory.make(options[SOURCEDIR].value |
| + "collation" + File.separator, pat); |
| //Factory supplementalCldrFactory = Factory.make(options[SOURCEDIR].value |
| // + "supplemental" + File.separator, ".*"); |
| |
| // allLocales = Builder.with(allLocales) |
| // .addAll(mainCldrFactory.getAvailable(), TO_LOCALE) |
| // .addAll(collationCldrFactory.getAvailable(), TO_LOCALE) |
| // .freeze(); |
| allLocales.addAll(mainCldrFactory.getAvailable()); |
| if (!allLocales.containsAll(collationCldrFactory.getAvailable())) { |
| System.err.println("Collation locale that is not in main!\t" |
| + Builder.with(new TreeSet<String>(collationCldrFactory.getAvailable())).removeAll(allLocales).get()); |
| } |
| allLocales.addAll(collationCldrFactory.getAvailable()); |
| allLocales = Collections.unmodifiableSet(allLocales); |
| |
| cldrCollations = new GenerateCldrCollationTests(options[SOURCEDIR].value |
| + "collation" + File.separator, pat, allLocales); |
| if (options[SHOW].doesOccur) |
| cldrCollations.show(); |
| |
| for (Iterator<String> it = cldrCollations.getAvailableSet().iterator(); it |
| .hasNext();) { |
| collationLocales.add(it.next()); |
| } |
| if (!allLocales.containsAll(collationLocales)) { |
| System.err.println("Collation locale that is not in main!\t" |
| + Builder.with(new TreeSet<String>(collationLocales)).removeAll(allLocales).get()); |
| } |
| collationLocales = allLocales; |
| |
| // TODO HACK |
| // collationLocales.remove("ar_IN"); |
| icuServiceBuilder = new ICUServiceBuilder(); |
| /* |
| * cldrOthers = new GenerateCldrDateTimeTests(options[SOURCEDIR].value + |
| * "main" + File.separator, pat, |
| * !options[GenerateCldrTests.NOT_RESOLVED].doesOccur); if |
| * (options[SHOW].doesOccur) cldrOthers.show(); |
| */ |
| // getLocaleList(); |
| for (Iterator<String> it = collationLocales.iterator(); it.hasNext();) { |
| addLocale(it.next()); |
| } |
| |
| Matcher m = PatternCache.get(pat).matcher(""); |
| for (Iterator<String> it = parentToLocales.keySet().iterator(); it.hasNext();) { |
| String p = it.next(); |
| if (!m.reset(p).matches()) |
| continue; |
| generate2(p); |
| } |
| } |
| |
| private void generate2(String locale) throws Exception { |
| System.out.println("Main Generation:\t" + locale); |
| out = FileUtilities.openUTF8Writer(options[DESTDIR].value, locale + ".xml"); |
| out.println("<?xml version='1.0' encoding='UTF-8' ?>"); |
| out.println( |
| // "<!DOCTYPE cldrTest SYSTEM 'http://www.unicode.org/cldr/dtd/1.5/cldrTest.dtd'>" |
| // + |
| "<!DOCTYPE cldrTest SYSTEM '../common/dtd/cldrTest.dtd'>"); |
| out.println("<!-- For information, see readme.html -->"); |
| out.println(" <cldrTest version='" + VERSION + |
| "' base='" + locale + "'>"); |
| CLDRFile localeFile = mainCldrFactory.make(locale, true); |
| out.println(" <!-- " |
| + TransliteratorUtilities.toXML.transliterate(english.getName(locale) |
| + " [" + localeFile.getName(locale)) + "] -->"); |
| generateItems(locale, allLocales, NumberShower); |
| generateItems(locale, allLocales, DateShower); |
| generateItems(locale, allLocales, ZoneFieldShower); |
| generateItems(locale, collationLocales, CollationShower); |
| out.println(" </cldrTest>"); |
| out.close(); |
| ToolUtilities.generateBat(options[SOURCEDIR].value + "test" + File.separator, |
| locale + ".xml", options[DESTDIR].value, locale + ".xml", |
| new CldrUtility.SimpleLineComparator(0)); |
| } |
| |
| /* |
| * |
| * // first pass through and get all the functional equivalents Map |
| * uniqueLocales = new TreeMap(); |
| * |
| * String[] keywords = Collator.getKeywords(); boolean [] isAvailable = new |
| * boolean[1]; for (int i = 0; i < locales.length; ++i) { add(locales[i], |
| * uniqueLocales); if (true) continue; // TODO restore once Vladimir fixes |
| * for (int j = 0; j < keywords.length; ++j) { String[] values = |
| * Collator.getKeywordValues(keywords[j]); for (int k = 0; k < |
| * values.length; ++k) { // TODO -- for a full job, would do all |
| * combinations of different keywords! if (values[k].equals("standard")) |
| * continue; add(new ULocale(locales[i] + "@" + keywords[j] + "=" + |
| * values[k]), uniqueLocales); //ULocale other = |
| * Collator.getFunctionalEquivalent(keywords[j], locales[i], isAvailable); } |
| * } } for (int i = 0; i < extras.length; ++i) { add(new ULocale(extras[i]), |
| * uniqueLocales); } // items are now sorted by rules. So resort by locale |
| * Map toDo = new TreeMap(ULocaleComparator); for (Iterator it = |
| * uniqueLocales.keySet().iterator(); it.hasNext();) { Object rules = |
| * it.next(); Set s = (Set) uniqueLocales.get(rules); ULocale ulocale = |
| * (ULocale) s.iterator().next(); // get first one toDo.put(ulocale, s); } |
| * for (Iterator it = toDo.keySet().iterator(); it.hasNext();) { ULocale |
| * ulocale = (ULocale) it.next(); Set s = (Set) toDo.get(ulocale); |
| * generate(ulocale); } |
| */ |
| |
| /** |
| * add locale into list. Replace old if shorter |
| * |
| * @param locale |
| */ |
| void add(String locale, Map<String, Set<String>> uniqueLocales) { |
| try { |
| RuleBasedCollator col = cldrCollations.getInstance(locale); // (RuleBasedCollator) |
| // Collator.getInstance(locale); |
| // for our purposes, separate locales if we are using different |
| // exemplars |
| String key = col.getRules() + "\uFFFF" + getExemplarSet(locale, 0, DraftStatus.unconfirmed); |
| Set<String> s = uniqueLocales.get(key); |
| if (s == null) { |
| s = new TreeSet<String>(ULocaleComparator); |
| uniqueLocales.put(key, s); |
| } |
| System.out.println("Adding " + locale); |
| s.add(locale); |
| } catch (Throwable e) { // skip |
| System.out.println("skipped " + locale); |
| } |
| } |
| |
| /** |
| * Work-around |
| */ |
| public UnicodeSet getExemplarSet(String locale, int options, |
| DraftStatus minimalDraftStatus) { |
| String n = locale.toString(); |
| int pos = n.indexOf('@'); |
| if (pos >= 0) |
| locale = n.substring(0, pos); |
| CLDRFile cldrFile = mainCldrFactory.make(locale.toString(), true, |
| minimalDraftStatus); |
| String v = cldrFile.getStringValue("//ldml/characters/exemplarCharacters"); |
| UnicodeSet result = new UnicodeSet(v); |
| v = cldrFile |
| .getStringValue("//ldml/characters/exemplarCharacters[@type=\"auxiliary\"]"); |
| if (v != null) { |
| result.addAll(new UnicodeSet(v)); |
| } |
| if (options == 0) |
| result.closeOver(UnicodeSet.CASE); |
| return result; |
| } |
| |
| public static final Comparator<Object> ULocaleComparator = new Comparator<Object>() { |
| public int compare(Object o1, Object o2) { |
| return o1.toString().compareTo(o2.toString()); |
| } |
| }; |
| |
| /* |
| * public interface Equator { public boolean equals(Object o1, Object o2); } |
| */ |
| @SuppressWarnings("rawtypes") |
| static boolean intersects(Collection a, Collection b) { |
| for (Iterator it = a.iterator(); it.hasNext();) { |
| if (b.contains(it.next())) |
| return true; |
| } |
| return false; |
| } |
| |
| /* |
| * static Collection extract(Object x, Collection a, Equator e, Collection |
| * output) { List itemsToRemove = new ArrayList(); for (Iterator it = |
| * a.iterator(); it.hasNext();) { Object item = it.next(); if (e.equals(x, |
| * item)) { itemsToRemove.add(item); // have to do this because iterator may |
| * not allow output.add(item); } } a.removeAll(itemsToRemove); return |
| * output; } |
| */ |
| class ResultsPrinter { |
| private Set<Map> listOfSettings = new LinkedHashSet<Map>(); |
| |
| private transient LinkedHashMap<String, String> settings = new LinkedHashMap<String, String>(); |
| |
| ResultsPrinter() { |
| } |
| |
| ResultsPrinter(ResultsPrinter rpIncludeDraft, ResultsPrinter rpNoDraft) { |
| Set<Map> listOfSettings1 = rpIncludeDraft.getListOfSettings(); |
| Set<Map> listOfSettings2 = rpNoDraft.getListOfSettings(); |
| if (listOfSettings1.size() != listOfSettings2.size()) { |
| throw new InternalError("can't combine"); |
| } |
| Iterator<Map> it1 = listOfSettings1.iterator(); |
| Iterator<Map> it2 = listOfSettings2.iterator(); |
| while (it1.hasNext()) { |
| Map settings1 = it1.next(); |
| Map settings2 = it2.next(); |
| if (settings1.equals(settings2)) { |
| settings1.put("draft", "unconfirmed approved"); |
| addToListOfSettings(settings1); |
| } else { |
| // they should only differ by result! |
| settings1.put("draft", "unconfirmed"); |
| addToListOfSettings(settings1); |
| settings2.put("draft", "approved"); |
| addToListOfSettings(settings2); |
| } |
| } |
| } |
| |
| private void addToListOfSettings(Map settings1) { |
| for (Object key : settings1.keySet()) { |
| if (key == null || settings1.get(key) == null) { |
| throw new IllegalArgumentException("null key or value in settings."); |
| } |
| } |
| listOfSettings.add(settings1); |
| } |
| |
| void set(String name, String value) { |
| if (name == null || value == null) { |
| throw new IllegalArgumentException("null key or value in settings."); |
| } |
| settings.put(name, value); |
| } |
| |
| void setResult(String result) { |
| if (result == null) { |
| throw new IllegalArgumentException("null key or value in settings."); |
| } |
| settings.put("result", result); |
| addToListOfSettings((Map) settings.clone()); |
| } |
| |
| void print() { |
| Map oldSettings = new TreeMap(); |
| for (Iterator it2 = getListOfSettings().iterator(); it2.hasNext();) { |
| Map settings = (Map) it2.next(); |
| String result = (String) settings.get("result"); |
| out.print(" <result"); |
| for (Iterator it = settings.keySet().iterator(); it.hasNext();) { |
| Object key = it.next(); |
| if (key.equals("result")) |
| continue; |
| Object value = settings.get(key); |
| if (!value.equals(oldSettings.get(key))) { |
| out.print(" " + key + "='" |
| + TransliteratorUtilities.toXML.transliterate(value.toString()) |
| + "'"); |
| } |
| } |
| out.println(">" + TransliteratorUtilities.toXML.transliterate(result) |
| + "</result>"); |
| oldSettings = settings; |
| } |
| } |
| |
| public boolean equals(Object other) { |
| try { |
| ResultsPrinter that = (ResultsPrinter) other; |
| return getListOfSettings().equals(that.getListOfSettings()); |
| } catch (Exception e) { |
| return false; |
| } |
| } |
| |
| public int hashCode() { |
| throw new IllegalArgumentException(); |
| } |
| |
| /** |
| * |
| */ |
| |
| private void setListOfSettings(Set listOfSettings) { |
| this.listOfSettings = listOfSettings; |
| } |
| |
| private Set<Map> getListOfSettings() { |
| return Collections.unmodifiableSet(listOfSettings); |
| } |
| } |
| |
| abstract class DataShower { |
| abstract ResultsPrinter show(String first_locale, DraftStatus minimalDraftStatus); |
| |
| ResultsPrinter show(String first) throws Exception { |
| ResultsPrinter rpIncludeDraft = show(first, DraftStatus.unconfirmed); |
| ResultsPrinter rpNoDraft = show(first, DraftStatus.approved); |
| return new ResultsPrinter(rpIncludeDraft, rpNoDraft); |
| } |
| |
| abstract String getElement(); |
| } |
| |
| interface DataShower2 { |
| void show(ULocale first, Collection others) throws Exception; |
| } |
| |
| private void generateItems(String locale, Collection<String> onlyLocales, |
| DataShower generator) throws Exception { |
| Set<String> sublocales = new TreeSet<String>(); // ULocaleComparator |
| sublocales.add(locale); |
| sublocales.addAll(parentToLocales.getAll(locale)); |
| sublocales.retainAll(onlyLocales); |
| Map<String, ResultsPrinter> locale_results = new TreeMap<String, ResultsPrinter>(ULocaleComparator); |
| for (Iterator<String> it = sublocales.iterator(); it.hasNext();) { |
| String current = it.next(); |
| locale_results.put(current, generator.show(current)); |
| } |
| // do it this way so that the locales stay in order |
| Set<String> matchingLocales = new TreeSet<String>(ULocaleComparator); |
| while (sublocales.size() != 0) { |
| String first = sublocales.iterator().next(); |
| ResultsPrinter r = locale_results.get(first); |
| for (Iterator<String> it = sublocales.iterator(); it.hasNext();) { |
| String other = it.next(); |
| ResultsPrinter r2 = locale_results.get(other); |
| if (r2.equals(r)) |
| matchingLocales.add(other); |
| } |
| showLocales(generator.getElement(), matchingLocales); |
| r.print(); |
| out.println(" </" + generator.getElement() + ">"); |
| sublocales.removeAll(matchingLocales); |
| matchingLocales.clear(); |
| } |
| //Comparator c; |
| } |
| |
| public void showLocales(String elementName, Collection<String> others) { |
| // System.out.println(elementName + ": " + locale); |
| out.println(" <" + elementName + " "); |
| StringBuffer comment = new StringBuffer(); |
| if (others != null && others.size() != 0) { |
| out.print("locales='"); |
| boolean first = true; |
| for (Iterator<String> it = others.iterator(); it.hasNext();) { |
| if (first) |
| first = false; |
| else { |
| out.print(" "); |
| comment.append("; "); |
| } |
| String loc = it.next(); |
| out.print(loc); |
| comment.append(english.getName(loc) + " [" |
| + getNativeName(loc) + "]"); |
| } |
| out.print("'"); |
| } |
| out.println(">"); |
| out.println("<!-- " |
| + TransliteratorUtilities.toXML.transliterate(comment.toString()) |
| + " -->"); |
| } |
| |
| private String getNativeName(String loc) { |
| int atPos = loc.indexOf('@'); |
| String keywords = ""; |
| if (atPos >= 0) { |
| keywords = loc.substring(atPos + 1); |
| loc = loc.substring(0, atPos); |
| } |
| return mainCldrFactory.make(loc, true).getName(loc) + "@" + keywords; |
| } |
| |
| DataShower ZoneFieldShower = new DataShower() { |
| |
| // Set zones = new |
| // TreeSet(sc.getAvailableCodes("tzid")); |
| List<String> zones = Arrays.asList(new String[] { "America/Los_Angeles", |
| "America/Argentina/Buenos_Aires", "America/Buenos_Aires", |
| "America/Havana", "Australia/ACT", "Australia/Sydney", "Europe/London", |
| "Europe/Moscow", "Etc/GMT+3" }); |
| |
| String[] perZoneSamples = { "Z", "ZZZZ", "z", "zzzz", "v", "vvvv", "V", "VVVV" }; |
| |
| String[] dates = { "2004-01-15T12:00:00Z", "2004-07-15T12:00:00Z" }; |
| |
| public ResultsPrinter show(String first, DraftStatus minimalDraftStatus) { |
| TimezoneFormatter tzf = new TimezoneFormatter(mainCldrFactory, first |
| .toString(), minimalDraftStatus); |
| ResultsPrinter rp = new ResultsPrinter(); |
| if (!METAZONES_WORK) { |
| return rp; |
| } |
| // TODO Auto-generated |
| // method stub |
| ParsePosition parsePosition = new ParsePosition(0); |
| for (Iterator<String> it = zones.iterator(); it.hasNext();) { |
| String tzid = it.next(); |
| rp.set("zone", tzid); |
| for (int j = 0; j < dates.length; ++j) { |
| String date = dates[j]; |
| Date datetime; |
| try { |
| datetime = ICUServiceBuilder.isoDateParse(date); |
| } catch (ParseException e1) { |
| throw new IllegalArgumentException(e1); |
| } |
| rp.set("date", dates[j]); |
| for (int i = 0; i < perZoneSamples.length; ++i) { |
| try { |
| String pattern = perZoneSamples[i]; |
| if (!METAZONES_WORK && (pattern.contains("z") || pattern.contains("V"))) { |
| continue; |
| } |
| rp.set("field", pattern); |
| String formatted = tzf.getFormattedZone(tzid, pattern, datetime.getTime(), false); |
| parsePosition.setIndex(0); |
| String parsed = tzf.parse(formatted, parsePosition); |
| if (parsed == null) { |
| // for |
| // debugging |
| formatted = tzf.getFormattedZone(tzid, pattern, datetime.getTime(), false); |
| parsePosition.setIndex(0); |
| parsed = tzf.parse(formatted, parsePosition); |
| } |
| rp.set("parse", parsed); |
| rp.setResult(formatted); |
| } catch (RuntimeException e) { |
| throw (IllegalArgumentException) new IllegalArgumentException( |
| "Failure in " + first).initCause(e); |
| } |
| } |
| } |
| } |
| return rp; |
| /* |
| * Date datetime = ICUServiceBuilder .isoDateParse (samples[j]); |
| * rp.set("input", ICUServiceBuilder .isoDateFormat (datetime)); |
| */ |
| } |
| |
| public String getElement() { |
| return "zoneFields"; |
| } |
| }; |
| |
| DataShower DateShower = new DataShower() { |
| public ResultsPrinter show(String locale, DraftStatus minimalDraftStatus) { |
| String[] samples = { "1900-01-31T00:00:00Z", "1909-02-28T00:00:01Z", |
| "1918-03-26T00:59:59Z", "1932-04-24T01:00:00Z", |
| "1945-05-20T01:00:01Z", "1952-06-18T11:59:59Z", |
| "1973-07-16T12:00:00Z", "1999-08-14T12:00:01Z", |
| "2000-09-12T22:59:59Z", "2001-10-08T23:00:00Z", |
| "2004-11-04T23:00:01Z", "2010-12-01T23:59:59Z", }; |
| CLDRFile cldrFile = mainCldrFactory.make(locale.toString(), true, |
| minimalDraftStatus); |
| icuServiceBuilder.setCldrFile(cldrFile); |
| ResultsPrinter rp = new ResultsPrinter(); |
| for (int j = 0; j < samples.length; ++j) { |
| Date datetime; |
| try { |
| datetime = ICUServiceBuilder.isoDateParse(samples[j]); |
| } catch (ParseException e) { |
| throw new IllegalArgumentException(e); |
| } |
| rp.set("input", ICUServiceBuilder.isoDateFormat(datetime)); |
| for (int i = 0; i < ICUServiceBuilder.LIMIT_DATE_FORMAT_INDEX; ++i) { |
| rp.set("dateType", ICUServiceBuilder.getDateNames(i)); |
| for (int k = 0; k < ICUServiceBuilder.LIMIT_DATE_FORMAT_INDEX; ++k) { |
| if (i == 0 && k == 0) |
| continue; |
| DateFormat df = icuServiceBuilder.getDateFormat("gregorian", i, k); |
| String pattern = ((SimpleDateFormat) df).toPattern(); |
| if (!METAZONES_WORK && (pattern.contains("z") || pattern.contains("V"))) { |
| continue; |
| } |
| rp.set("timeType", ICUServiceBuilder.getDateNames(k)); |
| if (false && i == 2 && k == 0) { |
| System.out.println("debug: date " |
| + icuServiceBuilder.getDateNames(i) + ", time " |
| + icuServiceBuilder.getDateNames(k) + " = " |
| + df.format(datetime)); |
| } |
| rp.setResult(df.format(datetime)); |
| } |
| } |
| } |
| return rp; |
| } |
| |
| public String getElement() { |
| return "date"; |
| } |
| }; |
| |
| DataShower NumberShower = new DataShower() { |
| public ResultsPrinter show(String locale, DraftStatus minimalDraftStatus) { |
| CLDRFile cldrFile = mainCldrFactory.make(locale.toString(), true, |
| minimalDraftStatus); |
| icuServiceBuilder.setCldrFile(cldrFile); |
| |
| double[] samples = { 0, 0.01, -0.01, 1, -1, 123.456, -123.456, 123456.78, |
| -123456.78, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY, |
| Double.NaN }; |
| ResultsPrinter rp = new ResultsPrinter(); |
| for (int j = 0; j < samples.length; ++j) { |
| double sample = samples[j]; |
| rp.set("input", String.valueOf(sample)); |
| for (int i = 0; i < ICUServiceBuilder.LIMIT_NUMBER_INDEX; ++i) { |
| rp.set("numberType", icuServiceBuilder.getNumberNames(i)); |
| DecimalFormat nf = icuServiceBuilder.getNumberFormat(i); |
| String formatted = nf.format(sample); |
| if (formatted.indexOf("NaNNaN") >= 0) { |
| formatted = nf.format(sample); // for |
| // debugging |
| } |
| rp.setResult(formatted); |
| } |
| } |
| return rp; |
| } |
| |
| public String getElement() { |
| return "number"; |
| } |
| }; |
| |
| // ========== COLLATION ========== |
| |
| /* |
| * Equator CollationEquator = new Equator() { |
| *//** |
| * Must both be ULocales |
| */ |
| /* |
| * public boolean equals(Object o1, Object o2) { try { ULocale loc1 = |
| * (ULocale) o1; ULocale loc2 = (ULocale) o2; if (loc1.equals(loc2)) return |
| * true; return |
| * cldrCollations.getInstance(loc1).equals(cldrCollations.getInstance |
| * (loc2)); } catch (RuntimeException e) { System.out.println("Failed on: " |
| * + o1 + " ;\t" + o2); throw e; } } }; |
| */static ULocale zhHack = new ULocale("zh"); // FIXME |
| // hack |
| // for |
| // zh |
| |
| DataShower CollationShower = new DataShower() { |
| public ResultsPrinter show(String locale, DraftStatus minimalDraftStatus) { |
| // if |
| // (locale.equals(zhHack)) |
| // return; |
| |
| Collator col = cldrCollations.getInstance(locale); // Collator.getInstance(locale); |
| |
| UnicodeSet tailored = new UnicodeSet(); |
| if (col != null) { |
| tailored = col.getTailoredSet(); |
| if (new LanguageTagParser().set(locale).getLanguage().equals("zh")) { |
| tailored.addAll(new UnicodeSet("[[a-z]-[v]]")); |
| Log.logln("HACK for Pinyin"); |
| } |
| tailored = createCaseClosure(tailored); |
| tailored = nfc(tailored); |
| } else { |
| System.out.println("No collation for: " + locale); |
| col = cldrCollations.getInstance("root"); |
| } |
| // System.out.println(tailored.toPattern(true)); |
| |
| UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE, |
| minimalDraftStatus); |
| // add all the exemplars |
| |
| exemplars = createCaseClosure(exemplars); |
| exemplars = nfc(exemplars); |
| // System.out.println(exemplars.toPattern(true)); |
| tailored.addAll(exemplars); |
| // UnicodeSet |
| // tailoredMinusHan = new |
| // UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET); |
| if (!exemplars.containsAll(tailored)) { |
| // BagFormatter bf = |
| // new |
| // BagFormatter(); |
| Log.logln("In Tailored, but not Exemplar; Locale: " + locale + "\t" |
| + english.getName(locale)); |
| Log.logln(new UnicodeSet(tailored).removeAll(exemplars) |
| .toPattern(false)); |
| // bf.(log,"tailored", |
| // tailored, |
| // "exemplars", |
| // exemplars); |
| Log.getLog().flush(); |
| } |
| tailored.addAll(new UnicodeSet("[\\ .02{12}]")); |
| tailored.removeAll(SKIP_COLLATION_SET); |
| |
| SortedBag bag = new SortedBag(col); |
| return doCollationResult(col, tailored, bag); |
| } |
| |
| public String getElement() { |
| return "collation"; |
| } |
| }; |
| |
| /* |
| * public void show(ULocale locale, Collection others) { |
| * showLocales("collation", others); |
| * |
| * Collator col = cldrCollations.getInstance(locale); // |
| * Collator.getInstance(locale); |
| * |
| * UnicodeSet tailored = col.getTailoredSet(); if |
| * (locale.getLanguage().equals("zh")) { tailored.addAll(new |
| * UnicodeSet("[[a-z]-[v]]")); log.println("HACK for Pinyin"); } tailored = |
| * createCaseClosure(tailored); tailored = nfc(tailored); |
| * //System.out.println(tailored.toPattern(true)); |
| * |
| * UnicodeSet exemplars = getExemplarSet(locale, UnicodeSet.CASE); // add |
| * all the exemplars if (false) for (Iterator it = others.iterator(); |
| * it.hasNext(); ) { exemplars.addAll(getExemplarSet((ULocale)it.next(), |
| * UnicodeSet.CASE)); } |
| * |
| * exemplars = createCaseClosure(exemplars); exemplars = nfc(exemplars); |
| * //System.out.println(exemplars.toPattern(true)); |
| * tailored.addAll(exemplars); //UnicodeSet tailoredMinusHan = new |
| * UnicodeSet(tailored).removeAll(SKIP_COLLATION_SET); if |
| * (!exemplars.containsAll(tailored)) { //BagFormatter bf = new |
| * BagFormatter(); log.println("In Tailored, but not Exemplar; Locale: " + |
| * locale + "\t" + locale.getDisplayName()); log.println(new |
| * UnicodeSet(tailored).removeAll(exemplars).toPattern(false)); |
| * //bf.(log,"tailored", tailored, "exemplars", exemplars); log.flush(); } |
| * tailored.addAll(new UnicodeSet("[\\ .02{12}]")); |
| * tailored.removeAll(SKIP_COLLATION_SET); |
| * |
| * SortedBag bag = new SortedBag(col); doCollationResult(col, tailored, |
| * bag); out.println(" </collation>"); }}; |
| */ |
| static final UnicodeSet SKIP_COLLATION_SET = new UnicodeSet( |
| "[[:script=han:][:script=hangul:]-[\u4e00-\u4eff \u9f00-\u9fff \uac00-\uacff \ud700-\ud7ff]]"); |
| |
| /** |
| * @param col |
| * @param tailored |
| * @param bag |
| */ |
| private ResultsPrinter doCollationResult(Collator col, UnicodeSet tailored, |
| SortedBag bag) { |
| for (UnicodeSetIterator usi = new UnicodeSetIterator(tailored); usi.next();) { |
| String s = usi.getString(); |
| bag.add('x' + s); |
| bag.add('X' + s); |
| bag.add('x' + s + 'x'); |
| } |
| // out.println(" <set locale='" + locale + "'/>"); |
| /* |
| * if (others != null) for (Iterator it = others.iterator(); |
| * it.hasNext(); ) { ULocale uloc = (ULocale) it.next(); if |
| * (uloc.equals(locale)) continue; out.println(" <other locale='" + |
| * uloc + "'/>"); } |
| */ |
| String last = ""; |
| boolean needEquals = false; |
| StringBuffer tempResult = new StringBuffer(CldrUtility.LINE_SEPARATOR); |
| for (Iterator<String> it = bag.iterator(); it.hasNext();) { |
| String s = it.next(); |
| if (col.compare(s, last) != 0) { |
| if (needEquals) |
| tempResult.append(last).append(CldrUtility.LINE_SEPARATOR); |
| needEquals = false; |
| last = s; |
| } else { |
| needEquals = true; |
| } |
| tempResult.append(TransliteratorUtilities.toXML.transliterate(s)).append( |
| CldrUtility.LINE_SEPARATOR); |
| } |
| ResultsPrinter result = new ResultsPrinter(); |
| result.setResult(tempResult.toString()); |
| return result; |
| } |
| |
| static public Set<String> getMatchingXMLFiles(String dir, String localeRegex) { |
| Matcher m = PatternCache.get(localeRegex).matcher(""); |
| Set<String> s = new TreeSet<String>(); |
| File[] files = new File(dir).listFiles(); |
| for (int i = 0; i < files.length; ++i) { |
| String name = files[i].getName(); |
| if (!name.endsWith(".xml")) |
| continue; |
| if (name.startsWith("supplementalData")) |
| continue; |
| String locale = name.substring(0, name.length() - 4); // drop .xml |
| if (!locale.equals("root") && !m.reset(locale).matches()) |
| continue; |
| s.add(locale); |
| } |
| return s; |
| } |
| |
| /* |
| * public static boolean isDraft(Node node) { for (; node.getNodeType() != |
| * Node.DOCUMENT_NODE; node = node.getParentNode()){ NamedNodeMap attributes |
| * = node.getAttributes(); if (attributes == null) continue; for (int i = 0; |
| * i < attributes.getLength(); ++i) { Node attribute = attributes.item(i); |
| * if (attribute.getNodeName().equals("draft") && |
| * attribute.getNodeValue().equals("true")) return true; } } return false; } |
| */ |
| public static String getXPath(Node node) { |
| StringBuffer xpathFragment = new StringBuffer(); |
| StringBuffer xpath = new StringBuffer(); |
| for (; node.getNodeType() != Node.DOCUMENT_NODE; node = node |
| .getParentNode()) { |
| xpathFragment.setLength(0); |
| xpathFragment.append('/').append(node.getNodeName()); |
| NamedNodeMap attributes = node.getAttributes(); |
| if (attributes != null) { |
| for (int i = 0; i < attributes.getLength(); ++i) { |
| Node attribute = attributes.item(i); |
| xpathFragment.append("[@").append(attribute.getNodeName()) |
| .append('=').append(attribute.getNodeValue()).append(']'); |
| } |
| } |
| xpath.insert(0, xpathFragment); |
| } |
| xpath.insert(0, '/'); |
| return xpath.toString(); |
| } |
| |
| public static String replace(String source, String pattern, String replacement) { |
| // dumb code for now |
| for (int pos = source.indexOf(pattern, 0); pos >= 0; pos = source.indexOf( |
| pattern, pos + 1)) { |
| source = source.substring(0, pos) + replacement |
| + source.substring(pos + pattern.length()); |
| } |
| return source; |
| } |
| |
| public static interface Apply { |
| String apply(String source); |
| } |
| |
| static UnicodeSet apply(UnicodeSet source, Apply apply) { |
| UnicodeSet target = new UnicodeSet(); |
| for (UnicodeSetIterator usi = new UnicodeSetIterator(source); usi.next();) { |
| String s = usi.getString(); |
| target.add(apply.apply(s)); |
| } |
| return target; |
| } |
| |
| static UnicodeSet nfc(UnicodeSet source) { |
| return apply(source, new Apply() { |
| public String apply(String source) { |
| return Normalizer.compose(source, false); |
| } |
| }); |
| } |
| |
| public static interface CloseCodePoint { |
| /** |
| * @param cp |
| * code point to get closure for |
| * @param toAddTo |
| * Unicode set for the closure |
| * @return toAddTo (for chaining) |
| */ |
| UnicodeSet close(int cp, UnicodeSet toAddTo); |
| } |
| |
| public static UnicodeSet createCaseClosure(UnicodeSet source) { |
| UnicodeSet target = new UnicodeSet(); |
| for (UnicodeSetIterator usi = new UnicodeSetIterator(source); usi.next();) { |
| String s = usi.getString(); |
| UnicodeSet temp = createClosure(s, CCCP); |
| if (temp == null) |
| target.add(s); |
| else |
| target.addAll(temp); |
| } |
| return target; |
| } |
| |
| public static class UnicodeSetComparator implements Comparator<Object> { |
| UnicodeSetIterator ait = new UnicodeSetIterator(); |
| |
| UnicodeSetIterator bit = new UnicodeSetIterator(); |
| |
| public int compare(Object o1, Object o2) { |
| if (o1 == o2) |
| return 0; |
| if (o1 == null) |
| return -1; |
| if (o2 == null) |
| return 1; |
| UnicodeSet a = (UnicodeSet) o1; |
| UnicodeSet b = (UnicodeSet) o2; |
| if (a.size() != b.size()) { |
| return a.size() < b.size() ? -1 : 1; |
| } |
| ait.reset(a); |
| bit.reset(b); |
| while (ait.nextRange()) { |
| bit.nextRange(); |
| if (ait.codepoint != bit.codepoint) { |
| return ait.codepoint < bit.codepoint ? -1 : 1; |
| } |
| if (ait.codepoint == UnicodeSetIterator.IS_STRING) { |
| int result = ait.string.compareTo(bit.string); |
| if (result != 0) |
| return result; |
| } else if (ait.codepointEnd != bit.codepointEnd) { |
| return ait.codepointEnd < bit.codepointEnd ? -1 : 1; |
| } |
| } |
| return 0; |
| } |
| } |
| |
| public static final CloseCodePoint CCCP = new CloseCodePoint() { |
| Locale locale = Locale.ENGLISH; |
| |
| UnicodeSet NONE = new UnicodeSet(); |
| |
| UnicodeMap<UnicodeSet> map = new UnicodeMap<UnicodeSet>(); // new |
| |
| // UnicodeSetComparator() |
| |
| public UnicodeSet close(int cp, UnicodeSet toAddTo) { |
| UnicodeSet result = map.getValue(cp); |
| if (result == null) { |
| result = new UnicodeSet(); |
| result.add(cp); |
| String s = UCharacter.toLowerCase(locale, UTF16.valueOf(cp)); |
| result.add(s); |
| s = UCharacter.toUpperCase(locale, UTF16.valueOf(cp)); |
| result.add(s); |
| s = UCharacter.toTitleCase(locale, UTF16.valueOf(cp), null); |
| result.add(s); |
| // special hack |
| if (result.contains("SS")) |
| result.add("sS").add("ss"); |
| if (result.size() == 1) |
| result = NONE; |
| map.put(cp, result); |
| } |
| if (result != NONE) |
| toAddTo.addAll(result); |
| else |
| toAddTo.add(cp); |
| return toAddTo; |
| } |
| }; |
| |
| public static UnicodeSet createClosure(String source, CloseCodePoint closer) { |
| return createClosure(source, 0, closer); |
| } |
| |
| public static UnicodeSet createClosure(String source, int position, |
| CloseCodePoint closer) { |
| UnicodeSet result = new UnicodeSet(); |
| // if at end, return empty set |
| if (position >= source.length()) |
| return result; |
| int cp = UTF16.charAt(source, position); |
| // if last character, return its set |
| int endPosition = position + UTF16.getCharCount(cp); |
| if (endPosition >= source.length()) |
| return closer.close(cp, result); |
| // otherwise concatenate its set with the remainder |
| UnicodeSet remainder = createClosure(source, endPosition, closer); |
| return createAppend(closer.close(cp, result), remainder); |
| } |
| |
| /** |
| * Produce the result of appending each element of this to each element of |
| * other. That is, [a{cd}] + [d{ef}] => [{ad}{aef}{cdd}{cdef}] |
| */ |
| public static UnicodeSet createAppend(UnicodeSet a, UnicodeSet b) { |
| UnicodeSet target = new UnicodeSet(); |
| for (UnicodeSetIterator usi = new UnicodeSetIterator(a); usi.next();) { |
| String s = usi.getString(); |
| for (UnicodeSetIterator usi2 = new UnicodeSetIterator(b); usi2.next();) { |
| String s2 = usi2.getString(); |
| target.add(s + s2); |
| } |
| } |
| return target; |
| } |
| } |