| package org.unicode.cldr.test; |
| |
| import java.io.PrintWriter; |
| import java.math.BigDecimal; |
| import java.text.ParsePosition; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.BitSet; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.EnumSet; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRFile.Status; |
| import org.unicode.cldr.util.CLDRPaths; |
| import org.unicode.cldr.util.CldrUtility; |
| import org.unicode.cldr.util.DtdType; |
| import org.unicode.cldr.util.Factory; |
| import org.unicode.cldr.util.Iso639Data; |
| import org.unicode.cldr.util.Iso639Data.Scope; |
| import org.unicode.cldr.util.Level; |
| import org.unicode.cldr.util.Pair; |
| import org.unicode.cldr.util.PatternCache; |
| import org.unicode.cldr.util.SimpleFactory; |
| import org.unicode.cldr.util.StandardCodes; |
| import org.unicode.cldr.util.VariantFolder; |
| import org.unicode.cldr.util.VariantFolder.CanonicalFolder; |
| import org.unicode.cldr.util.VariantFolder.CaseVariantFolder; |
| import org.unicode.cldr.util.VariantFolder.CompatibilityFolder; |
| import org.unicode.cldr.util.props.BagFormatter; |
| import org.unicode.cldr.util.XPathParts; |
| |
| import com.ibm.icu.lang.UCharacter; |
| import com.ibm.icu.lang.UScript; |
| import com.ibm.icu.text.Collator; |
| import com.ibm.icu.text.DecimalFormat; |
| import com.ibm.icu.text.NumberFormat; |
| import com.ibm.icu.text.Transliterator; |
| import com.ibm.icu.text.UTF16; |
| import com.ibm.icu.text.UnicodeSet; |
| import com.ibm.icu.text.UnicodeSetIterator; |
| import com.ibm.icu.util.Currency; |
| import com.ibm.icu.util.ULocale; |
| |
| public class TestMisc { |
| |
| static Currency SWISS_FRANC = Currency.getInstance("CHF"); |
| |
| static class Lists { |
| public static <E extends Comparable> List<E> sortedCopy(Collection<E> iterable) |
| { |
| List<E> list = new ArrayList<E>(); |
| list.addAll(iterable); |
| Collections.sort(list); |
| return list; |
| } |
| } |
| |
| enum Foo { |
| A, M, Z |
| }; |
| |
| public static void main(String[] args) { |
| |
| checkAliases(); |
| if (true) return; |
| |
| Transliterator en_ru = Transliterator.getInstance("en-ru"); |
| System.out.println("Mark + " + en_ru.transform("Mark")); |
| |
| Transliterator latn_cyrl = Transliterator.getInstance("Latn-Cyrl"); |
| System.out.println("Mark + " + latn_cyrl.transform("Mark")); |
| |
| Transliterator ulatn_ucyrl = Transliterator.getInstance("und_Latn-und_Cyrl"); |
| System.out.println("Mark + " + latn_cyrl.transform("Mark")); |
| |
| Locale locale = new Locale("abc-d αγζθ ?ef_g%hi", "abc-d αγζθ ?ef_g%hi", "abc-d αγζθ ?ef_g%hi"); |
| |
| System.out |
| .println("Locale locale = new Locale(\"abc-d αγζθ ?ef_g%hi\",\"abc-d αγζθ ?ef_g%hi\",\"abc-d αγζθ ?ef_g%hi\");"); |
| System.out.println("locale.toString() == \"" + locale + "\""); |
| |
| MyXSymbolTable sym = new MyXSymbolTable(); |
| BagFormatter bf = new BagFormatter(); |
| for (String test : new String[] { |
| "[:reduceCase=[[Åå{fi}]]:]", |
| "[:reduceCanonical=[[Åå{fi}]]:]", |
| "[[,٫.]]", |
| "[[,٫.][:close=compatibility:]]", |
| "[[\\ ,٬.']]", |
| "[[\\ ,٬.'][:close=compatibility:]]", |
| "[[\u002E\u2024\uFE52\uFF0E\u3002][:close=compatibility:]]", |
| "[[[\u002C \u002E \u066B \u2024 \u3002 \uFE52 \uFF0E、، \u002E \u2024 \uFE52 \uFF0E \u3002]-[\u002E\u2024\uFE52\uFF0E\u3002]][:close=compatibility:]]", |
| |
| "[[" + |
| "\\u0020" + |
| "[, ٬ ..․﹒ '' \u2018 \u2019 ]" + |
| "-[.\u2024\u3002\uFE12\uFE52\uFF0E\uFF61]" + |
| "-[,\u060C\u066B\u3001\uFE10\uFE11\uFE50\uFE51\uFF0C\uFF64]]" + |
| "[:close=compatibility:]]", |
| |
| /* |
| * "[[Åå{fi}][:close=canonical:]]", |
| * "[[Åå{fi}][:close=compatibility:]]", |
| * "[[Åå{fi}][:reduce=case:]]", |
| * "[[Åå{fi}][:reduce=canonical:]]", |
| * "[[Åå{fi}][:reduce=compatibility:]]", |
| */ |
| }) { |
| ParsePosition p = new ParsePosition(0); |
| UnicodeSet set = new UnicodeSet(test, p, sym); |
| UnicodeSet codes = set.complement().complement(); |
| System.out.println(test + CldrUtility.LINE_SEPARATOR + |
| codes.toPattern(true) + CldrUtility.LINE_SEPARATOR + |
| bf.showSetNames(set.complement().complement()) + CldrUtility.LINE_SEPARATOR); |
| } |
| if (true) return; |
| |
| StandardCodes sc = StandardCodes.make(); |
| for (String s : new String[] { "language", "script", "territory" }) { |
| System.out.println(s + ":\t" + sc.getGoodAvailableCodes(s).size()); |
| } |
| if (true) return; |
| |
| Set<Foo> inFileOrder = EnumSet.allOf(Foo.class); |
| List<Foo> inAlphaOrder = Lists.sortedCopy(inFileOrder); |
| System.out.println(inFileOrder); |
| System.out.println(inAlphaOrder); |
| |
| DecimalFormat currencyFormat = (DecimalFormat) NumberFormat.getCurrencyInstance(new ULocale("de-CH")); |
| currencyFormat.setCurrency(SWISS_FRANC); |
| // sometime later... |
| // we want the financial format of the currency, not the retail format |
| System.out.println("Retail:\t" + currencyFormat.format(123.53)); |
| BigDecimal increment = currencyFormat.getRoundingIncrement(); |
| System.out.println("Rounding Increment:\t" + increment); |
| double double_increment = increment.doubleValue(); |
| System.out.println("Double rounding Increment:\t" + double_increment); |
| double log = Math.log10(double_increment); |
| System.out.println("Double log:\t" + log); |
| double new_increment = Math.pow(10, Math.floor(log)); |
| System.out.println("Floored Increment:\t" + new_increment); |
| currencyFormat.setRoundingIncrement(new_increment); |
| System.out.println("Financial:\t" + currencyFormat.format(123.53)); |
| |
| if (true) return; |
| |
| testWeights(); |
| if (true) return; |
| |
| testScripts(); |
| testToRegex(); |
| // checkEastAsianWidth(); |
| if (true) return; |
| // import ICU |
| UnicodeSet RTL = new UnicodeSet("[[:Bidi_Class=Arabic_Letter:][:Bidi_Class=Right_To_Left:]]"); |
| |
| checkCollections(); |
| |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| CLDRFile englishFile = cldrFactory.make("en", true); |
| ExampleGenerator eg = new ExampleGenerator(englishFile, englishFile, CLDRPaths.SUPPLEMENTAL_DIRECTORY); |
| System.out |
| .println(eg |
| .getHelpHtml( |
| "//ldml/numbers/currencyFormats/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"][@draft=\"provisional\"]", |
| "")); |
| System.out.println(eg.getHelpHtml("/exemplarCharacters", "")); |
| System.out.println(eg.getHelpHtml("/calendar/pattern", "")); |
| |
| if (true) return; |
| Set<String> s = new HashSet<String>(Arrays.asList("a", "A", "c")); |
| Collator caselessCompare = Collator.getInstance(Locale.ENGLISH); |
| caselessCompare.setStrength(Collator.PRIMARY); |
| Set<String> t = new TreeSet<String>((Comparator) caselessCompare); |
| t.addAll(Arrays.asList("a", "b", "c")); |
| System.out.println("s equals t: " + s.equals(t)); |
| System.out.println("t equals s: " + t.equals(s)); |
| |
| Set<String> u = Collections.unmodifiableSet(t); |
| System.out.println("s==t " + (s.equals(t))); |
| System.out.println("s==u " + (s.equals(u))); |
| UnicodeSet x = new UnicodeSet("[a-z]"); |
| UnicodeSet y = (UnicodeSet) new UnicodeSet("[a-z]").freeze(); |
| System.out.println("x==y " + (x.equals(y))); |
| // showEnglish(); |
| // checkPrivateUse(); |
| // testPopulous(); |
| // checkDistinguishing(); |
| // checkEastAsianWidth(); |
| // checkEnglishPaths(); |
| System.out.println("Done"); |
| } |
| |
| private static void checkAliases() { |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| CLDRFile en = cldrFactory.make("root", true); |
| Status status = new Status(); |
| Matcher m = PatternCache.get("gregorian.*dayPeriods").matcher(""); |
| for (Iterator<String> it = en.iterator(null, en.getComparator()); it.hasNext();) { |
| String path = it.next(); |
| if (!m.reset(path).find()) { |
| continue; |
| } |
| //String locale = en.getSourceLocaleID(path, status); |
| String value = en.getStringValue(path); |
| String fullPath = en.getFullXPath(path); |
| System.out.println("value:\t" + value + "\tpath:\t" + fullPath); |
| if (!path.equals(status.pathWhereFound)) { |
| System.out.println("\torigin:\t" + status); |
| } |
| // System.out.println("locale:\t" + locale); |
| System.out.println(); |
| } |
| } |
| |
| private static void testWeights() { |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| CLDRFile english = cldrFactory.make("en", true); |
| Set<Pair<Integer, String>> rel = new TreeSet<Pair<Integer, String>>(); |
| for (String desiredLocale : cldrFactory.getAvailable()) { |
| int vote = Level.getDefaultWeight("google", desiredLocale); |
| rel.add(new Pair<Integer, String>(vote, desiredLocale)); |
| } |
| for (Pair<Integer, String> p : rel) { |
| System.out.println(p + "\t" + english.getName(p.getSecond())); |
| } |
| } |
| |
| private static void testScripts() { |
| BagFormatter bf = new BagFormatter(); |
| |
| UnicodeSet caseFolded = new UnicodeSet(); |
| UnicodeSet simpleCaseFolded = new UnicodeSet(); |
| for (int i = 0; i < 0x10FFFF; ++i) { |
| String form = UTF16.valueOf(i); |
| if (UCharacter.foldCase(form, true).equals(form)) { |
| caseFolded.add(i); |
| } |
| if (UCharacter.foldCase(i, true) == i) { |
| simpleCaseFolded.add(i); |
| } |
| } |
| caseFolded.freeze(); |
| simpleCaseFolded.freeze(); |
| |
| UnicodeSet functionalExceptCase = (UnicodeSet) new UnicodeSet("[" + |
| "[:L:][:Mc:][:Mn:][:Nd:]" + |
| "&[:^NFKC_QuickCheck=No:]" + |
| "&[:^default_ignorable_code_point:]]").freeze(); |
| |
| UnicodeSet asciiIdn = (UnicodeSet) new UnicodeSet("[-A-Z0-9]").freeze(); |
| |
| UnicodeSet archaic = (UnicodeSet) new UnicodeSet("[" + |
| "[:script=Bugi:]" + |
| "[:script=Copt:]" + |
| "[:script=Cprt:]" + |
| "[:script=Dsrt:]" + |
| "[:script=Glag:]" + |
| "[:script=Goth:]" + |
| "[:script=Hano:]" + |
| "[:script=Ital:]" + |
| "[:script=Khar:]" + |
| "[:script=Linb:]" + |
| "[:script=Ogam:]" + |
| "[:script=Osma:]" + |
| "[:script=Phag:]" + |
| "[:script=Phnx:]" + |
| "[:script=Runr:]" + |
| "[:script=Shaw:]" + |
| "[:script=Sylo:]" + |
| "[:script=Syrc:]" + |
| "[:script=Tagb:]" + |
| "[:script=Tglg:]" + |
| "[:script=Ugar:]" + |
| "[:script=Xpeo:]" + |
| "[:script=Xsux:]" + |
| // "[:script=Arab:]" + |
| // "[:script=Armn:]" + |
| // "[:script=Beng:]" + |
| // "[:script=Bopo:]" + |
| "[:block=Combining_Diacritical_Marks _for_Symbols:]" + |
| "[:block=Musical_Symbols:]" + |
| "[:block=Ancient_Greek_Musical_Notation:]]").freeze(); |
| |
| System.out.println("functionalExceptCase: " + functionalExceptCase); |
| System.out.println("archaic: " + archaic); |
| |
| System.out.println("SimpleCaseFolded & !CaseFolded & Functional & !Archaic:" + CldrUtility.LINE_SEPARATOR |
| + bf.showSetNames(new UnicodeSet(simpleCaseFolded) |
| .removeAll(caseFolded) |
| .retainAll(functionalExceptCase) |
| .removeAll(archaic). |
| removeAll(asciiIdn) |
| )); |
| |
| UnicodeSet functional = (UnicodeSet) new UnicodeSet(functionalExceptCase).retainAll(caseFolded).freeze(); |
| System.out.println("functional: " + functional.size()); |
| UnicodeSet functionalAndNotArchaic = (UnicodeSet) new UnicodeSet(functional).removeAll(archaic).freeze(); |
| System.out.println("archaic: " + archaic.size()); |
| System.out.println("functionalAndNotArchaic: " + functionalAndNotArchaic.size()); |
| |
| // System.out.println(bf.showSetNames("Case Folded", caseFolded,"Simple Case Folded", simpleCaseFolded)); |
| |
| UnicodeSet functionalCommon = new UnicodeSet("[:script=common:]").retainAll(functional).removeAll(archaic) |
| .removeAll(asciiIdn); |
| System.out.println("Common & Functional & !Archaic:" + CldrUtility.LINE_SEPARATOR |
| + bf.showSetNames(functionalCommon)); |
| |
| UnicodeSet functionalInherited = new UnicodeSet("[:script=inherited:]").retainAll(functional) |
| .removeAll(archaic).removeAll(asciiIdn); |
| System.out.println("Inherited & Functional & !Archaic:" + CldrUtility.LINE_SEPARATOR |
| + bf.showSetNames(functionalInherited)); |
| |
| UnicodeSet nl = new UnicodeSet("[:Nl:]").retainAll(functional).removeAll(archaic); |
| System.out.println("Nl:" + CldrUtility.LINE_SEPARATOR + bf.showSetNames(new UnicodeSet("[:Nl:]"))); |
| System.out.println("Nl & Functional & !Archaic:" + CldrUtility.LINE_SEPARATOR + bf.showSetNames(nl)); |
| |
| UnicodeSet restrictedXidContinue = new UnicodeSet( |
| "[[:xid_continue:]" + |
| "&[:^NFKC_QuickCheck=No:]" + |
| "&[:^default_ignorable_code_point:]" + |
| "&[:^Pc:]]").retainAll(caseFolded); |
| |
| System.out.println(bf.showSetDifferences("IDNA Functional", functional, |
| "Unicode XID & NFKC &!DefaultIgnorable &! Pc", restrictedXidContinue)); |
| |
| Transliterator t = Transliterator.getInstance("lower"); |
| System.out.println("ABC " + t.transliterate("ABC")); |
| /* |
| * generalCategory(cp) is {Ll, Lu, Lo, Lm, Mn, Mc, Nd}, AND |
| * NFKC(cp) == cp, AND |
| * casefold(cp) == cp, AND |
| * !defaultIgnorableCodePoint(cp) |
| */ |
| BitSet scripts = new BitSet(); |
| for (int cp = 0; cp < 0x10FFFF; ++cp) { |
| int script = UScript.getScript(cp); |
| if (script == UScript.COMMON || script == UScript.UNKNOWN || script == UScript.INHERITED) { |
| continue; |
| } |
| scripts.set(script); |
| } |
| Set<String> toPrint = new TreeSet<String>(); |
| for (int script = 0; script < scripts.size(); ++script) { |
| if (!scripts.get(script)) continue; |
| String code = UScript.getShortName(script); |
| String name = UScript.getName(script); |
| if (StandardCodes.isScriptModern(code)) { |
| toPrint.add("modern\t" + code + "\t" + name); |
| } else { |
| toPrint.add("archaic\t" + code + "\t" + name); |
| } |
| } |
| for (String line : toPrint) { |
| System.out.println(line); |
| } |
| } |
| |
| private static void checkCollections() { |
| System.out.println("Collections"); |
| new org.unicode.cldr.util.CldrUtility.Apply<String>() { |
| public void apply(String item) { |
| if (Iso639Data.getScope(item.toString()) != Scope.Collection) return; |
| System.out.println(item + "\t" + CldrUtility.join(Iso639Data.getNames(item), ", ")); |
| } |
| }.applyTo(Iso639Data.getAvailable()); |
| System.out.println(CldrUtility.LINE_SEPARATOR + "Macrolanguages"); |
| new org.unicode.cldr.util.CldrUtility.Apply<String>() { |
| public void apply(String item) { |
| if (Iso639Data.getScope(item.toString()) != Scope.Macrolanguage) return; |
| System.out.println(item + "\t" + CldrUtility.join(Iso639Data.getNames(item), ", ")); |
| } |
| }.applyTo(Iso639Data.getAvailable()); |
| } |
| |
| static void testToRegex() { |
| String[] tests = { "\\-", "a", "d-f", "\\u2000", "\\uAC00-\\uAC12", "{AB}", "{CDE}", "\\uFFF0-\\U0010000F", |
| "\\U0010100F-\\U0010300F" }; // }; // |
| for (int i = (1 << tests.length) - 1; i >= 0; --i) { |
| String test = "["; |
| for (int j = 0; j < tests.length; ++j) { |
| if ((i & (1 << j)) != 0) { |
| test += tests[j]; |
| } |
| } |
| test += "]"; |
| testToRegex(new UnicodeSet(test)); |
| } |
| } |
| |
| private static void testToRegex(UnicodeSet test) { |
| String formatted = CldrUtility.toRegex(test); |
| System.out.println(test + "\t->\t" + formatted); |
| Matcher newTest = PatternCache.get(formatted).matcher(""); |
| UnicodeSet failures = new UnicodeSet(); |
| for (UnicodeSetIterator it = new UnicodeSetIterator(test); it.next();) { |
| if (!newTest.reset(it.getString()).matches()) { |
| failures.add(it.getString()); |
| } |
| } |
| if (failures.size() != 0) { |
| System.out.println("\tFailed on: " + failures); |
| } |
| System.out.flush(); |
| } |
| |
| static void checkEastAsianWidth() { |
| UnicodeSet dontCares = (UnicodeSet) new UnicodeSet("[[:surrogate:][:unassigned:][:control:]]").freeze(); |
| UnicodeSet dontCares2 = (UnicodeSet) new UnicodeSet("[:^letter:]").freeze(); |
| |
| // UnicodeSet wide = new UnicodeSet("[[:East_Asian_Width=wide:][:East_Asian_Width=fullwidth:][:Co:]]"); // |
| // remove supplementaries |
| // System.out.format("Wide %s" + Utility.LINE_SEPARATOR + "" + Utility.LINE_SEPARATOR, wide); |
| // System.out.format("Wide(spanned) %s" + Utility.LINE_SEPARATOR + "" + Utility.LINE_SEPARATOR, |
| // Utility.addDontCareSpans(wide, dontCares)); |
| // UnicodeSet zeroWidth = new |
| // UnicodeSet("[[:default_ignorable_code_point:][:Mn:][:Me:]-[:Noncharacter_Code_Point:]-[:Cc:]]"); // remove |
| // supplementaries |
| // System.out.format("ZeroWidth %s" + Utility.LINE_SEPARATOR + "" + Utility.LINE_SEPARATOR, zeroWidth); |
| // System.out.format("ZeroWidth(spanned) %s" + Utility.LINE_SEPARATOR + "" + Utility.LINE_SEPARATOR, |
| // Utility.addDontCareSpans(zeroWidth, dontCares)); |
| |
| // P2. In each paragraph, find the first character of type L, AL, or R. |
| UnicodeSet strongL = (UnicodeSet) new UnicodeSet("[[:BidiClass=L:]-[:unassigned:]]").freeze(); // |
| showSpans("Bidi L", strongL, dontCares); |
| showSpans("Bidi L*", strongL, dontCares2); |
| |
| UnicodeSet strongRAL = (UnicodeSet) new UnicodeSet("[[:BidiClass=R:][:BidiClass=AL:]-[:unassigned:]]").freeze(); |
| showSpans("Bidi R,AL", strongRAL, dontCares); |
| showSpans("Bidi R,AL*", strongRAL, dontCares2); |
| |
| UnicodeSet strong = (UnicodeSet) new UnicodeSet( |
| "[[:BidiClass=L:][:BidiClass=R:][:BidiClass=AL:]-[:unassigned:]]").freeze(); |
| showSpans("Strong", strong, dontCares); |
| showSpans("Strong*", strong, dontCares2); |
| |
| } |
| |
| private static void showSpans(String title, UnicodeSet sourceSet, UnicodeSet dontCares) { |
| System.out.println(title); |
| System.out.format("\tSource Set: %s" + CldrUtility.LINE_SEPARATOR, sourceSet); |
| System.out.format("\tDon't Cares: %s" + CldrUtility.LINE_SEPARATOR, dontCares); |
| UnicodeSet spanned = new UnicodeSet(sourceSet).addBridges(dontCares); |
| spanned = spanned.complement().complement(); |
| String spannedString = spanned.toString(); |
| String unescapedString = spanned.toPattern(false); |
| System.out.format("\tRanges: %d" + CldrUtility.LINE_SEPARATOR, spanned.getRangeCount()); |
| System.out.format("\tStrlen(\\u): %d" + CldrUtility.LINE_SEPARATOR, spannedString.length()); |
| System.out.format("\tStrlen(!\\u): %d" + CldrUtility.LINE_SEPARATOR, unescapedString.length()); |
| String title2 = "Result"; |
| String sample = spannedString; |
| if (false) { |
| if (sample.length() > 60) { |
| title2 = "Sample"; |
| sample = sample.substring(0, 60) + " ..."; |
| } |
| } |
| System.out.format("\t%s: %s" + CldrUtility.LINE_SEPARATOR, title2, sample); |
| System.out.println(); |
| } |
| |
| static int[] extraCJK = { |
| |
| 0x3006, // IDEOGRAPHIC CLOSING MARK;Lo |
| 0x302A, // IDEOGRAPHIC LEVEL TONE MARK;Mn |
| 0x302B, // IDEOGRAPHIC RISING TONE MARK;Mn |
| 0x302C, // IDEOGRAPHIC DEPARTING TONE MARK;Mn |
| 0x302D, // IDEOGRAPHIC ENTERING TONE MARK;Mn |
| 0x302E, // HANGUL SINGLE DOT TONE MARK;Mn |
| 0x302F, // HANGUL DOUBLE DOT TONE MARK;Mn |
| 0x3031, // VERTICAL KANA REPEAT MARK;Lm |
| 0x3032, // VERTICAL KANA REPEAT WITH VOICED SOUND MARK;Lm |
| 0x3033, // VERTICAL KANA REPEAT MARK UPPER HALF;Lm |
| 0x3034, // VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF;Lm |
| 0x3035, // VERTICAL KANA REPEAT MARK LOWER HALF;Lm |
| 0x303C, // MASU MARK;Lo |
| 0x3099, // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK;Mn |
| 0x309A, // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK;Mn |
| 0x309B, // KATAKANA-HIRAGANA VOICED SOUND MARK;Sk |
| 0x309C, // KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK;Sk |
| 0x30A0, // KATAKANA-HIRAGANA DOUBLE HYPHEN;Pd |
| 0x30FC, // KATAKANA-HIRAGANA PROLONGED SOUND MARK;Lm |
| 0xFF70, // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK;Lm |
| 0xFF9E, // HALFWIDTH KATAKANA VOICED SOUND MARK;Lm |
| 0xFF9F, // HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm |
| }; |
| |
| void checkCFK() { |
| // UnicodeSet Han, Hangul, Hiragana, Katakana, or Bopomofo |
| } |
| |
| private static void checkDistinguishing() { |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| Set<String> cldrFiles = cldrFactory.getAvailableLanguages(); |
| Set<String> distinguishing = new TreeSet<String>(); |
| Set<String> nondistinguishing = new TreeSet<String>(); |
| XPathParts parts = new XPathParts(); |
| for (Iterator<String> it = cldrFiles.iterator(); it.hasNext();) { |
| CLDRFile cldrFile = cldrFactory.make(it.next(), false); |
| DtdType dtdType = null; |
| if (cldrFile.isNonInheriting()) continue; |
| for (Iterator<String> it2 = cldrFile.iterator(); it2.hasNext();) { |
| String path = it2.next(); |
| if (dtdType == null) { |
| dtdType = DtdType.fromPath(path); |
| } |
| String fullPath = cldrFile.getFullXPath(path); |
| if (path.equals(fullPath)) continue; |
| parts.set(fullPath); |
| for (int i = 0; i < parts.size(); ++i) { |
| Map<String, String> m = parts.getAttributes(i); |
| if (m.size() == 0) continue; |
| String element = parts.getElement(i); |
| for (Iterator<String> mit = m.keySet().iterator(); mit.hasNext();) { |
| String attribute = mit.next(); |
| if (CLDRFile.isDistinguishing(dtdType, element, attribute)) { |
| distinguishing.add(attribute + "\tD\t" + element); |
| } else { |
| nondistinguishing.add(attribute + "\tN\t" + element); |
| } |
| } |
| } |
| } |
| } |
| System.out.println("Distinguishing"); |
| for (Iterator<String> it = distinguishing.iterator(); it.hasNext();) { |
| System.out.println(it.next()); |
| } |
| System.out.println(); |
| System.out.println("Non-Distinguishing"); |
| for (Iterator<String> it = nondistinguishing.iterator(); it.hasNext();) { |
| System.out.println(it.next()); |
| } |
| } |
| |
| private static void showEnglish() { |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| String requestedLocale = "en"; |
| CLDRFile cldrFile = cldrFactory.make(requestedLocale, true); |
| CLDRFile.Status status = new CLDRFile.Status(); |
| for (Iterator<String> it = cldrFile.iterator(); it.hasNext();) { |
| String requestedPath = it.next(); |
| String localeWhereFound = cldrFile.getSourceLocaleID(requestedPath, status); |
| if (!localeWhereFound.equals(requestedLocale) || !status.pathWhereFound.equals(requestedPath)) { |
| System.out.println("requested path:\t" + requestedPath |
| + "\tfound locale:\t" + localeWhereFound |
| + "\tsame?\t" + localeWhereFound.equals(requestedLocale) |
| + "\tfound path:\t" + status.pathWhereFound |
| + "\tsame?\t" + status.pathWhereFound.equals(requestedPath) |
| ); |
| } |
| } |
| } |
| |
| private static void checkPrivateUse() { |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| String requestedLocale = "en"; |
| CLDRFile cldrFile = cldrFactory.make(requestedLocale, true); |
| StandardCodes sc = StandardCodes.make(); |
| XPathParts parts = new XPathParts(); |
| Set<String> careAbout = new HashSet<String>(Arrays.asList(new String[] { "language", "script", "territory", "variant" })); |
| HashMap<String, Set<String>> foundItems = new HashMap<String, Set<String>>(); |
| TreeSet<String> problems = new TreeSet<String>(); |
| for (Iterator<String> it = cldrFile.iterator("", new UTF16.StringComparator(true, false, 0)); it.hasNext();) { |
| String requestedPath = it.next(); |
| parts.set(requestedPath); |
| String element = parts.getElement(-1); |
| if (!careAbout.contains(element)) continue; |
| String type = parts.getAttributeValue(-1, "type"); |
| if (type == null) continue; |
| Set<String> foundSet = (Set<String>) foundItems.get(element); |
| if (foundSet == null) foundItems.put(element, foundSet = new TreeSet<String>()); |
| foundSet.add(type); |
| |
| List<String> data = sc.getFullData(element, type); |
| if (data == null) { |
| problems.add("No RFC3066bis data for: " + element + "\t" + type + "\t" |
| + cldrFile.getStringValue(requestedPath)); |
| continue; |
| } |
| if (isPrivateOrDeprecated(data)) { |
| problems.add("Private/Deprecated Data for: " + element + "\t" + type + "\t" |
| + cldrFile.getStringValue(requestedPath) + "\t" + data); |
| } |
| // String canonical_value = (String)data.get(2); |
| } |
| for (Iterator<String> it = problems.iterator(); it.hasNext();) { |
| System.out.println(it.next()); |
| } |
| for (Iterator<String> it = careAbout.iterator(); it.hasNext();) { |
| String element = (String) it.next(); |
| Set<String> real = sc.getAvailableCodes(element); |
| Set<String> notFound = new TreeSet<String>(real); |
| notFound.removeAll((Set<String>) foundItems.get(element)); |
| for (Iterator<String> it2 = notFound.iterator(); it2.hasNext();) { |
| String type = it2.next(); |
| List<String> data = sc.getFullData(element, type); |
| if (isPrivateOrDeprecated(data)) continue; |
| System.out.println("Missing Translation for: " + element + "\t" + type + "\t" |
| + "\t" + data); |
| } |
| } |
| } |
| |
| static boolean isPrivateOrDeprecated(List<String> data) { |
| if (data.toString().indexOf("PRIVATE") >= 0) { |
| return true; |
| } |
| if ("PRIVATE USE".equals(data.get(0))) return true; |
| if (data.size() < 3) return false; |
| if (data.get(2) == null) return false; |
| if (data.get(2).toString().length() != 0) return true; |
| return false; |
| } |
| |
| static void testPopulous() { |
| Factory cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); |
| CLDRFile supp = cldrFactory.make("supplementalData", false); |
| CLDRFile temp = SimpleFactory.makeFile("supplemental"); |
| temp.setNonInheriting(true); |
| XPathParts parts = new XPathParts(null, null); |
| for (Iterator<String> it = supp.iterator(null, supp.getComparator()); it.hasNext();) { |
| String path = it.next(); |
| String value = supp.getStringValue(path); |
| String fullPath = supp.getFullXPath(path); |
| parts.set(fullPath); |
| // Map attributes = parts.getAttributes(-1); |
| String type = parts.getAttributeValue(-1, "type"); |
| String pop = (String) language_territory_hack_map.get(type); |
| if (pop != null) { |
| parts.putAttributeValue(-1, "mostPopulousTerritory", pop); |
| fullPath = parts.toString(); |
| } |
| temp.add(fullPath, value); |
| } |
| PrintWriter pw = new PrintWriter(System.out); |
| temp.write(pw); |
| pw.close(); |
| } |
| |
| private static final Map<String, String> language_territory_hack_map = new HashMap<String, String>(); |
| private static final String[][] language_territory_hack = { |
| { "af", "ZA" }, |
| { "am", "ET" }, |
| { "ar", "SA" }, |
| { "as", "IN" }, |
| { "ay", "PE" }, |
| { "az", "AZ" }, |
| { "bal", "PK" }, |
| { "be", "BY" }, |
| { "bg", "BG" }, |
| { "bn", "IN" }, |
| { "bs", "BA" }, |
| { "ca", "ES" }, |
| { "ch", "MP" }, |
| { "cpe", "SL" }, |
| { "cs", "CZ" }, |
| { "cy", "GB" }, |
| { "da", "DK" }, |
| { "de", "DE" }, |
| { "dv", "MV" }, |
| { "dz", "BT" }, |
| { "el", "GR" }, |
| { "en", "US" }, |
| { "es", "ES" }, |
| { "et", "EE" }, |
| { "eu", "ES" }, |
| { "fa", "IR" }, |
| { "fi", "FI" }, |
| { "fil", "PH" }, |
| { "fj", "FJ" }, |
| { "fo", "FO" }, |
| { "fr", "FR" }, |
| { "ga", "IE" }, |
| { "gd", "GB" }, |
| { "gl", "ES" }, |
| { "gn", "PY" }, |
| { "gu", "IN" }, |
| { "gv", "GB" }, |
| { "ha", "NG" }, |
| { "he", "IL" }, |
| { "hi", "IN" }, |
| { "ho", "PG" }, |
| { "hr", "HR" }, |
| { "ht", "HT" }, |
| { "hu", "HU" }, |
| { "hy", "AM" }, |
| { "id", "ID" }, |
| { "is", "IS" }, |
| { "it", "IT" }, |
| { "ja", "JP" }, |
| { "ka", "GE" }, |
| { "kk", "KZ" }, |
| { "kl", "GL" }, |
| { "km", "KH" }, |
| { "kn", "IN" }, |
| { "ko", "KR" }, |
| { "kok", "IN" }, |
| { "ks", "IN" }, |
| { "ku", "TR" }, |
| { "ky", "KG" }, |
| { "la", "VA" }, |
| { "lb", "LU" }, |
| { "ln", "CG" }, |
| { "lo", "LA" }, |
| { "lt", "LT" }, |
| { "lv", "LV" }, |
| { "mai", "IN" }, |
| { "men", "GN" }, |
| { "mg", "MG" }, |
| { "mh", "MH" }, |
| { "mk", "MK" }, |
| { "ml", "IN" }, |
| { "mn", "MN" }, |
| { "mni", "IN" }, |
| { "mo", "MD" }, |
| { "mr", "IN" }, |
| { "ms", "MY" }, |
| { "mt", "MT" }, |
| { "my", "MM" }, |
| { "na", "NR" }, |
| { "nb", "NO" }, |
| { "nd", "ZA" }, |
| { "ne", "NP" }, |
| { "niu", "NU" }, |
| { "nl", "NL" }, |
| { "nn", "NO" }, |
| { "no", "NO" }, |
| { "nr", "ZA" }, |
| { "nso", "ZA" }, |
| { "ny", "MW" }, |
| { "om", "KE" }, |
| { "or", "IN" }, |
| { "pa", "IN" }, |
| { "pau", "PW" }, |
| { "pl", "PL" }, |
| { "ps", "PK" }, |
| { "pt", "BR" }, |
| { "qu", "PE" }, |
| { "rn", "BI" }, |
| { "ro", "RO" }, |
| { "ru", "RU" }, |
| { "rw", "RW" }, |
| { "sd", "IN" }, |
| { "sg", "CF" }, |
| { "si", "LK" }, |
| { "sk", "SK" }, |
| { "sl", "SI" }, |
| { "sm", "WS" }, |
| { "so", "DJ" }, |
| { "sq", "CS" }, |
| { "sr", "CS" }, |
| { "ss", "ZA" }, |
| { "st", "ZA" }, |
| { "sv", "SE" }, |
| { "sw", "KE" }, |
| { "ta", "IN" }, |
| { "te", "IN" }, |
| { "tem", "SL" }, |
| { "tet", "TL" }, |
| { "th", "TH" }, |
| { "ti", "ET" }, |
| { "tg", "TJ" }, |
| { "tk", "TM" }, |
| { "tkl", "TK" }, |
| { "tvl", "TV" }, |
| { "tl", "PH" }, |
| { "tn", "ZA" }, |
| { "to", "TO" }, |
| { "tpi", "PG" }, |
| { "tr", "TR" }, |
| { "ts", "ZA" }, |
| { "uk", "UA" }, |
| { "ur", "IN" }, |
| { "uz", "UZ" }, |
| { "ve", "ZA" }, |
| { "vi", "VN" }, |
| { "wo", "SN" }, |
| { "xh", "ZA" }, |
| { "zh", "CN" }, |
| { "zh_Hant", "TW" }, |
| { "zu", "ZA" }, |
| { "aa", "ET" }, |
| { "byn", "ER" }, |
| { "eo", "DE" }, |
| { "gez", "ET" }, |
| { "haw", "US" }, |
| { "iu", "CA" }, |
| { "kw", "GB" }, |
| { "sa", "IN" }, |
| { "sh", "HR" }, |
| { "sid", "ET" }, |
| { "syr", "SY" }, |
| { "tig", "ER" }, |
| { "tt", "RU" }, |
| { "wal", "ET" }, }; |
| static { |
| for (int i = 0; i < language_territory_hack.length; ++i) { |
| language_territory_hack_map.put(language_territory_hack[i][0], language_territory_hack[i][1]); |
| } |
| } |
| |
| static class MyXSymbolTable extends UnicodeSet.XSymbolTable { |
| static VariantFolder caseFolder = new VariantFolder(new CaseVariantFolder()); |
| static VariantFolder canonicalFolder = new VariantFolder(new CanonicalFolder()); |
| static VariantFolder compatibilityFolder = new VariantFolder(new CompatibilityFolder()); |
| |
| public boolean applyPropertyAlias(String propertyName, String propertyValue, UnicodeSet result) { |
| if (propertyName.equalsIgnoreCase("close")) { |
| if (propertyValue.equalsIgnoreCase("case")) { |
| result.addAll(caseFolder.getClosure(result)); |
| } else if (propertyValue.equalsIgnoreCase("canonical")) { |
| result.addAll(canonicalFolder.getClosure(result)); |
| } else if (propertyValue.equalsIgnoreCase("compatibility")) { |
| result.addAll(compatibilityFolder.getClosure(result)); |
| } |
| return true; |
| } else if (propertyName.equalsIgnoreCase("reduce")) { |
| if (propertyValue.equalsIgnoreCase("case")) { |
| UnicodeSet temp = caseFolder.reduce(result); |
| result.clear().addAll(temp); |
| } else if (propertyValue.equalsIgnoreCase("canonical")) { |
| UnicodeSet temp = canonicalFolder.reduce(result); |
| result.clear().addAll(temp); |
| } else if (propertyValue.equalsIgnoreCase("compatibility")) { |
| UnicodeSet temp = compatibilityFolder.reduce(result); |
| result.clear().addAll(temp); |
| } |
| return true; |
| } else if (propertyName.equalsIgnoreCase("reduceCase")) { |
| UnicodeSet temp = caseFolder.reduce(new UnicodeSet(propertyValue.replace( |
| "·]", ":]"))); |
| result.clear().addAll(temp); |
| return true; |
| } else if (propertyName.equalsIgnoreCase("reduceCanonical")) { |
| UnicodeSet temp = canonicalFolder.reduce(new UnicodeSet(propertyValue.replace( |
| "·]", ":]"))); |
| result.clear().addAll(temp); |
| return true; |
| } else if (propertyName.equalsIgnoreCase("reduceCase")) { |
| UnicodeSet temp = caseFolder.reduce(new UnicodeSet(propertyValue.replace( |
| "·]", ":]"))); |
| result.clear().addAll(temp); |
| return true; |
| } |
| return false; |
| } |
| } |
| |
| } |