// blob: 73814c95a95963adb13cd673b8d5bc0f275375eb [file] [log] [blame]
package org.unicode.cldr.draft;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.unicode.cldr.tool.ToolConfig;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
import org.unicode.cldr.util.With;
import org.unicode.cldr.util.XPathParts;
import com.ibm.icu.impl.Relation;
/**
 * Command-line tool that prints sample strings for the plural categories of each locale.
 *
 * <p>For every language returned by the CLDR factory that passes the filters in
 * {@link #gatherData()}, the tool looks for a base path (first under units, then under
 * currencies) whose count-specific values are all different, and prints one line per plural
 * category. Locales for which no such path is found are reported afterwards together with
 * whatever distinguishing pairs could be found.
 */
public class ExtractCountItems {
    CLDRConfig testInfo = ToolConfig.getToolInstance();
    Factory factory = testInfo.getCldrFactory();

    /**
     * The values collected for one base path, indexed both by plural category and by value.
     */
    static class SampleData {
        /** Value recorded for each plural category. */
        EnumMap<Count, String> countToString = new EnumMap<Count, String>(Count.class);
        /** Reverse index: the plural categories that share each value. */
        Relation<String, Count> stringToCount = Relation.of(new HashMap<String, Set<Count>>(), HashSet.class);
        /** The path these samples belong to, as passed to the constructor. */
        String basePath;

        /**
         * @param basePath the path to associate with these samples
         */
        public SampleData(String basePath) {
            this.basePath = basePath;
        }

        /**
         * Records {@code value} as the sample for {@code count}.
         *
         * @param count the plural category
         * @param value the sample string for that category
         * @return {@code stringToCount.size()} after the insertion; callers compare it with the
         *         number of plural keywords (presumably this is the number of distinct values
         *         seen so far -- confirm against {@code Relation.size()})
         */
        public int put(Count count, String value) {
            countToString.put(count, value);
            stringToCount.put(value, count);
            return stringToCount.size();
        }

        /** Returns the string form of the category-to-value map. */
        @Override
        public String toString() {
            return countToString.toString();
        }

        /** Returns the (category, value) entries in {@link Count} order. */
        public Set<Entry<Count, String>> entrySet() {
            return countToString.entrySet();
        }
    }

    /** Reusable path parser; reset with each path processed in {@link #getSamples}. */
    XPathParts parts = new XPathParts();
    /** Locale ID to the complete sample set found for it, in discovery order. */
    Map<String, SampleData> samples = new LinkedHashMap<String, SampleData>();

    /**
     * Runs {@link #gatherData()} on a fresh instance.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        new ExtractCountItems().gatherData();
    }

    /**
     * Walks the available languages and prints plural samples to standard output.
     *
     * <p>A locale is skipped when it has no Google coverage level, when that level compares at or
     * below {@code Level.BASIC}, or when it is not among the plural locales. Locales with a single
     * plural keyword are only listed in the summary. For the rest, samples are looked up under
     * units and then currencies; locales with no complete sample set are reported at the end with
     * the minimal pairs that could still be found.
     */
    void gatherData() {
        Set<String> singletonLanguages = new LinkedHashSet<String>();
        Map<String, Map<String, SampleData>> defectiveLocales = new LinkedHashMap<String, Map<String, SampleData>>();

        // These lookups do not depend on the locale, so they are done once rather than per iteration.
        Map<String, Level> localeStatus = StandardCodes.make().getLocaleToLevel(Organization.google);
        if (localeStatus != null) {
            Set<String> pluralLocales = testInfo.getSupplementalDataInfo().getPluralLocales();
            for (String locale : factory.getAvailableLanguages()) {
                Level level = localeStatus.get(locale);
                if (level == null || level.compareTo(Level.BASIC) <= 0) {
                    continue;
                }
                if (!pluralLocales.contains(locale)) {
                    continue;
                }
                PluralInfo pluralInfo = testInfo.getSupplementalDataInfo().getPlurals(locale);
                int keywordCount = pluralInfo.getCanonicalKeywords().size();
                if (keywordCount == 1) {
                    singletonLanguages.add(locale);
                    continue;
                }
                CLDRFile cldr = factory.make(locale, true);
                // Shared across both lookups so partial results survive for the defective report.
                Map<String, SampleData> data = new LinkedHashMap<String, SampleData>();
                SampleData sampleData = getSamples(cldr, keywordCount, "//ldml/units/unit", data);
                if (sampleData == null) {
                    // try currencies
                    sampleData = getSamples(cldr, keywordCount, "//ldml/numbers/currencies/currency", data);
                }
                if (sampleData == null) {
                    defectiveLocales.put(locale, data);
                    continue;
                }
                System.out.println("#" + locale + "\t" + sampleData.basePath);
                for (Entry<Count, String> entry : sampleData.entrySet()) {
                    System.out.println(locale + "\t" + entry.getKey() + "\t<" + entry.getValue() + ">");
                }
            }
        }

        System.out.println("");
        System.out.println("\tLanguages with one plural category: " + singletonLanguages);
        System.out.println("\tLanguages with only defective samples: ");
        for (Entry<String, Map<String, SampleData>> entry : defectiveLocales.entrySet()) {
            String locale = entry.getKey();
            System.out.println(locale);
            PluralInfo pluralInfo = testInfo.getSupplementalDataInfo().getPlurals(locale);
            EnumSet<Count> realKeywords = EnumSet.noneOf(Count.class);
            for (String keyword : pluralInfo.getCanonicalKeywords()) {
                realKeywords.add(Count.valueOf(keyword));
            }
            // Every unordered pair of categories (i before j). An insertion-ordered set keeps the
            // printed output stable from run to run.
            Set<Pair<Count, Count>> missingPairs = new LinkedHashSet<Pair<Count, Count>>();
            for (Count i : realKeywords) {
                for (Count j : realKeywords) {
                    if (i.compareTo(j) < 0) {
                        missingPairs.add(new Pair<Count, Count>(i, j));
                    }
                }
            }
            showMinimalPairs(missingPairs, entry.getValue());
        }
    }

    /**
     * Prints, for each category pair, the first base path whose two values differ, then prints
     * the pairs for which no such path exists.
     *
     * @param missingPairs pairs still to be distinguished; matched pairs are removed from this set
     * @param pathToSamples the samples gathered per base path
     */
    private void showMinimalPairs(Set<Pair<Count, Count>> missingPairs, Map<String, SampleData> pathToSamples) {
        for (Entry<String, SampleData> entry : pathToSamples.entrySet()) {
            SampleData pathSamples = entry.getValue();
            for (Iterator<Pair<Count, Count>> it = missingPairs.iterator(); it.hasNext();) {
                Pair<Count, Count> missing = it.next();
                String s1 = pathSamples.countToString.get(missing.getFirst());
                String s2 = pathSamples.countToString.get(missing.getSecond());
                if (s1 != null && s2 != null && !s1.equals(s2)) {
                    // have match, display and remove.
                    System.out.println(missing + "\t" + pathSamples.basePath + "\t<" + s1 + ">\t<" + s2 + ">");
                    it.remove();
                }
            }
        }
        System.out.println("Missing minimal pairs: " + missingPairs);
    }

    /**
     * Scans the paths under {@code prefix} for a base path that yields {@code keywordCount}
     * samples, accumulating everything seen into {@code data}.
     *
     * <p>Paths without {@code @count}, paths under decimal or currency formats, and
     * {@code alt="short"} paths are ignored, as are paths with no value or no count attribute on
     * the last element. Values are lower-cased before being recorded.
     *
     * @param cldr the file to read paths and values from
     * @param keywordCount the number of plural keywords for the locale
     * @param prefix the path prefix to iterate over
     * @param data base path to samples; updated in place, including on failure
     * @return the first complete sample set (also stored in {@link #samples} under the file's
     *         locale ID), or {@code null} if none was found
     */
    SampleData getSamples(CLDRFile cldr, int keywordCount, String prefix, Map<String, SampleData> data) {
        for (String path : With.in(cldr.iterator(prefix, cldr.getComparator()))) {
            if (!path.contains("@count")) {
                continue;
            }
            if (path.contains("//ldml/numbers/decimalFormats") || path.contains("//ldml/numbers/currencyFormats")
                || path.contains("@alt=\"short\"")) {
                continue;
            }
            String rawValue = cldr.getStringValue(path);
            if (rawValue == null) {
                // No value to sample; skipping avoids a NullPointerException on toLowerCase.
                continue;
            }
            String value = rawValue.toLowerCase(Locale.ENGLISH);
            parts.set(path);
            String countAttribute = parts.getAttributeValue(-1, "count");
            if (countAttribute == null) {
                // "@count" occurs somewhere in the path but not on the last element.
                continue;
            }
            Count count = PluralInfo.Count.valueOf(countAttribute);
            // get the path without the count = basepath
            parts.setAttribute(-1, "count", null);
            String basePath = parts.toString();
            SampleData sampleData = data.get(basePath);
            if (sampleData == null) {
                sampleData = new SampleData(basePath);
                data.put(basePath, sampleData);
            }
            int items = sampleData.put(count, value);
            if (items == keywordCount) {
                samples.put(cldr.getLocaleID(), sampleData);
                return sampleData;
            }
        }
        return null;
    }
}