blob: d47cadcf5cd93b5bbada3df15d0d2ed3292f11a6 [file] [log] [blame]
package org.unicode.cldr.tool;
import java.util.Collection;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.Annotations.AnnotationSet;
import org.unicode.cldr.util.Counter;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
/**
 * Command-line tool that prints tab-separated histograms describing how long
 * annotation short names are, measured in character-break segments as reported
 * by ICU's character {@link BreakIterator} (segments starting with an ASCII
 * space are not counted).
 *
 * <p>The set of codes examined is the key set of the explicit values of the
 * "en" annotation data set. Output is written to {@code System.out} in three
 * sections:
 * <ol>
 * <li>One line per available locale: locale ID, display name, then one column
 *     for each length from 1 up to the longest name seen in that locale,
 *     holding the number of names of that length (blank when zero).</li>
 * <li>A {@code Missing} section: each code that has no short name in at least
 *     one locale, followed by the locales lacking it.</li>
 * <li>A {@code CodeToGCs} section: one line per code with the same kind of
 *     length histogram, accumulated across all locales.</li>
 * </ol>
 */
public class MakeAnnotationHistogram {

    /**
     * Runs the tool and prints the three report sections to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        AnnotationSet english = Annotations.getDataSet("en");
        UnicodeSet codes = english.getExplicitValues().keySet();
        Multimap<String, ULocale> missingCodeToLocales = TreeMultimap.create();
        Map<String, Counter<Integer>> codeToCounter = new TreeMap<>();
        // Longest name length seen in any locale; sets the column count of the CodeToGCs section.
        int maxmax = 0;

        for (String locale : Annotations.getAvailable()) {
            ULocale ulocale = new ULocale(locale);
            AnnotationSet annotationSet = Annotations.getDataSet(locale);
            // The iterator depends only on the locale, so build it once here and
            // re-target it with setText() for every name instead of per name.
            BreakIterator boundary = BreakIterator.getCharacterInstance(ulocale);
            Counter<Integer> counter = new Counter<>();
            int max = 0;

            for (String code : codes) {
                String name = annotationSet.getShortName(code);
                if (name == null) {
                    missingCodeToLocales.put(code, ulocale);
                    continue;
                }
                int clusterCount = getCount(name, boundary);
                counter.add(clusterCount, 1);
                max = Math.max(max, clusterCount);

                Counter<Integer> counterForCode = codeToCounter.get(code);
                if (counterForCode == null) {
                    counterForCode = new Counter<>();
                    codeToCounter.put(code, counterForCode);
                }
                counterForCode.add(clusterCount, 1);
            }

            System.out.print(locale + "\t" + ulocale.getDisplayName());
            for (int i = 1; i <= max; ++i) {
                System.out.print("\t" + emptyIfZero(counter.getCount(i)));
            }
            System.out.println();
            maxmax = Math.max(maxmax, max);
        }

        System.out.println("Missing");
        for (Entry<String, Collection<ULocale>> entry : missingCodeToLocales.asMap().entrySet()) {
            System.out.println(entry.getKey() + "\t" + entry.getValue());
        }

        System.out.println("CodeToGCs");
        for (Entry<String, Counter<Integer>> entry : codeToCounter.entrySet()) {
            String code = entry.getKey();
            Counter<Integer> counter = entry.getValue();
            System.out.print(code);
            for (int i = 1; i <= maxmax; ++i) {
                System.out.print("\t" + emptyIfZero(counter.getCount(i)));
            }
            System.out.println();
        }
    }

    /**
     * Formats a histogram cell.
     *
     * @param count the value to format
     * @return the empty string when {@code count} is zero, otherwise its decimal form
     */
    private static String emptyIfZero(long count) {
        return count == 0 ? "" : String.valueOf(count);
    }

    /**
     * Counts the segments of {@code name} delimited by {@code boundary},
     * skipping every segment whose first char is an ASCII space.
     *
     * @param name the text to measure
     * @param boundary the break iterator to use; its text is replaced by {@code name}
     * @return the number of non-space-initial segments (0 for an empty string)
     */
    private static int getCount(String name, BreakIterator boundary) {
        boundary.setText(name);
        int count = 0;
        int start = boundary.first();
        for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
            if (name.charAt(start) != ' ') {
                ++count;
            }
        }
        return count;
    }
}