// tools/java/org/unicode/cldr/tool/ChartLanguageGroups.java - platform/external/cldr - Git at Google

 package org.unicode.cldr.tool;

 import java.io.IOException;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
 import java.util.TreeSet;

 import org.unicode.cldr.util.CLDRConfig;
 import org.unicode.cldr.util.CLDRFile;
 import org.unicode.cldr.util.CldrUtility;
 import org.unicode.cldr.util.Pair;
 import org.unicode.cldr.util.StandardCodes;
 import org.unicode.cldr.util.StandardCodes.LstrField;
 import org.unicode.cldr.util.StandardCodes.LstrType;

 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.ImmutableSet.Builder;
 import com.google.common.collect.Multimap;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.util.ULocale;

 /**
  * Chart generator for the "Language Groups" page.
  *
  * <p>Starting from the root code {@code "mul"}, it walks the language-group containment
  * returned by {@code SupplementalDataInfo.getLanguageGroups()} and emits up to two table
  * rows per group: one listing the children that are themselves groups, and one listing
  * the children that contain nothing.
  */
 public class ChartLanguageGroups extends Chart {

     /** Prefix for a leaf entry whose language-registry Scope is "Collection". */
     private static final String SHOULD_NOT_BE_LEAF_NODE = "🍂";
     /** Status marker for a row whose contained entries have no children. */
     private static final String LEAF_NODES = "🍃";
     /** Status marker for a row whose contained entries have children of their own. */
     private static final String TREE_NODES = "🌲";

     public static void main(String[] args) {
         new ChartLanguageGroups().writeChart(null);
     }

     /** Language codes whose registry Scope field equals "Collection" (ignoring case). */
     static final Set<String> COLLECTIONS;
     static {
         Map<String, Map<LstrField, String>> languages = StandardCodes.getEnumLstreg().get(LstrType.language);
         Builder<String> collections = ImmutableSet.<String>builder();
         for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) {
             // equalsIgnoreCase(null) is false, so entries without a Scope are skipped
             // without a separate null check.
             if ("Collection".equalsIgnoreCase(e.getValue().get(LstrField.Scope))) {
                 collections.add(e.getKey());
             }
         }
         COLLECTIONS = collections.build();
     }

     @Override
     public String getDirectory() {
         return FormattedFileWriter.CHART_TARGET_DIR;
     }

     @Override
     public String getTitle() {
         return "Language Groups";
     }

     /**
      * Returns the HTML introduction shown above the table: a legend for the three
      * markers followed by caveats about the underlying data.
      */
     @Override
     public String getExplanation() {
         return "<p>This chart shows draft language groups based on data extracted from wikidata. "
             + "The <b>St.</b> (status) cell indicates the nature of the items in the adjacent <b>Contained</b> cell:</p>\n"
             + "<ul>\n"
             + "<li>A " + TREE_NODES
             + " indicates that the contained languages are tree nodes (contain other languages or language groups), "
             + "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n"
             + "<li>A " + LEAF_NODES
             + " indicates that the contained languages are leaf nodes (contain nothing).</li>\n"
             + "<li>A " + SHOULD_NOT_BE_LEAF_NODE
             + " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is "
             + "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n"
             + "</ul>\n"
             + "<p><b>Caveats:</b> Only the wikidata containment for "
             + "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used. "
             + "The containment data is not complete: "
             + "if a language doesn't appear in the chart it could be an isolate, or just be missing data. "
             + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n"
             ;
     }

     /** Collator used to sort display names in English order. */
     final Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);

     /**
      * Orders (name, code) pairs by name using {@link #ENGLISH_ORDER}, falling back to the
      * code so that two entries with equal names are both kept in a sorted set.
      */
     private final Comparator<Pair<String, String>> byNameThenCode = new Comparator<Pair<String, String>>() {
         @Override
         public int compare(Pair<String, String> o1, Pair<String, String> o2) {
             int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
             return diff != 0 ? diff : o1.getSecond().compareTo(o2.getSecond());
         }
     };

     /**
      * Builds the four-column table from the language-group data, starting at the root
      * code {@code "mul"}, and writes the rendered table to {@code pw}.
      *
      * @param pw destination for the rendered table
      * @throws IOException if writing to {@code pw} fails
      */
     @Override
     public void writeContents(FormattedFileWriter pw) throws IOException {

         Multimap<String, String> lg = CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups();

         TablePrinter tablePrinter = new TablePrinter()
             .addColumn("Language Group", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .setBreakSpans(true)
             .addColumn("Name", "class='source'", null, "class='source'", true)
             .addColumn("St.", "class='source'", null, "class='source'", true)
             .addColumn("Contained", "class='source'", null, "class='target'", true)
             .setBreakSpans(true);

         show(lg, "mul", tablePrinter);
         pw.write(tablePrinter.toTable());
     }

     /**
      * Adds the rows for {@code parent} and then recurses into each child that has
      * children of its own, in name order.
      *
      * <p>NOTE(review): there is no cycle guard; this assumes the containment data is
      * acyclic — confirm against the data source.
      *
      * @param lg containment data: group code to the codes it directly contains
      * @param parent code of the group to show
      * @param tablePrinter table that receives the rows
      */
     private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) {
         // Multimap.get returns an empty collection (never null) for an absent key.
         Collection<String> children = lg.get(parent);
         if (children.isEmpty()) {
             return;
         }
         Set<Pair<String, String>> nameAndCode = new TreeSet<>(byNameThenCode);
         for (String lang : children) {
             nameAndCode.add(Pair.of(getLangName(lang), lang));
         }

         StringBuilder treeList = new StringBuilder();
         StringBuilder leafList = new StringBuilder();
         // Insertion-ordered so the recursion below follows the sorted order.
         Set<Pair<String, String>> nameAndCodeWithChildren = new LinkedHashSet<>();
         for (Pair<String, String> pair : nameAndCode) {
             String code = pair.getSecond();
             if (lg.containsKey(code)) {
                 addChildren(treeList, pair, false);
                 nameAndCodeWithChildren.add(pair);
             } else if (!code.equals("und")) {
                 // "und" is never listed as a leaf.
                 addChildren(leafList, pair, true);
             }
         }
         if (treeList.length() != 0) {
             addRow(parent, tablePrinter, TREE_NODES, treeList);
         }
         if (leafList.length() != 0) {
             addRow(parent, tablePrinter, LEAF_NODES, leafList);
         }

         for (Pair<String, String> pair : nameAndCodeWithChildren) {
             show(lg, pair.getSecond(), tablePrinter);
         }
     }

     /** Adds one row: group code, group name, status marker, and the contained list. */
     private void addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) {
         tablePrinter.addRow()
             .addCell(parent)
             .addCell(getLangName(parent))
             .addCell(marker)
             .addCell(treeList.toString())
             .finishRow();
     }

     /** Appends one formatted entry to {@code list}, separating entries with "; ". */
     private void addChildren(StringBuilder list, Pair<String, String> pair, boolean showCollections) {
         if (list.length() != 0) {
             list.append("; ");
         }
         list.append(getPairName(pair, showCollections));
     }

     /**
      * Formats a (name, code) pair as {@code code “name”}; when {@code showCollection} is
      * set and the code is in {@link #COLLECTIONS}, the entry is prefixed with
      * {@link #SHOULD_NOT_BE_LEAF_NODE}.
      */
     private String getPairName(Pair<String, String> pair, boolean showCollection) {
         String prefix = showCollection && COLLECTIONS.contains(pair.getSecond())
             ? SHOULD_NOT_BE_LEAF_NODE + " "
             : "";
         return prefix + pair.getSecond() + " “" + pair.getFirst() + "”";
     }

     /**
      * Returns the display name for a language code: fixed names for {@code "mul"} and
      * {@code "zh"}, otherwise the name from {@code ENGLISH} with the " (Other)" and
      * " languages" fragments removed.
      */
     private String getLangName(String langCode) {
         if (langCode.equals("mul")) {
             return "All";
         }
         if (langCode.equals("zh")) {
             return "Mandarin Chinese";
         }
         String name = ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode);
         if (name == null) {
             // Presumably getName can return null for a code without an English name
             // (TODO confirm); fall back to the code rather than fail the whole chart.
             return langCode;
         }
         return name.replace(" (Other)", "").replace(" languages", "");
     }
 }
	package org.unicode.cldr.tool;

	import java.io.IOException;
	import java.util.Collection;
	import java.util.Comparator;
	import java.util.LinkedHashSet;
	import java.util.Map;
	import java.util.Map.Entry;
	import java.util.Set;
	import java.util.TreeSet;

	import org.unicode.cldr.util.CLDRConfig;
	import org.unicode.cldr.util.CLDRFile;
	import org.unicode.cldr.util.CldrUtility;
	import org.unicode.cldr.util.Pair;
	import org.unicode.cldr.util.StandardCodes;
	import org.unicode.cldr.util.StandardCodes.LstrField;
	import org.unicode.cldr.util.StandardCodes.LstrType;

	import com.google.common.collect.ImmutableSet;
	import com.google.common.collect.ImmutableSet.Builder;
	import com.google.common.collect.Multimap;
	import com.ibm.icu.text.Collator;
	import com.ibm.icu.util.ULocale;

	public class ChartLanguageGroups extends Chart {

	private static final String SHOULD_NOT_BE_LEAF_NODE = "🍂";
	private static final String LEAF_NODES = "🍃";
	private static final String TREE_NODES = "🌲";

	public static void main(String[] args) {
	new ChartLanguageGroups().writeChart(null);
	}

	static final Set<String> COLLECTIONS;
	static {
	Map<String, Map<LstrField, String>> languages = StandardCodes.getEnumLstreg().get(LstrType.language);
	Builder<String> _collections = ImmutableSet.<String>builder();
	for (Entry<String, Map<LstrField, String>> e : languages.entrySet()) {
	String scope = e.getValue().get(LstrField.Scope);
	if (scope != null
	&& "Collection".equalsIgnoreCase(scope)) {
	_collections.add(e.getKey());
	}
	}
	COLLECTIONS = _collections.build();
	}

	@Override
	public String getDirectory() {
	return FormattedFileWriter.CHART_TARGET_DIR;
	}

	@Override
	public String getTitle() {
	return "Language Groups";
	}

	@Override
	public String getExplanation() {
	return "<p>This chart shows draft language groups based on data extracted from wikidata. "
	+ "The <b>Status</b> cell indicates the nature of the items in the adjacent <b>Contained</b> cell:<p>"
	+ "<ul>\n"
	+ "<li>A " + TREE_NODES
	+ " indicates that the contained languages are tree nodes (contain other languages or langauge groups), "
	+ "and will be listed further down in the chart in a <b>Language Group</b> cell.</li>\n"
	+ "<li>A " + LEAF_NODES
	+ " indicates that the contained languages are leaf nodes (contain nothing).</li>\n"
	+ "<li>A " + SHOULD_NOT_BE_LEAF_NODE
	+ " before an item <i>in</i> a <b>Contained</b> cell indicates a leaf node that shouldn’t be — that is, its ISO 639 Scope is "
	+ "<a href='http://www-01.sil.org/iso639-3/scope.asp#C' target='_blank'>Collection</a>.</li>\n"
	+ "</ul>\n"
	+ "<p><b>Caveats:</b> Only the wikidata containment for "
	+ "<a href='http://unicode.org/reports/tr35/#unicode_language_subtag'>valid language codes</a> is used."
	+ "The containment data is not complete: "
	+ "if a language doesn't appear in the chart it could be an isolate, or just be missing data."
	+ "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.</p>\n"
	;
	}

	Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH);

	@Override
	public void writeContents(FormattedFileWriter pw) throws IOException {

	Multimap<String, String> lg = CLDRConfig.getInstance().getSupplementalDataInfo().getLanguageGroups();

	TablePrinter tablePrinter = new TablePrinter()
	.addColumn("Language Group", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
	.setBreakSpans(true)
	.addColumn("Name", "class='source'", null, "class='source'", true)
	.addColumn("St.", "class='source'", null, "class='source'", true)
	.addColumn("Contained", "class='source'", null, "class='target'", true)
	.setBreakSpans(true);

	show(lg, "mul", tablePrinter);
	pw.write(tablePrinter.toTable());
	}

	private void show(Multimap<String, String> lg, String parent, TablePrinter tablePrinter) {
	Collection<String> children = lg.get(parent);
	if (children == null \|\| children.isEmpty()) {
	return;
	}
	TreeSet<Pair<String,String>> nameAndCode = new TreeSet<>(new Comparator<Pair<String,String>>() {
	@Override
	public int compare(Pair<String, String> o1, Pair<String, String> o2) {
	int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst());
	if (diff != 0) {
	return diff;
	}
	return o1.getSecond().compareTo(o2.getSecond());
	}

	});
	for (String lang : children) {
	nameAndCode.add(Pair.of(getLangName(lang), lang));
	}
	StringBuilder treeList = new StringBuilder();
	StringBuilder leafList = new StringBuilder();
	LinkedHashSet<Pair<String,String>> nameAndCodeWithChildren = new LinkedHashSet<>();
	for (Pair<String, String> pair : nameAndCode) {
	String code = pair.getSecond();
	if (lg.containsKey(code)) {
	addChildren(treeList, TREE_NODES, pair, false);
	nameAndCodeWithChildren.add(pair);
	} else if (!code.equals("und")){
	addChildren(leafList, LEAF_NODES, pair, true);
	}
	}
	if (treeList.length() != 0) {
	addRow(parent, tablePrinter, TREE_NODES, treeList);
	}
	if (leafList.length() != 0) {
	addRow(parent, tablePrinter, LEAF_NODES, leafList);
	}

	for (Pair<String, String> pair : nameAndCodeWithChildren) {
	show(lg, pair.getSecond(), tablePrinter);
	}
	}

	private void addRow(String parent, TablePrinter tablePrinter, String marker, StringBuilder treeList) {
	tablePrinter.addRow()
	.addCell(parent)
	.addCell(getLangName(parent))
	.addCell(marker)
	.addCell(treeList.toString())
	.finishRow();
	}

	private void addChildren(StringBuilder treeList, String marker, Pair<String, String> pair, boolean showCollections) {
	if (treeList.length() != 0) {
	treeList.append("; ");
	}
	treeList.append(getPairName(pair, showCollections));
	}

	private String getPairName(Pair<String, String> pair, boolean showCollection) {
	return (showCollection && COLLECTIONS.contains(pair.getSecond())
	? SHOULD_NOT_BE_LEAF_NODE + " ":
	"")
	+ pair.getSecond() + " “" + pair.getFirst() + "”";
	}

	private String getLangName(String langCode) {
	return langCode.equals("mul") ? "All"
	: langCode.equals("zh") ? "Mandarin Chinese"
	: ENGLISH.getName(CLDRFile.LANGUAGE_NAME, langCode).replace(" (Other)", "").replace(" languages", "");
	}
	}