blob: d3dfaefcd1841f05ccae3ec69356858ccdf0a022 [file] [log] [blame]
package org.unicode.cldr.tool;
import com.google.common.base.Joiner;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
import com.ibm.icu.text.DecimalFormat;
import com.ibm.icu.text.MessageFormat;
import com.ibm.icu.text.PluralRules;
import com.ibm.icu.text.PluralRules.SampleType;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.test.ExampleGenerator;
import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileCopier;
import org.unicode.cldr.util.GrammarInfo;
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
import org.unicode.cldr.util.ICUServiceBuilder;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.Rational;
import org.unicode.cldr.util.Rational.FormatStyle;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
import org.unicode.cldr.util.UnitConverter;
import org.unicode.cldr.util.UnitConverter.ConversionInfo;
import org.unicode.cldr.util.UnitConverter.PlaceholderLocation;
import org.unicode.cldr.util.UnitConverter.UnitId;
import org.unicode.cldr.util.UnitPathType;
import org.unicode.cldr.util.Validity;
import org.unicode.cldr.util.XPathParts;
/** Chart the grammatical forms, with unit examples */
public class ChartGrammaticalForms extends Chart {
/** Name of the formatted-sample column, as referenced in the explanatory HTML text. */
private static final String FORMATTED_SAMPLE = "Formatted Sample";
/** Explanatory HTML about grammatical features; embedded in the paragraphs written above the feature tables. */
private static final String INFO_ON_FEATURES =
"Current information is only for nominal forms. "
+ "Where a Usage is present other than “general”, that means that a subset of the grammatical features are relevant to that Usage. "
+ "For example, Feature=grammaticalGender and Usage=units might omit an ‘animate’ gender. "
+ "For the meanings of the values, see "
+ "<a target='spec' href='https://unicode.org/reports/tr35/tr35-general.html#Grammatical_Features'>LDML Grammatical Features</a>.";
/** HTML heading placed at the start of the explanation for the index chart and each subchart. */
private static final String MAIN_HEADER = "<h2>Grammatical Forms</h2>";
/** Debugging switch. */
private static final boolean DEBUG = false;
/** Output directory for these charts: the "grammar/" folder under the chart directory. */
private static final String DIR = CLDRPaths.CHART_DIRECTORY + "grammar/";
/** English plural rules; used to pick the plural category of an English unit pattern. */
public static final PluralRules ENGLISH_PLURAL_RULES = SDI.getPlurals("en").getPluralRules();
/**
 * Command-line entry point: builds the grammatical forms chart.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    final ChartGrammaticalForms chart = new ChartGrammaticalForms();
    chart.writeChart(null);
}
/** Returns the output directory for this chart ({@link #DIR}). */
@Override
public String getDirectory() {
return DIR;
}
/** Returns the title of the index chart. */
@Override
public String getTitle() {
return "Grammatical Forms Charts";
}
/** Returns the base file name of the index chart. */
@Override
public String getFileName() {
return "index";
}
/**
 * Returns the introductory HTML shown at the top of the index chart: the main header followed
 * by a single, properly closed paragraph.
 */
@Override
public String getExplanation() {
    // The paragraph is terminated with </p>; the earlier text ended with a second opening <p>.
    return MAIN_HEADER
            + "<p>In this version a preliminary set of languages have additional grammatical information, as listed below.</p>";
}
/**
 * Writes the index chart: prepares the output directory, generates the per-locale subcharts,
 * then writes the anchors followed by the grammatical-features table.
 *
 * @param pw writer for the index page
 * @throws IOException if writing fails
 */
@Override
public void writeContents(FormattedFileWriter pw) throws IOException {
// Make sure the output directory exists and holds the shared stylesheet and include files.
FileCopier.ensureDirectoryExists(DIR);
FileCopier.copy(Chart.class, "index.css", DIR);
FormattedFileWriter.copyIncludeHtmls(DIR);
// The subcharts must be written first: the same Anchors object is handed to each subchart
// and its string form is written to the index page below.
FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
writeSubcharts(anchors);
pw.setIndex("Main Chart Index", "../index.html");
pw.write(anchors.toString());
showInfo(pw);
}
/**
 * Appends the "Grammatical Features Info" section to the index page: a heading, an
 * explanatory paragraph, and a table covering every locale that has grammar info.
 *
 * @param pw writer for the index page
 * @throws IOException if writing fails
 * @throws IllegalArgumentException if more than one {@link GrammaticalTarget} exists, since
 *     the table is built for the nominal target only
 */
private void showInfo(FormattedFileWriter pw) throws IOException {
    pw.append("<h2>Grammatical Features Info</h2>");
    pw.append(
            "<p>The following lists the available information about grammatical features for locales. "
                    + "Note that only the above locales have localized data, at this time. "
                    + INFO_ON_FEATURES
                    + "</p>");
    if (GrammaticalTarget.values().length > 1) {
        throw new IllegalArgumentException(
                "Needs adjustment for additional GrammaticalTarget.values()");
    }
    // Fetch the locale set once so that what is logged is exactly what is charted.
    final Set<String> localesWithGrammarInfo = SDI.hasGrammarInfo();
    System.out.println(localesWithGrammarInfo);
    TablePrinter tablePrinter = getFormattedGrammarInfo(localesWithGrammarInfo);
    pw.append(tablePrinter.toString());
}
/**
 * Builds a table with one or more rows per locale and grammatical feature, listing the
 * feature values for the nominal target.
 *
 * <p>If every usage (scope) of a feature has the same value set, a single row is emitted whose
 * Usage cell lists all the scopes. Otherwise one row is emitted per scope that has values.
 * Problems for a locale are collected and printed to standard output rather than aborting.
 *
 * @param localeIds the locales to include
 * @return the populated table
 */
private TablePrinter getFormattedGrammarInfo(Set<String> localeIds) {
    TablePrinter tablePrinter =
            new TablePrinter()
                    .addColumn(
                            "Locale", "class='source' width='1%'", null, "class='source'", true)
                    .setSortPriority(0)
                    .setBreakSpans(true)
                    .addColumn(
                            "ID",
                            "class='source' width='1%'",
                            CldrUtility.getDoubleLinkMsg(),
                            "class='source'",
                            true)
                    .setBreakSpans(true)
                    .addColumn(
                            "Feature",
                            "class='source' width='1%'",
                            null,
                            "class='source'",
                            true)
                    .setSortPriority(1)
                    .setBreakSpans(true)
                    .addColumn("Usage", "class='source'", null, "class='source'", true)
                    .addColumn("Values", "class='source'", null, "class='source'", true);

    for (String localeId : localeIds) {
        Set<String> failures = new LinkedHashSet<>();
        GrammarInfo grammarInfo = SDI.getGrammarInfo(localeId, false);
        if (grammarInfo == null) {
            // Other callers in this file treat the result as nullable; report and move on.
            System.out.println("# Failures, " + localeId + "\t[no grammar info]");
            continue;
        }
        String localeName = CONFIG.getEnglish().getName(localeId);

        for (GrammaticalFeature feature : GrammaticalFeature.values()) {
            Map<GrammaticalScope, Set<String>> scopeToValues =
                    grammarInfo.get(GrammaticalTarget.nominal, feature);
            if (scopeToValues.isEmpty()) {
                continue;
            }

            // Decide whether all scopes share one value set (single row) or not (row per scope).
            Set<String> values = null;
            boolean multiline = false;
            for (Entry<GrammaticalScope, Set<String>> entry : scopeToValues.entrySet()) {
                if (values == null) {
                    values = entry.getValue();
                } else if (!values.equals(entry.getValue())) {
                    multiline = true;
                    break;
                }
            }

            Set<String> sortedValues = new TreeSet<>(feature.getValueComparator());
            if (multiline) {
                for (GrammaticalScope usage : GrammaticalScope.values()) {
                    values = scopeToValues.get(usage);
                    if (values == null || values.isEmpty()) {
                        continue;
                    }
                    sortedValues.clear();
                    sortedValues.addAll(values);
                    addRow(
                            tablePrinter,
                            localeName,
                            localeId,
                            feature,
                            usage.toString(),
                            Joiner.on(", ").join(sortedValues));
                }
            } else {
                try {
                    sortedValues.addAll(values);
                    addRow(
                            tablePrinter,
                            localeName,
                            localeId,
                            feature,
                            Joiner.on(", ").join(scopeToValues.keySet()),
                            Joiner.on(", ").join(sortedValues));
                } catch (Exception e) {
                    // Best effort: keep charting other features. Record the exception type as
                    // well as the message, since the message alone may be null.
                    failures.add(e.toString());
                }
            }
        }
        if (!failures.isEmpty()) {
            System.out.println("# Failures, " + localeId + "\t" + failures);
        }
    }
    return tablePrinter;
}
/**
 * Appends one complete row (locale, id, feature, usage, values) to the given table.
 *
 * @param tablePrinter table receiving the row
 * @param locale content of the first cell
 * @param id content of the second cell
 * @param feature content of the third cell
 * @param usage content of the fourth cell
 * @param valueString content of the fifth cell
 */
public void addRow(
        TablePrinter tablePrinter,
        String locale,
        String id,
        GrammaticalFeature feature,
        String usage,
        final String valueString) {
    tablePrinter.addRow();
    tablePrinter.addCell(locale);
    tablePrinter.addCell(id);
    tablePrinter.addCell(feature);
    tablePrinter.addCell(usage);
    tablePrinter.addCell(valueString);
    tablePrinter.finishRow();
}
/** Shared unit converter obtained from the supplemental data. */
static final UnitConverter uc = SDI.getUnitConverter();

/**
 * For each base unit, a sorted map from conversion factor to a short unit id with that
 * factor. Only power-of-ten factors are recorded, plus the base unit itself at factor one;
 * when several units share a factor, the one with the shortest id is kept.
 */
static final Map<String, Map<Rational, String>> BASE_TO_FACTOR_TO_UNIT;

static {
    Map<String, Map<Rational, String>> baseToBest = new TreeMap<>();
    // Units that are never used as the "best" unit.
    ImmutableSet<String> skip = ImmutableSet.of("mile-scandinavian", "100-kilometer", "dunam");
    Output<String> baseOut = new Output<>();
    for (String longUnit :
            Validity.getInstance()
                    .getStatusToCodes(LstrType.unit)
                    .get(Validity.Status.regular)) {
        String shortUnit = uc.getShortId(longUnit);
        if (DEBUG) {
            System.out.println(shortUnit);
        }
        if (skip.contains(shortUnit)) {
            continue;
        }
        ConversionInfo info = uc.parseUnitId(shortUnit, baseOut, false);
        if (info == null) {
            continue;
        }
        // The first time a base unit is seen, seed its map with the base unit at factor one.
        Map<Rational, String> factorToUnit =
                baseToBest.computeIfAbsent(
                        baseOut.value,
                        base -> {
                            Map<Rational, String> seeded = new TreeMap<>();
                            seeded.put(Rational.ONE, base);
                            return seeded;
                        });
        if (!info.factor.isPowerOfTen()) {
            continue;
        }
        String old = factorToUnit.get(info.factor);
        if (old == null || old.length() > shortUnit.length()) {
            factorToUnit.put(info.factor, shortUnit);
        }
    }
    BASE_TO_FACTOR_TO_UNIT = CldrUtility.protectCollection(baseToBest);
    if (DEBUG) {
        for (Entry<String, Map<Rational, String>> entry : BASE_TO_FACTOR_TO_UNIT.entrySet()) {
            System.out.println(entry);
        }
    }
}
/**
 * Ranks a unit as a candidate for illustrating a grammatical gender. The natural ordering
 * puts the best candidate first: duration or length units, then metric units, then by
 * quantity name, then by closeness of the unit's size to the base unit, and finally by unit
 * id as a tie-breaker.
 */
class BestUnitForGender implements Comparable<BestUnitForGender> {
    final boolean durationOrLength; // true is better
    final boolean metric; // true is better
    final double distanceFromOne; // zero is better
    final String quantity;
    final String shortUnit;

    /**
     * @param shortUnit short unit id
     * @param quantity quantity of the unit, such as "length" or "duration"
     * @param systems measurement systems the unit belongs to; "metric" is preferred
     * @param baseSize size of the unit in base units; only its log-distance from 1 is kept
     */
    public BestUnitForGender(
            String shortUnit, String quantity, Collection<String> systems, double baseSize) {
        this.shortUnit = shortUnit;
        this.quantity = quantity;
        this.durationOrLength = quantity.equals("duration") || quantity.equals("length");
        this.metric = systems.contains("metric");
        this.distanceFromOne = Math.abs(Math.log(baseSize));
    }

    @Override
    public int compareTo(BestUnitForGender o) {
        // The boolean comparisons are reversed (other first) so that true sorts first.
        int diff = Boolean.compare(o.durationOrLength, durationOrLength);
        if (diff != 0) {
            return diff;
        }
        diff = Boolean.compare(o.metric, metric);
        if (diff != 0) {
            return diff;
        }
        diff = quantity.compareTo(o.quantity);
        if (diff != 0) {
            return diff;
        }
        diff = Double.compare(distanceFromOne, o.distanceFromOne);
        if (diff != 0) {
            return diff;
        }
        return shortUnit.compareTo(o.shortUnit);
    }

    @Override
    public int hashCode() {
        // Objects that compare as equal necessarily share the same shortUnit.
        return shortUnit.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // Also covers null: equals must return false rather than throw.
        if (!(obj instanceof BestUnitForGender)) {
            return false;
        }
        return compareTo((BestUnitForGender) obj) == 0;
    }

    @Override
    public String toString() {
        return shortUnit
                + "("
                + (durationOrLength ? "D" : "")
                + (metric ? "M" : "")
                + ":"
                + quantity
                + ":"
                + Math.round(distanceFromOne * 10)
                + ")";
    }
}
/** Pairs a table with the introductory HTML that is written immediately before it. */
public class TablePrinterWithHeader {
// HTML text written before the table
final String header;
// the table itself
final TablePrinter tablePrinter;
/**
 * @param header HTML text written before the table
 * @param tablePrinter the table
 */
public TablePrinterWithHeader(String header, TablePrinter tablePrinter) {
this.header = header;
this.tablePrinter = tablePrinter;
}
}
/**
 * Writes one subchart per grammar locale. Locales are skipped when they are root, carry a
 * region, have no nominal grammar info, have at most one case and one gender for units, or
 * have no cardinal plural rules.
 *
 * <p>Each subchart contains four tables: the grammatical features of the locale, its minimal
 * pairs, the unit patterns by case/gender/plural, and the power (square/cubic) patterns.
 *
 * @param anchors passed to each subchart when it is written
 * @throws IOException if writing a subchart fails
 * @throws IllegalArgumentException if a unit has no best base unit, or a power pattern cannot
 *     be combined with a unit pattern
 */
public void writeSubcharts(Anchors anchors) throws IOException {
    Set<String> locales = GrammarInfo.getGrammarLocales();
    LanguageTagParser ltp = new LanguageTagParser();
    Factory factory = CLDRConfig.getInstance().getCldrFactory();

    Comparator<String> caseOrder = GrammarInfo.CaseValues.COMPARATOR;
    Set<String> sortedCases = new TreeSet<>(caseOrder);
    Comparator<String> genderOrder = GrammarInfo.GenderValues.COMPARATOR;
    Set<String> sortedGenders = new TreeSet<>(genderOrder);

    Output<Double> sizeInBaseUnits = new Output<>();

    // collect the "best unit ordering"
    Map<String, BestUnitForGender> unitToBestUnit = new TreeMap<>();
    Set<String> rawUnitsToAddGrammar = GrammarInfo.getUnitsToAddGrammar();
    for (String longUnit : rawUnitsToAddGrammar) {
        final String shortUnit = uc.getShortId(longUnit);
        if (shortUnit.equals("generic")) {
            continue;
        }
        // Called only to fill in sizeInBaseUnits; the returned cell text is not needed here.
        getBestBaseUnit(uc, shortUnit, sizeInBaseUnits);
        String quantity = uc.getQuantityFromUnit(shortUnit, false);
        Set<String> systems = uc.getSystems(shortUnit);
        unitToBestUnit.put(
                shortUnit,
                new BestUnitForGender(shortUnit, quantity, systems, sizeInBaseUnits.value));
    }
    unitToBestUnit = ImmutableMap.copyOf(unitToBestUnit);

    Set<BestUnitForGender> sorted2 = new TreeSet<>(unitToBestUnit.values());
    System.out.println(sorted2);

    PlaceholderLocation placeholderPosition = PlaceholderLocation.missing;
    Matcher placeholderMatcher = UnitConverter.PLACEHOLDER.matcher("");
    Output<String> unitPatternOut = new Output<>();

    for (String locale : locales) {
        if (locale.equals("root")) {
            continue;
        }
        ltp.set(locale);
        String region = ltp.getRegion();
        if (!region.isEmpty()) {
            continue;
        }
        GrammarInfo grammarInfo = SDI.getGrammarInfo(locale, true);
        if (grammarInfo == null || !grammarInfo.hasInfo(GrammaticalTarget.nominal)) {
            continue;
        }
        CLDRFile cldrFile = factory.make(locale, true);

        {
            Collection<String> genders =
                    grammarInfo.get(
                            GrammaticalTarget.nominal,
                            GrammaticalFeature.grammaticalGender,
                            GrammaticalScope.units);
            sortedGenders.clear();
            sortedGenders.addAll(genders);
        }
        {
            Collection<String> rawCases =
                    grammarInfo.get(
                            GrammaticalTarget.nominal,
                            GrammaticalFeature.grammaticalCase,
                            GrammaticalScope.units);
            if (rawCases.isEmpty()) {
                rawCases = ImmutableSet.of(GrammaticalFeature.grammaticalCase.getDefault(null));
            }
            sortedCases.clear();
            sortedCases.addAll(rawCases);
        }
        if (sortedCases.size() <= 1 && sortedGenders.size() <= 1) {
            continue;
        }

        PluralInfo plurals = SDI.getPlurals(PluralType.cardinal, locale);
        if (plurals == null) {
            // Without plural rules nothing below can be built, so skip this locale.
            System.err.println("No " + PluralType.cardinal + " plurals for " + locale);
            continue;
        }
        Collection<Count> adjustedPlurals = plurals.getAdjustedCounts();
        ICUServiceBuilder isb = ICUServiceBuilder.forLocale(CLDRLocale.getInstance(locale));
        DecimalFormat decFormat = isb.getNumberFormat(1);

        Map<String, TablePrinterWithHeader> info = new LinkedHashMap<>();

        TablePrinter tablePrinter = getFormattedGrammarInfo(Collections.singleton(locale));
        info.put(
                "Grammatical Features",
                new TablePrinterWithHeader(
                        "<p>The following lists the available information about grammatical features for this locale. "
                                + INFO_ON_FEATURES
                                + "</p>",
                        tablePrinter));

        // because some locales have more units with grammar, get the additional ones. Also grab
        // the minimal pairs
        Set<String> unitsToAddGrammar = new TreeSet<>(rawUnitsToAddGrammar);
        Map<PathHeader, String> minimalInfo = new TreeMap<>();
        PathHeader.Factory phf = PathHeader.getFactory();
        for (String path : cldrFile) {
            if (!path.startsWith("//ldml/units/unitLength[@type=\"long\"]/unit")) {
                if (path.startsWith("//ldml/numbers/minimalPairs/")) {
                    if (!path.contains("ordinal")) {
                        minimalInfo.put(
                                phf.fromPath(path), cldrFile.getStringValueWithBailey(path));
                    }
                }
                continue;
            }
            XPathParts parts = XPathParts.getFrozenInstance(path);
            String foundUnit = parts.getAttributeValue(3, "type");
            if (unitsToAddGrammar.contains(foundUnit)) {
                continue;
            }
            // Paths of interest look like:
            // ldml/units/unitLength[@type="long"]/unit[@type="duration-decade"]/gender
            // ldml/units/unitLength[@type="long"]/unit[@type="duration-decade"]/unitPattern[@count="one"][@case="accusative"]
            switch (parts.getElement(-1)) {
                case "gender":
                    unitsToAddGrammar.add(foundUnit);
                    break;
                case "unitPattern":
                    if (parts.getAttributeValue(4, "case") != null) {
                        unitsToAddGrammar.add(foundUnit);
                    }
                    break;
            }
        }

        TablePrinter minimalPrinter =
                new TablePrinter()
                        .addColumn(
                                "Type",
                                "class='source' width='1%'",
                                CldrUtility.getDoubleLinkMsg(),
                                "class='source'",
                                true)
                        .setRepeatHeader(true)
                        .addColumn(
                                "Size",
                                "class='source' width='1%'",
                                null,
                                "class='source'",
                                true)
                        .setSortPriority(0)
                        .setHidden(true)
                        .setBreakSpans(true)
                        .addColumn(
                                "Code",
                                "class='source' width='1%'",
                                null,
                                "class='source'",
                                true)
                        .addColumn("Pattern", "class='source'", null, "class='target'", true)
                        .addColumn(
                                "Formatted Sample",
                                "class='source'",
                                null,
                                "class='target'",
                                true);
        // The hidden Size column carries a running counter, which is the sort key.
        int counter = 0;
        ExampleGenerator exampleGenerator = new ExampleGenerator(cldrFile, CONFIG.getEnglish());
        for (Entry<PathHeader, String> entry : minimalInfo.entrySet()) {
            PathHeader pathHeader = entry.getKey();
            String value = entry.getValue();
            minimalPrinter
                    .addRow()
                    .addCell(pathHeader.getHeader())
                    .addCell(counter++)
                    .addCell(pathHeader.getCode())
                    .addCell(value)
                    .addCell(
                            exampleGenerator.getExampleHtml(
                                    pathHeader.getOriginalPath(), value));
            // finish the row
            minimalPrinter.finishRow();
        }
        info.put(
                "Minimal Pairs",
                new TablePrinterWithHeader(
                        "<p>This table has the minimal pairs used to test the appropriateness of different values.</p>\n",
                        minimalPrinter));

        final PluralRules pluralRules = plurals.getPluralRules();

        // set up the table and add the headers
        TablePrinter caseTablePrinter =
                new TablePrinter()
                        .addColumn(
                                "Quantity",
                                "class='source' width='1%'",
                                null,
                                "class='source'",
                                true)
                        .setSortPriority(0)
                        .setRepeatHeader(true)
                        .addColumn(
                                "Size",
                                "class='source' width='1%'",
                                null,
                                "class='source'",
                                true)
                        .setSortPriority(1)
                        .setHidden(true)
                        .addColumn(
                                "Unit",
                                "class='source' width='1%'",
                                CldrUtility.getDoubleLinkMsg(),
                                "class='source'",
                                true)
                        .setSortPriority(2)
                        .setBreakSpans(true);
        if (sortedGenders.size() > 1) {
            caseTablePrinter
                    .addColumn(
                            "Gender", "class='source' width='1%'", null, "class='source'", true)
                    .addColumn(
                            "Gender MP + unit", "class='target'", null, "class='source'", true);
        }
        if (sortedCases.size() > 1) {
            caseTablePrinter.addColumn(
                    "Case", "class='source' width='1%'", null, "class='source'", true);
            String widthStringTarget = "class='target'";
            addTwoColumns(
                    caseTablePrinter, widthStringTarget, adjustedPlurals, pluralRules, true);
        }

        // now get the case and/or gender items
        for (String longUnit : unitsToAddGrammar) {
            final String shortUnit = uc.getShortId(longUnit);
            String unitCell = getBestBaseUnit(uc, shortUnit, sizeInBaseUnits);
            String quantity =
                    shortUnit.contentEquals("generic")
                            ? "temperature"
                            : uc.getQuantityFromUnit(shortUnit, false);

            String genderFormatted = "n/a";
            String gender = "n/a";
            if (sortedGenders.size() > 1) {
                gender =
                        UnitPathType.gender.getTrans(
                                cldrFile, "long", shortUnit, null, null, null, null);
                if (gender == null) {
                    gender = "n/a";
                } else {
                    String genderMinimalPair =
                            cldrFile.getStringValue(
                                    "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\""
                                            + gender
                                            + "\"]");
                    if (genderMinimalPair != null) {
                        Count bestCount =
                                adjustedPlurals.contains(Count.one) ? Count.one : Count.other;
                        String unitPattern =
                                cldrFile.getStringValueWithBailey(
                                        "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\""
                                                + longUnit
                                                + "\"]/unitPattern"
                                                + GrammarInfo.getGrammaticalInfoAttributes(
                                                        grammarInfo,
                                                        UnitPathType.unit,
                                                        bestCount.toString(),
                                                        null,
                                                        "nominative"));
                        // Strip the number placeholder to get the bare unit name.
                        String unit =
                                unitPattern.replace("\u00A0", "").replace("{0}", "").trim();
                        genderFormatted = MessageFormat.format(genderMinimalPair, unit);
                    }
                }
            }

            if (sortedCases.size() <= 1) {
                // Gender-only locale: one row per unit.
                caseTablePrinter
                        .addRow()
                        .addCell(quantity)
                        .addCell(sizeInBaseUnits.value)
                        .addCell(unitCell)
                        .addCell(gender)
                        .addCell(genderFormatted);
                // finish the row
                caseTablePrinter.finishRow();
            } else {
                if (unitCell == null
                        || quantity == null
                        || gender == null
                        || sizeInBaseUnits.value == null) {
                    throw new IllegalArgumentException("No best base unit for: " + shortUnit);
                }
                for (String case1 : sortedCases) {
                    // start a row, then add the cells in the row.
                    caseTablePrinter
                            .addRow()
                            .addCell(quantity)
                            .addCell(sizeInBaseUnits.value)
                            .addCell(unitCell);
                    if (sortedGenders.size() > 1) {
                        caseTablePrinter.addCell(gender).addCell(genderFormatted);
                    }
                    caseTablePrinter.addCell(case1);

                    for (Count plural : adjustedPlurals) {
                        Double sample = getBestSample(pluralRules, plural);

                        // Example of a case minimal pair:
                        // <caseMinimalPairs case="nominative">{0} kostet
                        // €3,50.</caseMinimalPairs>
                        String unitPattern =
                                cldrFile.getStringValueWithBailey(
                                        "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\""
                                                + longUnit
                                                + "\"]/unitPattern"
                                                + GrammarInfo.getGrammaticalInfoAttributes(
                                                        grammarInfo,
                                                        UnitPathType.unit,
                                                        plural.toString(),
                                                        null,
                                                        case1));
                        unitPattern = unitPattern.replace("\u00A0", " ");
                        caseTablePrinter.addCell(unitPattern);

                        String numberPlusUnit =
                                MessageFormat.format(unitPattern, decFormat.format(sample));
                        String caseMinimalPair =
                                cldrFile.getStringValue(
                                        "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\""
                                                + case1
                                                + "\"]");
                        String withContext =
                                caseMinimalPair == null
                                        ? numberPlusUnit
                                        : MessageFormat.format(caseMinimalPair, numberPlusUnit);

                        caseTablePrinter.addCell(withContext);
                    }
                    // finish the row
                    caseTablePrinter.finishRow();
                }
            }
        }
        info.put(
                "Unit Case and Gender Info",
                new TablePrinterWithHeader(
                        "<p>This table has rows containing unit forms appropriate for different grammatical cases and plural forms. "
                                + "Each plural form has a sample value such as <i>(1.2)</i> or <i>(2)</i>. "
                                + "That value is used with the localized unit pattern to form a formatted measure, such as “2,0 Stunden”. "
                                + "That formatted measure is in turn substituted into a "
                                + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair pattern</a> to get the "
                                + FORMATTED_SAMPLE
                                + "</b>. "
                                + "The <b>Gender</b> column is informative; it just supplies the supplied gender for the unit.</p>\n"
                                + "<ul><li>For clarity, conversion values are supplied for non-metric units. "
                                + "For more information, see <a target='unit_conversions' href='../supplemental/unit_conversions.html'>Unit Conversions</a>.</li>"
                                + "</ul>\n",
                        caseTablePrinter));

        // get best units for gender.
        Multimap<String, BestUnitForGender> bestUnitForGender = TreeMultimap.create();
        for (String longUnit : unitsToAddGrammar) {
            final String shortUnit = uc.getShortId(longUnit);
            String gender =
                    UnitPathType.gender.getTrans(
                            cldrFile, "long", shortUnit, null, null, null, null);
            final BestUnitForGender bestUnit = unitToBestUnit.get(shortUnit);
            if (gender != null && bestUnit != null) {
                bestUnitForGender.put(gender, bestUnit);
            }
        }
        for (Entry<String, Collection<BestUnitForGender>> entry :
                bestUnitForGender.asMap().entrySet()) {
            List<String> items =
                    entry.getValue().stream()
                            .map(x -> x.shortUnit)
                            .collect(Collectors.toList());
            System.out.println(locale + "\t" + entry.getKey() + "\t" + items);
        }

        TablePrinter powerTable =
                new TablePrinter()
                        .addColumn(
                                "Unit",
                                "class='source' width='1%'",
                                CldrUtility.getDoubleLinkMsg(),
                                "class='source'",
                                true)
                        .setSortPriority(2)
                        .setRepeatHeader(true)
                        .addColumn(
                                "Case",
                                "class='source' width='1%'",
                                null,
                                "class='source'",
                                true)
                        .addColumn(
                                "Gender",
                                "class='source' width='1%'",
                                null,
                                "class='source'",
                                true);
        // Spread the remaining width evenly over the pattern/sample columns (3 decimals).
        double width = ((int) ((99.0 / (adjustedPlurals.size() * 2 + 1)) * 1000)) / 1000.0;
        String widthStringTarget = "class='target' width='" + width + "%'";
        addTwoColumns(powerTable, widthStringTarget, adjustedPlurals, pluralRules, false);

        // now get the items
        for (String power : Arrays.asList("power2", "power3")) {
            String unitCell = power;

            for (String gender : sortedGenders) {
                // The first unit in the sorted collection is the best illustration.
                Collection<BestUnitForGender> bestUnits = bestUnitForGender.get(gender);
                String bestUnit = null;
                if (!bestUnits.isEmpty()) {
                    bestUnit = bestUnits.iterator().next().shortUnit;
                }

                for (String case1 : sortedCases) {
                    // start a row, then add the cells in the row.
                    powerTable
                            .addRow()
                            .addCell(unitCell)
                            .addCell(case1)
                            .addCell(gender + (bestUnit == null ? "" : "\n(" + bestUnit + ")"));

                    for (Count plural : adjustedPlurals) {
                        String localizedPowerPattern =
                                UnitPathType.power.getTrans(
                                        cldrFile,
                                        "long",
                                        power,
                                        plural.toString(),
                                        case1,
                                        gender,
                                        null);
                        localizedPowerPattern = localizedPowerPattern.replace("\u00A0", " ");
                        powerTable.addCell(localizedPowerPattern);

                        if (bestUnit == null) {
                            powerTable.addCell("n/a");
                        } else {
                            Double samplePlural = getBestSample(pluralRules, plural);
                            String localizedUnitPattern =
                                    UnitPathType.unit.getTrans(
                                            cldrFile,
                                            "long",
                                            bestUnit,
                                            plural.toString(),
                                            case1,
                                            gender,
                                            null);
                            placeholderPosition =
                                    UnitConverter.extractUnit(
                                            placeholderMatcher,
                                            localizedUnitPattern,
                                            unitPatternOut);
                            if (placeholderPosition != PlaceholderLocation.middle) {
                                localizedUnitPattern = unitPatternOut.value;
                                localizedUnitPattern =
                                        localizedUnitPattern.replace("\u00A0", " ");
                                String placeholderPattern =
                                        placeholderPosition == PlaceholderLocation.missing
                                                ? localizedUnitPattern
                                                : placeholderMatcher.group();

                                String combined;
                                try {
                                    combined =
                                            UnitConverter.combineLowercasing(
                                                    new ULocale(locale),
                                                    "long",
                                                    localizedPowerPattern,
                                                    localizedUnitPattern);
                                } catch (Exception e) {
                                    // Keep the original exception as the cause for diagnosis.
                                    throw new IllegalArgumentException(
                                            locale
                                                    + ") Can't combine "
                                                    + "localizedPowerPattern=«"
                                                    + localizedPowerPattern
                                                    + "» with localizedUnitPattern=«"
                                                    + localizedUnitPattern
                                                    + "»",
                                            e);
                                }
                                String combinedWithPlaceholder =
                                        UnitConverter.addPlaceholder(
                                                combined,
                                                placeholderPattern,
                                                placeholderPosition);

                                String sample =
                                        MessageFormat.format(
                                                combinedWithPlaceholder,
                                                decFormat.format(samplePlural));

                                String caseMinimalPair =
                                        cldrFile.getStringValue(
                                                "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\""
                                                        + case1
                                                        + "\"]");
                                String withContext =
                                        caseMinimalPair == null
                                                ? sample
                                                : MessageFormat.format(caseMinimalPair, sample);

                                powerTable.addCell(withContext);
                            } else {
                                powerTable.addCell("n/a");
                            }
                        }
                    }
                    // finish the row
                    powerTable.finishRow();
                }
            }
        }
        info.put(
                "Unit Power Components",
                new TablePrinterWithHeader(
                        "<p>This table shows the square (power2) and cubic (power3) patterns, which may vary by case, gender, and plural forms. "
                                + "Each gender is illustrated with a unit where possible, such as <i>(second)</i> or <i>(meter)</i>. "
                                + "Each plural category is illustrated with a unit where possible, such as <i>(1)</i> or <i>(1.2)</i>. "
                                + "The patterns are first supplied, and then combined with the samples and "
                                + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair patterns</a></b> "
                                + "in the next <b>"
                                + FORMATTED_SAMPLE
                                + "</b> column."
                                + "</p>",
                        powerTable));

        if (!info.isEmpty()) {
            String name = ENGLISH.getName(locale);
            new Subchart(name + ": Unit Grammar Info", locale, info).writeChart(anchors);
        }
    }
}
/**
 * Adds a pair of columns to the table for every plural category: one for the raw pattern and
 * one for the pattern combined with a sample number and case minimal pair.
 *
 * @param caseTablePrinter table receiving the columns
 * @param widthStringTarget header attributes used for both columns of each pair
 * @param adjustedPlurals plural categories, in the order the columns are added
 * @param pluralRules rules used to choose the sample number shown in the second header
 * @param spanRows span-rows setting applied to the pattern column
 */
public void addTwoColumns(
        TablePrinter caseTablePrinter,
        String widthStringTarget,
        Collection<Count> adjustedPlurals,
        final PluralRules pluralRules,
        boolean spanRows) {
    final String cellAttributes = "class='target'";
    for (Count pluralCategory : adjustedPlurals) {
        // Work out both headings before touching the table.
        final Double sampleNumber = getBestSample(pluralRules, pluralCategory);
        final String patternHeading = "Pattern for " + pluralCategory.toString();
        final String sampleHeading = "Case MP + pattern with " + sampleNumber;

        caseTablePrinter
                .addColumn(patternHeading, widthStringTarget, null, cellAttributes, true)
                .setSpanRows(spanRows);
        caseTablePrinter.addColumn(
                sampleHeading, widthStringTarget, null, cellAttributes, true);
    }
}
/** Cache of results from {@link #getBestBaseUnit}, keyed by short unit id. */
static final Map<String, Pair<String, Double>> BEST_UNIT_CACHE = new HashMap<>();

/**
 * Returns the text for a unit cell. By default this is the English long display name of the
 * unit. When the unit differs from the closest power-of-ten unit of the same base unit, the
 * text is instead the short unit id followed by its value expressed in that unit, for
 * example {@code shortUnit + "\n( = " + measure + ")"}.
 *
 * <p>Results are cached per short unit id. If no power-of-ten units are known for the base
 * unit, the display name is returned unchanged.
 *
 * @param uc the unit converter
 * @param shortUnit short unit id
 * @param sizeInBaseUnits output: the unit's conversion factor as a double, or -1 if the unit
 *     id cannot be parsed
 * @return the cell text, or null if no English pattern can be found for the best unit (that
 *     case is not cached)
 */
public static String getBestBaseUnit(
        UnitConverter uc, final String shortUnit, Output<Double> sizeInBaseUnits) {
    Pair<String, Double> cached = BEST_UNIT_CACHE.get(shortUnit);
    if (cached != null) {
        sizeInBaseUnits.value = cached.getSecond();
        return cached.getFirst();
    }
    String unitCell =
            ENGLISH.getStringValue(
                    "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\""
                            + uc.getLongId(shortUnit)
                            + "\"]/displayName");
    Output<String> baseUnit = new Output<>();
    ConversionInfo info = uc.parseUnitId(shortUnit, baseUnit, false);
    if (info == null) {
        sizeInBaseUnits.value = -1d;
    } else {
        sizeInBaseUnits.value = info.factor.doubleValue();
        Map<Rational, String> factorToUnit = BASE_TO_FACTOR_TO_UNIT.get(baseUnit.value);

        // Walk the factors in ascending order and keep the last unit whose factor is below
        // twice the input factor (always keeping at least the first candidate).
        String bestUnit = null;
        Rational bestFactor = null;
        if (factorToUnit != null) {
            Rational inputBoundary = Rational.of(2).multiply(info.factor);
            for (Entry<Rational, String> entry : factorToUnit.entrySet()) {
                final String possibleUnit = entry.getValue();
                if (possibleUnit.equals("cup-jp")) {
                    continue; // skip odd unit
                }
                final Rational currentFactor = entry.getKey();
                if (bestUnit != null && currentFactor.compareTo(inputBoundary) >= 0) {
                    break;
                }
                bestFactor = currentFactor;
                bestUnit = possibleUnit;
            }
        }

        // With no candidate unit there is nothing to rescale against; keep the display name.
        if (bestFactor != null) {
            bestFactor = info.factor.divide(bestFactor); // scale for bestUnit
            if (!bestFactor.equals(Rational.ONE) || !shortUnit.equals(bestUnit)) {
                final String string = bestFactor.toString(FormatStyle.repeating);
                final double bestDoubleFactor = bestFactor.doubleValue();
                String pluralCategory = ENGLISH_PLURAL_RULES.select(bestDoubleFactor);
                final String unitPath =
                        "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\""
                                + uc.getLongId(bestUnit)
                                + "\"]/unitPattern[@count=\""
                                + pluralCategory
                                + "\"]";
                String unitPattern = ENGLISH.getStringValue(unitPath);
                if (unitPattern == null) {
                    // No direct pattern: try to compose one from the unit id.
                    final UnitId unitId = uc.createUnitId(bestUnit);
                    unitPattern =
                            unitId.toString(ENGLISH, "long", pluralCategory, null, null, false);
                    if (unitPattern == null) {
                        return null;
                    }
                }
                // A factor that still prints as a fraction is shown as an approximate double.
                String unitMeasure =
                        MessageFormat.format(
                                unitPattern,
                                string.contains("/") ? "~" + bestDoubleFactor : string);
                unitCell = shortUnit + "\n( = " + unitMeasure + ")";
            }
        }
    }
    BEST_UNIT_CACHE.put(shortUnit, Pair.of(unitCell, sizeInBaseUnits.value));
    return unitCell;
}
/**
 * Picks a representative sample number for a plural category: the second sample when there
 * are at least two, otherwise the only one. Decimal samples are consulted only when the
 * default sample lookup returns nothing.
 *
 * @param pluralRules rules supplying the samples
 * @param plural the plural category
 * @return the chosen sample
 * @throws IllegalArgumentException if the category has no samples at all
 */
private Double getBestSample(PluralRules pluralRules, Count plural) {
    final String keyword = plural.toString();
    Collection<Double> candidates = pluralRules.getSamples(keyword);
    if (candidates.isEmpty()) {
        candidates = pluralRules.getSamples(keyword, SampleType.DECIMAL);
    }
    final int available = candidates.size();
    if (available == 0) {
        throw new IllegalArgumentException("shouldn't happen");
    }
    if (available == 1) {
        return candidates.iterator().next();
    }
    return Iterables.skip(candidates, 1).iterator().next();
}
/**
 * Chart for a single locale. It writes each supplied table, preceded by its header and
 * explanatory text, to the HTML page, and also writes every table to a TSV file named after
 * the chart file in the "tsv/" subdirectory.
 */
private class Subchart extends Chart {
    private final String title;
    private final String file;
    // section header -> table plus its introductory text, in output order
    private final Map<String, TablePrinterWithHeader> tablePrinter;

    @Override
    public boolean getShowDate() {
        return false;
    }

    /**
     * @param title page title
     * @param file base file name, used for both the HTML chart and the TSV file
     * @param info section header mapped to the table and explanation for that section
     */
    public Subchart(String title, String file, Map<String, TablePrinterWithHeader> info) {
        super();
        this.title = title;
        this.file = file;
        this.tablePrinter = info;
    }

    @Override
    public String getDirectory() {
        return DIR;
    }

    @Override
    public String getTitle() {
        return title;
    }

    @Override
    public String getFileName() {
        return file;
    }

    @Override
    public String getExplanation() {
        return MAIN_HEADER
                + "<p><i>Unit Inflections, Phase 1:</i> The end goal is to add full case and gender support for formatted units. "
                + "During Phase 1, a limited number of locales and units of measurement are being handled in CLDR v38, "
                + "so that we can work kinks out of the process before expanding to all units for all locales.</p>\n"
                + "<p>This chart shows grammatical information available for certain unit and/or power patterns. These patterns are also illustrated with a <b>"
                + FORMATTED_SAMPLE
                + "</b> that combines the patterns with sample numbers and "
                + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair patterns</a></b>. "
                + "For example, “… für {0} …” is a <i>case minimal pair pattern</i> that requires the placeholder {0} to be in the accusative case in German. By inserting into a minimal pair pattern, "
                + "it is easier to ensure that the original unit and/or power patterns are correctly inflected. </p>\n"
                + "<p><b>Notes</b>"
                + "<ul><li>We don't have the cross-product of minimal pairs for both case and plural forms, "
                + "so the <i>case minimal pair pattern</i> might not be correct for the row’s plural category, especially in the nominative.</li>"
                + "<li>Translators often have difficulties with the minimal pair patterns, "
                + "since they are <i>transcreations</i> not translations. The Hindi minimal pair patterns for case and gender have been discarded because they were incorrectly translated.</li>"
                + "<li>We don't expect translators to supply minimal pair patterns that are natural for any kind of placeholder: "
                + "for example, it is probably not typical to use the vocative with 3.2 meters! So look at the <b>"
                + FORMATTED_SAMPLE
                + "</b> as an aid for helping to see the context for grammatical inflections, but one that has limitations.</li></ul>";
    }

    /**
     * Writes a table of contents (when there is more than one section) followed by each
     * section, and mirrors every table into the TSV file.
     */
    @Override
    public void writeContents(FormattedFileWriter pw) throws IOException {
        try (PrintWriter tsv =
                FileUtilities.openUTF8Writer(getDirectory() + "tsv/", file + ".tsv")) {
            if (tablePrinter.size() > 1) {
                pw.write("<h2>Table of Contents</h2>\n");
                pw.append("<ol>\n");
                for (String header : tablePrinter.keySet()) {
                    pw.write(writeTOC(header));
                }
                pw.append("</ol>\n");
            }
            // Blank line between tables in the TSV file, but not before the first one.
            String sep = "";
            for (Entry<String, TablePrinterWithHeader> entry : tablePrinter.entrySet()) {
                final String header = entry.getKey();
                writeHeader(pw, header);
                final TablePrinterWithHeader explanation = entry.getValue();
                pw.write(explanation.header);
                pw.write(explanation.tablePrinter.toTable());
                tsv.write(sep + "# " + entry.getKey() + "\n");
                explanation.tablePrinter.toTsv(tsv);
                sep = "\n";
            }
        }
    }

    /** Writes a section heading that is also a named anchor (target of the TOC links). */
    private void writeHeader(FormattedFileWriter pw, final String header) throws IOException {
        pw.write(
                "<h2><a name='"
                        + FileUtilities.anchorize(header)
                        + "'>"
                        + header
                        + "</a></h2>\n");
    }

    /** Returns the table-of-contents list item linking to the section with this header. */
    private String writeTOC(String header) {
        return "<li><b>"
                + "<a href='#"
                + FileUtilities.anchorize(header)
                + "'>"
                + header
                + "</a>"
                + "</b></li>\n";
    }
}
/** Collator built from the emoji collation rules found in the root collation file. */
public static RuleBasedCollator RBC;

static {
    final String collationDirectory = CLDRPaths.COMMON_DIRECTORY + "collation/";
    final CLDRFile rootCollation = Factory.make(collationDirectory, ".*").make("root", false);
    final String emojiRules =
            rootCollation.getStringValue(
                    "//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");
    // Note: a former workaround (ticket 8288) that quoted the keycap sequences is disabled.
    try {
        RBC = new RuleBasedCollator(emojiRules);
    } catch (Exception e) {
        throw new IllegalArgumentException(
                "Failure in rules for " + collationDirectory + "root", e);
    }
}
}