// blob: 7f5868484755f9aeea9c90d3413f22dff260aa20 [file] [log] [blame] (repository-viewer header captured with the source; not part of the Java code)
package org.unicode.cldr.test;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.ApproximateWidth;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.SupplementalDataInfo;
public class CheckWidths extends CheckCLDR {
// remember to add this class to the list in CheckCLDR.getCheckAll
// to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* -t.*CheckWidths.*
// Coverage levels for the locale of the file currently being checked; assigned in
// setCldrFileToCheck and read in handleCheck. Held per instance (not static) because the value
// is locale-specific: with a static field, configuring a second CheckWidths for another locale
// would silently replace the coverage data used by the first.
private CoverageLevel2 coverageLevel;
// Not read or written anywhere in the code visible in this class.
// NOTE(review): looks like a removal candidate - confirm nothing relies on it reflectively.
private Level requiredLevel;
// Supplemental data used to obtain coverageLevel; assigned in setCldrFileToCheck.
SupplementalDataInfo supplementalData;
// Width unit for the DISPLAY_WIDTH limits: the approximate width of the ideograph below.
private static final double EM = ApproximateWidth.getWidth("月");
// When true, handleCheck records each Limit that reported a problem in the 'found' set.
private static final boolean DEBUG = true;
// How Limit.hasProblem measures a value:
// CODE_POINTS - number of Unicode code points (String.codePointCount);
// DISPLAY_WIDTH - approximate rendered width (ApproximateWidth.getWidth).
private enum Measure {
CODE_POINTS, DISPLAY_WIDTH
}
// Direction of a limit:
// MINIMUM - a value is a problem when its measure is below the reference;
// MAXIMUM - a value is a problem when its measure is above the reference.
private enum LimitType {
MINIMUM, MAXIMUM
}
// Preprocessing that Limit.hasProblem applies to a value before measuring it:
// NONE - measure the value unchanged;
// QUOTES - remove apostrophes;
// PLACEHOLDERS - remove placeholders matched by PLACEHOLDER_PATTERN;
// NUMBERSYMBOLS - remove the bidi marks U+200E, U+200F and U+061C;
// NUMBERFORMAT - split on the first ';', keep the second part only if it is strictly longer
//                than the first, then remove apostrophes.
private enum Special {
NONE, QUOTES, PLACEHOLDERS, NUMBERSYMBOLS, NUMBERFORMAT
}
// Matches a placeholder made of a single digit in braces, such as {0} or {1}.
// Used by Limit.hasProblem to strip placeholders before measuring (Special.PLACEHOLDERS).
private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}");
/**
 * One size constraint on a value: a warning threshold, an error threshold, the way the value is
 * measured, whether the thresholds are lower or upper bounds, and the preprocessing applied to
 * the value before it is measured.
 */
private static class Limit {
    // Bidi marks (U+200E, U+200F, U+061C) that are ignored when measuring number symbols.
    // Compiled once here rather than on every call through String.replaceAll.
    private static final Pattern BIDI_MARKS = Pattern.compile("[\u200E\u200F\u061C]");

    // A measure past this reference produces at least a warning.
    final double warningReference;
    // A measure past this reference may be escalated to an error (see hasProblem).
    final double errorReference;
    final LimitType limit;
    final Measure measure;
    final Special special;
    // Message template; hasProblem supplies {0} = warningReference, {1} = measured value,
    // {2} = rounded percentage.
    final String message;
    final Subtype subtype;
    // Stored by the constructor; not read anywhere inside this class.
    final boolean debug;

    /**
     * Creates a limit and derives its message template and status subtype from the limit type
     * and the measure.
     *
     * @throws IllegalArgumentException if {@code limit} is neither MINIMUM nor MAXIMUM
     */
    public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug) {
        this.debug = debug;
        this.warningReference = warningReference;
        this.errorReference = errorReference;
        this.limit = limit;
        this.measure = measure;
        this.special = special;
        switch (limit) {
        case MINIMUM:
            this.message = measure == Measure.CODE_POINTS
                ? "Expected no fewer than {0} character(s), but was {1}."
                : "Too narrow by about {2}% (with common fonts).";
            this.subtype = Subtype.valueTooNarrow;
            break;
        case MAXIMUM:
            this.message = measure == Measure.CODE_POINTS
                ? "Expected no more than {0} character(s), but was {1}."
                : "Too wide by about {2}% (with common fonts).";
            this.subtype = Subtype.valueTooWide;
            break;
        default:
            throw new IllegalArgumentException();
        }
    }

    /** Creates a limit with the debug flag off. */
    public Limit(double warningReference, double errorReference, Measure measure, LimitType limit, Special special) {
        this(warningReference, errorReference, measure, limit, special, false);
    }

    /**
     * Measures {@code value} and, if it violates this limit, appends one status to
     * {@code result}.
     *
     * <p>The status is a warning when only the warning reference is passed. It becomes an error
     * when the error reference is passed as well, the phase reported by {@code cause} is not
     * BUILD, and {@code aliasedAndComprehensive} is false.
     *
     * @param value the value to check; preprocessed according to {@code special} first
     * @param result list that receives the status, if any
     * @param cause the check on whose behalf the status is reported
     * @param aliasedAndComprehensive when true, the problem is never escalated past a warning
     * @return true if a status was added, false if the value is within the warning reference
     */
    boolean hasProblem(String value, List<CheckStatus> result, CheckCLDR cause, boolean aliasedAndComprehensive) {
        final String measured = normalize(value);
        final double valueMeasure = measure == Measure.CODE_POINTS
            ? measured.codePointCount(0, measured.length())
            : ApproximateWidth.getWidth(measured);
        CheckStatus.Type errorType = CheckStatus.warningType;
        switch (limit) {
        case MINIMUM:
            if (valueMeasure >= warningReference) {
                return false;
            }
            if (valueMeasure < errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprehensive) {
                errorType = CheckStatus.errorType;
            }
            break;
        case MAXIMUM:
            if (valueMeasure <= warningReference) {
                return false;
            }
            if (valueMeasure > errorReference && cause.getPhase() != Phase.BUILD && !aliasedAndComprehensive) {
                errorType = CheckStatus.errorType;
            }
            break;
        }
        // the 115 is so that we don't show small percentages
        // the /10 ...*10 is to round to multiples of 10% percent
        double percent = (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) * 10;
        result.add(new CheckStatus().setCause(cause)
            .setMainType(errorType)
            .setSubtype(subtype)
            .setMessage(message, warningReference, valueMeasure, percent));
        return true;
    }

    /** Applies the preprocessing selected by {@code special} and returns the text to measure. */
    private String normalize(String value) {
        switch (special) {
        case NUMBERFORMAT: {
            // If it's a number format with positive and negative subpatterns, just check the longer one.
            String[] subpatterns = value.split(";", 2);
            String longer = (subpatterns.length == 2 && subpatterns[1].length() > subpatterns[0].length())
                ? subpatterns[1]
                : subpatterns[0];
            return longer.replace("'", "");
        }
        case QUOTES:
            return value.replace("'", "");
        case PLACEHOLDERS:
            return PLACEHOLDER_PATTERN.matcher(value).replaceAll("");
        case NUMBERSYMBOLS:
            // don't include LRM/RLM/ALM when checking length of number symbols
            return BIDI_MARKS.matcher(value).replaceAll("");
        default:
            return value;
        }
    }
}
// WARNING: errors must occur before warnings!!
// we allow unusual units and English units to be a little longer
// Regex alternation (parenthesized) of unit types whose narrow patterns get a slightly larger
// width budget; it is spliced into a path pattern of the lookup table. Grouped by unit category.
static final String ALLOW_LONGER = "("
    + "area-acre|area-square-foot|area-square-mile"
    + "|length-foot|length-inch|length-mile|length-light-year|length-yard"
    + "|mass-ounce|mass-pound"
    + "|power-horsepower"
    + "|pressure-inch-hg|pressure-millimeter-of-mercury"
    + "|speed-mile-per-hour"
    + "|temperature-fahrenheit"
    + "|volume-cubic-mile"
    + "|acceleration-g-force"
    + "|speed-kilometer-per-hour|speed-meter-per-second"
    + ")";
// The single unit type that gets the largest narrow-width budget in the lookup table.
static final String ALLOW_LONGEST = "consumption-liter-per-100kilometers";
// Table of size limits keyed by regular expressions over CLDR paths; handleCheck fetches the
// Limit[] for a path with lookup.get(path).
// Variables usable inside the path patterns:
//   %A - any non-empty value in double quotes
//   %P - the quoted value "am" or "pm"
//   %Q - intended to match anything but am or pm
// Every DISPLAY_WIDTH reference is expressed as a multiple of EM; each Limit is constructed as
// (warning reference, error reference, measure, limit type, preprocessing).
// NOTE(review): specific entries are listed before more general ones that would also match
// (e.g. the japanese/roc era rules before the generic era rule), so the lookup presumably
// returns the first matching entry - confirm against RegexLookup before reordering anything.
static RegexLookup<Limit[]> lookup = new RegexLookup<Limit[]>()
.setPatternTransform(RegexLookup.RegexFinderTransformPath)
.addVariable("%A", "\"[^\"]+\"")
.addVariable("%P", "\"[ap]m\"")
.addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm
// Quotation marks should be no more than a single character
.add("//ldml/delimiters/(quotation|alternateQuotation)", new Limit[] {
new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE)
})
// Numeric items should be no more than a single character
.add("//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", new Limit[] {
new Limit(1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NUMBERSYMBOLS)
})
// Now widths
// The following are rough measures, just to check strange cases
.add("//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", new Limit[] {
new Limit(2 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
.add("//ldml/localeDisplayNames/localeDisplayPattern/", new Limit[] { // {0}: {1}, {0} ({1}), ,
new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
.add("//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", new Limit[] { // {0} and {1}
new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
.add("//ldml/dates/timeZoneNames/fallbackFormat", new Limit[] { // {1} ({0})
new Limit(2 * EM, 3 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
.add("//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", new Limit[] { // {0} Time,
// +HH:mm;-HH:mm
new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
.add("//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", new Limit[] { // GMT{0}, GMT
new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
// Era Abbreviations
// Allow longer for Japanese calendar eras
.add("//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
new Limit(12 * EM, 16 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// Allow longer for ROC calendar eras
.add("//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", new Limit[] {
new Limit(4 * EM, 8 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// Era abbreviations of all remaining calendars
.add("//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", new Limit[] {
new Limit(3 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// am/pm abbreviated
.add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", new Limit[] {
new Limit(4 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// other day periods abbreviated
.add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", new Limit[] {
new Limit(8 * EM, 12 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// am/pm wide
.add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", new Limit[] {
new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// other day periods wide
.add("//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", new Limit[] {
new Limit(10 * EM, 20 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// Narrow items
// The negative lookahead excludes paths that continue with /cyclic, /dayPeriod or /monthPattern
.add("//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", new Limit[] {
new Limit(1.5 * EM, 2.25 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NONE)
})
// \"(?!am|pm)[^\"]+\"\\
// Compact number formats
// NOTE(review): the pattern below ends inside the @type value (after the leading 1); this looks
// like a deliberate prefix match on pattern types starting with 1 - confirm.
.add("//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1",
new Limit[] {
new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.NUMBERFORMAT)
})
// Catch -future/past Narrow units and allow much wider values
.add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"[^\"]+-(future|past)\"]/unitPattern", new Limit[] {
new Limit(10 * EM, 15 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
// Catch widest units and allow a bit wider
.add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGEST + "\"]/unitPattern", new Limit[] {
new Limit(5 * EM, 6 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
// Catch special units and allow a bit wider
.add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=\"" + ALLOW_LONGER + "\"]/unitPattern", new Limit[] {
new Limit(4 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
// Narrow units
.add("//ldml/units/unitLength[@type=\"narrow\"]/unit[@type=%A]/unitPattern", new Limit[] {
new Limit(3 * EM, 4 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
// Short units
.add("//ldml/units/unitLength[@type=\"short\"]/unit[@type=%A]/unitPattern", new Limit[] {
new Limit(5 * EM, 10 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
})
// Currency Symbols
.add("//ldml/numbers/currencies/currency[@type=%A]/symbol", new Limit[] {
new Limit(3 * EM, 5 * EM, Measure.DISPLAY_WIDTH, LimitType.MAXIMUM, Special.PLACEHOLDERS)
});
// Limits that have reported at least one problem on this instance; handleCheck adds to it only
// when DEBUG is true. A LinkedHashSet, so iteration follows the order of first insertion.
Set<Limit> found = new LinkedHashSet<Limit>();
/**
 * Checks {@code value} against the size limits registered for {@code path} and appends at most
 * one status to {@code result}.
 *
 * <p>Nothing is checked when {@code value} is null or when no limits are registered for the
 * path. The source-locale and coverage lookups are only performed once limits have been found,
 * so paths without limits return without that work.
 *
 * @param path the path used to look up the limits and the coverage level
 * @param fullPath not used by this check
 * @param value the value to measure; may be null
 * @param options not used by this check
 * @param result list that receives the warning or error, if any
 * @return this check
 */
@Override
public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, List<CheckStatus> result) {
    if (value == null) {
        return this; // skip
    }
    Limit[] items = lookup.get(path);
    if (items == null) {
        return this; // no limits registered for this path
    }
    // The call below is made so that status.pathWhereFound can be compared with path.
    CLDRFile.Status status = new CLDRFile.Status();
    this.getCldrFileToCheck().getSourceLocaleID(path, status);
    // This was put in specifically to deal with the fact that we added a bunch of new units in CLDR 26
    // and didn't put the narrow forms of them into modern coverage. If/when the narrow forms of all units
    // are modern coverage, then we can safely remove the aliasedAndComprehensive check. Right now if an
    // item is aliased and coverage is comprehensive, then it can't generate anything worse than a warning.
    boolean aliasedAndComprehensive = (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0)
        && (status.pathWhereFound.compareTo(path) != 0);
    for (Limit item : items) {
        if (item.hasProblem(value, result, this, aliasedAndComprehensive)) {
            if (DEBUG) {
                found.add(item); // Set.add leaves the set unchanged if the item is already present
            }
            break; // only one error per item
        }
    }
    return this;
}
/**
 * Prepares this check for a file: loads the supplemental data from the file's supplemental
 * directory, obtains the coverage levels for the file's locale, and then delegates to the
 * superclass implementation.
 *
 * @param cldrFileToCheck the file to check; dereferenced without a null check
 * @param options passed through to the superclass
 * @param possibleErrors passed through to the superclass
 * @return this check
 */
@Override
public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options,
    List<CheckStatus> possibleErrors) {
    final String localeID = cldrFileToCheck.getLocaleID();
    supplementalData = SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory());
    coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID);
    super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
    return this;
}
}