// blob: 6bc130884016945df145c2f4b56ae42f73435760 [file] [log] [blame]
package org.unicode.cldr.test;
import static java.util.Collections.disjoint;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.VersionInfo;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import org.unicode.cldr.tool.ToolConfig;
import org.unicode.cldr.util.Builder;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility.VariableReplacer;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.RegexLookup.Finder;
import org.unicode.cldr.util.RegexLookup.RegexFinder;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.ApprovalRequirementMatcher;
import org.unicode.cldr.util.SupplementalDataInfo.CoverageLevelInfo;
import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XPathParts;
public class CoverageLevel2 {
// To modify the results, see /cldr/common/supplemental/coverageLevels.xml
/**
 * Enable to get more verbose output when debugging: when true, {@link #getLevel(String)} collects
 * the lookup failures and prints each of them to {@code System.out}.
 */
private static final boolean DEBUG_LOOKUP = false;
/**
 * Maps paths to coverage levels. Assigned in the constructors: either the lookup returned by
 * {@code SupplementalDataInfo.getCoverageLookup()} or one loaded from a rule file through {@code
 * RawCoverageFile.load}. {@link #getLevel(String)} also synchronizes on this object.
 */
private RegexLookup<Level> lookup = null;
/**
 * The locale-dependent value sets a coverage pattern can refer to through a single variable such
 * as ${Target-Plurals}. {@link MyRegexFinder} obtains the constant with {@code valueOf} after
 * stripping the "${" and "}" and replacing '-' with '_', so the constant names must mirror the
 * variable names used in the coverage rules.
 */
enum SetMatchType {
// captured group must equal the locale's target language
Target_Language,
// captured group must be contained in cvi.targetScripts
Target_Scripts,
// captured group must be contained in cvi.targetTerritories
Target_Territories,
// captured group must be contained in cvi.targetTimeZones
Target_TimeZones,
// captured group must be contained in cvi.targetCurrencies
Target_Currencies,
// captured group may be null or empty, otherwise must be contained in cvi.targetPlurals
Target_Plurals,
// captured group must be contained in cvi.calendars
Calendar_List
}
/**
 * Per-locale data handed to the lookup as the match context; {@link MyRegexFinder#find} casts its
 * context argument to this type.
 */
private static class LocaleSpecificInfo {
// Coverage variable sets for the target language; filled in by the CoverageLevel2 constructors
// from SupplementalDataInfo.getCoverageVariableInfo(targetLanguage).
CoverageVariableInfo cvi;
// Language of the locale, as returned by CLDRLocale.getInstance(locale).getLanguage().
String targetLanguage;
}
/** Locale-specific data of this instance; passed as the context to every lookup in getLevel. */
final LocaleSpecificInfo myInfo = new LocaleSpecificInfo();
/**
 * Regex finder used by the coverage lookup. On top of the plain regex match it applies extra
 * tests based on the rule (the ci value) and on the locale (the cvi value), duplicating what was
 * in SupplementalDataInfo. It tests membership in the sets directly instead of converting them
 * to regex strings.
 *
 * @author markdavis
 */
public static class MyRegexFinder extends RegexFinder {
    private final SetMatchType additionalMatch;
    private final CoverageLevelInfo ci;

    /**
     * @param pattern the regular expression passed on to {@code RegexFinder}
     * @param additionalMatch the variable found in the rule, in the form ${Some-Name}, or null
     *     when the rule contains no variable
     * @param ci the coverage rule this finder belongs to
     */
    public MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci) {
        super(pattern);
        if (additionalMatch == null) {
            this.additionalMatch = null;
        } else {
            // remove the ${ and the }, and change - to _.
            String variableName = additionalMatch.substring(2, additionalMatch.length() - 1);
            this.additionalMatch = SetMatchType.valueOf(variableName.replace('-', '_'));
        }
        this.ci = ci;
    }

    /**
     * Succeeds only when the rule applies to the locale, the regex matches, and (if the rule
     * has a variable) the captured group belongs to the corresponding locale-specific set.
     *
     * @param context must be a {@code LocaleSpecificInfo}
     */
    @Override
    public boolean find(String item, Object context, Info info) {
        LocaleSpecificInfo localeInfo = (LocaleSpecificInfo) context;
        if (!localeAccepted(localeInfo)) {
            return false;
        }
        // also sets matcher in RegexFinder
        if (!super.find(item, context, info)) {
            return false;
        }
        if (additionalMatch == null) {
            return true;
        }
        // we match on a group, so get the right one
        return groupInTargetSet(info.value[1], localeInfo);
    }

    /**
     * Handles rules restricted to specific languages, scripts or territories: a rule without
     * any restriction always applies; otherwise a match on any one of language, script or
     * territory is enough.
     */
    private boolean localeAccepted(LocaleSpecificInfo localeInfo) {
        if (ci.inLanguage == null && ci.inScriptSet == null && ci.inTerritorySet == null) {
            return true;
        }
        if (ci.inLanguage != null
                && ci.inLanguage.matcher(localeInfo.targetLanguage).matches()) {
            return true;
        }
        if (ci.inScriptSet != null
                && !disjoint(ci.inScriptSet, localeInfo.cvi.targetScripts)) {
            return true;
        }
        return ci.inTerritorySet != null
                && !disjoint(ci.inTerritorySet, localeInfo.cvi.targetTerritories);
    }

    /** Checks the captured group against the set selected by {@code additionalMatch}. */
    private boolean groupInTargetSet(String groupMatch, LocaleSpecificInfo localeInfo) {
        final CoverageVariableInfo cvi = localeInfo.cvi;
        switch (additionalMatch) {
            case Target_Language:
                return localeInfo.targetLanguage.equals(groupMatch);
            case Target_Scripts:
                return cvi.targetScripts.contains(groupMatch);
            case Target_Territories:
                return cvi.targetTerritories.contains(groupMatch);
            case Target_TimeZones:
                return cvi.targetTimeZones.contains(groupMatch);
            case Target_Currencies:
                return cvi.targetCurrencies.contains(groupMatch);
            case Target_Plurals:
                // The @count= part might not be in the xpath at all (the regex is usually
                // ([@count='${Target-Plurals}'])? ), so a missing or empty group must not
                // reject the match.
                if (groupMatch == null || groupMatch.length() == 0) {
                    return true;
                }
                return cvi.targetPlurals.contains(groupMatch);
            case Calendar_List:
                return cvi.calendars.contains(groupMatch);
        }
        return true;
    }

    /**
     * Never equal to anything, not even to itself.
     *
     * <p>NOTE(review): presumably this keeps finders that share a pattern but belong to
     * different rules from being treated as duplicates by the lookup - confirm against
     * RegexLookup. It does not satisfy the reflexivity requirement of {@code Object.equals}.
     */
    @Override
    public boolean equals(Object obj) {
        return false;
    }
}
private CoverageLevel2(SupplementalDataInfo sdi, String locale) {
myInfo.targetLanguage = CLDRLocale.getInstance(locale).getLanguage();
myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
lookup = sdi.getCoverageLookup();
}
/**
 * Creates an instance whose coverage lookup is loaded from a rule file instead of being taken
 * from the SupplementalDataInfo.
 *
 * @param sdi source of the coverage variable info
 * @param locale locale ID; only its language part is used
 * @param ruleFile the coverage rule file, passed to {@code RawCoverageFile.load}
 */
private CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile) {
    final String language = CLDRLocale.getInstance(locale).getLanguage();
    myInfo.targetLanguage = language;
    myInfo.cvi = sdi.getCoverageVariableInfo(language);
    this.lookup = new RawCoverageFile().load(ruleFile);
}
/**
 * Gets an instance backed by the SupplementalDataInfo returned from the no-argument {@code
 * SupplementalDataInfo.getInstance()}.
 *
 * @param locale locale ID; only its language part is used
 * @return a new CoverageLevel2 for that locale
 * @deprecated Don't use this. Call the version which takes a SupplementalDataInfo as an
 *     argument.
 * @see #getInstance(SupplementalDataInfo, String)
 * @see CLDRPaths#SUPPLEMENTAL_DIRECTORY
 */
@Deprecated
public static CoverageLevel2 getInstance(String locale) {
    final SupplementalDataInfo defaultInfo = SupplementalDataInfo.getInstance();
    return new CoverageLevel2(defaultInfo, locale);
}
/**
 * Gets an instance that uses the coverage lookup of the given SupplementalDataInfo.
 *
 * @param sdi the supplemental data to take the coverage information from
 * @param locale locale ID; only its language part is used
 * @return a new CoverageLevel2 for that locale
 */
public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale) {
    final CoverageLevel2 instance = new CoverageLevel2(sdi, locale);
    return instance;
}
/**
 * Gets an instance whose coverage rules are read from the given rule file.
 *
 * @param sdi the supplemental data to take the coverage variable info from
 * @param locale locale ID; only its language part is used
 * @param ruleFile the coverage rule file to load
 * @return a new CoverageLevel2 for that locale
 */
public static CoverageLevel2 getInstance(
        SupplementalDataInfo sdi, String locale, String ruleFile) {
    final CoverageLevel2 instance = new CoverageLevel2(sdi, locale, ruleFile);
    return instance;
}
/**
 * Gets the coverage level of a path for this instance's locale.
 *
 * @param path the path to look up; may be null
 * @return {@code Level.UNDETERMINED} for a null path, {@code Level.COMPREHENSIVE} when the
 *     lookup has no result for the path, otherwise the level found by the lookup
 */
public Level getLevel(String path) {
    if (path == null) {
        return Level.UNDETERMINED;
    }
    // Synchronize on the lookup object (not on the class), since the Matchers are changed
    // during the matching process.
    synchronized (lookup) {
        final Level found;
        if (!DEBUG_LOOKUP) {
            found = lookup.get(path, myInfo, null);
        } else { // for testing
            Output<String[]> checkItems = new Output<>();
            Output<Finder> matcherFound = new Output<>();
            List<String> failures = new ArrayList<>();
            found = lookup.get(path, myInfo, checkItems, matcherFound, failures);
            for (String failure : failures) {
                System.out.println(failure);
            }
        }
        if (found == null) {
            return Level.COMPREHENSIVE;
        }
        return found;
    }
}
/**
 * Gets the coverage level of a path as a number.
 *
 * @param path the path to look up; may be null
 * @return the numeric value of {@link #getLevel(String)} for that path
 */
public int getIntLevel(String path) {
    final Level level = getLevel(path);
    return level.getLevel();
}
// Code moved in from SupplementalDataInfo.
//
// TODO:
// 1. Drop the corresponding code in SupplementalDataInfo.
// 2. Change SupplementalDataInfo to skip reading coverageLevels.xml.
// 3. Change the default creation of CoverageLevel2 to use this code with that file instead.
// Later:
// 4. Generalize the RawCoverageFile code, and use it with other supplemental files.
// That way supplemental files can be read as needed instead of all at once.
// State filled in by RawCoverageFile while it reads a rule file.
//
// String forms of the paths containing an approvalRequirement element; getRequiredVotes builds
// its matchers from this list.
private final List<String> approvalRequirements = new LinkedList<>(); // xpath array
// Key/value pairs from coverageVariable elements; used to expand the match, inLanguage, inScript
// and inTerritory attributes of coverageLevel elements.
private VariableReplacer coverageVariables = new VariableReplacer();
// One entry per coverageLevel element; replaced by an unmodifiable view once reading is done, and
// iterated by RawCoverageFile.makeCoverageLookup.
private SortedSet<CoverageLevelInfo> coverageLevels = new TreeSet<>();
public class RawCoverageFile {
private VersionInfo cldrVersion;
/**
 * Handler that receives every path of the rule file and records the file version, the approval
 * requirements, the coverage levels and the coverage variables in the enclosing instances.
 */
class MyHandler extends XMLFileReader.SimpleHandler {
    /**
     * Dispatches on the kind of element found in the path. Paths that contain none of the
     * known elements are ignored.
     *
     * @param path the full path of the element
     * @param pathValue the element value; not used
     */
    @Override
    public void handlePathValue(String path, String pathValue) {
        XPathParts parts = XPathParts.getFrozenInstance(path);
        String level1 = parts.size() < 2 ? null : parts.getElement(1);
        // Constant-first comparison: level1 is null for a path with fewer than two elements,
        // and such a path must fall through to the other checks instead of throwing.
        if ("version".equals(level1)) {
            recordVersion(parts);
        } else if (parts.containsElement("approvalRequirement")) {
            approvalRequirements.add(parts.toString());
        } else if (parts.containsElement("coverageLevel")) {
            addCoverageLevel(parts);
        } else if (parts.containsElement("coverageVariable")) {
            String key = parts.getAttributeValue(-1, "key");
            String value = parts.getAttributeValue(-1, "value");
            coverageVariables.add(key, value);
        }
    }

    /** Remembers the version of the file; only the first version element is used. */
    private void recordVersion(XPathParts parts) {
        if (cldrVersion != null) {
            return;
        }
        String version = parts.getAttributeValue(1, "cldrVersion");
        if (version == null) {
            version = parts.getAttributeValue(0, "version");
        }
        // Neither attribute present: leave the version unknown rather than handing null to
        // VersionInfo.getInstance.
        if (version != null) {
            cldrVersion = VersionInfo.getInstance(version);
        }
    }

    /** Builds a CoverageLevelInfo from a coverageLevel element and stores it. */
    private void addCoverageLevel(XPathParts parts) {
        String match = expandedAttribute(parts, "match");
        String valueStr = parts.getAttributeValue(-1, "value");
        // Ticket 7125: map the number to English. So switch from English to number for
        // construction
        Integer value = Integer.valueOf(Level.get(valueStr).getLevel());
        String inLanguage = expandedAttribute(parts, "inLanguage");
        String inScript = expandedAttribute(parts, "inScript");
        String inTerritory = expandedAttribute(parts, "inTerritory");
        // Files older than major version 2 get a fixed level. If no version element has been
        // seen (yet), the version is unknown and the legacy override is not applied.
        if (cldrVersion != null && cldrVersion.getMajor() < 2) {
            value = 40;
        }
        CoverageLevelInfo ci =
                new CoverageLevelInfo(match, value, inLanguage, inScript, inTerritory);
        coverageLevels.add(ci);
    }

    /**
     * Returns the named attribute of the last element with the coverage variables replaced,
     * or null when the path does not contain that attribute.
     */
    private String expandedAttribute(XPathParts parts, String attribute) {
        if (!parts.containsAttribute(attribute)) {
            return null;
        }
        return coverageVariables.replace(parts.getAttributeValue(-1, attribute));
    }

    /**
     * Post-processes the collected coverage levels after the file has been read: applies
     * {@code CoverageLevelInfo.fixEU} and then makes the set unmodifiable.
     */
    public void cleanup() {
        CLDRConfig testInfo = ToolConfig.getToolInstance();
        SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();
        CoverageLevelInfo.fixEU(coverageLevels, supplementalDataInfo2);
        coverageLevels = Collections.unmodifiableSortedSet(coverageLevels);
    }
}
/**
 * Turns the collected coverage levels into a lookup from path to level.
 *
 * <p>Each rule's match expression is converted into a regular expression that must match a
 * complete path under //ldml/. At most one ${...} variable is allowed per rule; it is replaced
 * by a capturing group whose content is checked by {@link CoverageLevel2.MyRegexFinder}.
 *
 * @return the new lookup
 * @throws IllegalArgumentException if a rule contains more than one variable
 */
public RegexLookup<Level> makeCoverageLookup() {
    final RegexLookup<Level> result =
            new RegexLookup<>(RegexLookup.LookupType.STAR_PATTERN_LOOKUP);
    final Matcher variableMatcher =
            PatternCache.get("\\$\\{[A-Za-z][\\-A-Za-z]*\\}").matcher("");
    for (CoverageLevelInfo ci : coverageLevels) {
        String regex = ci.match.replace('\'', '"');
        // make sure that attributes are quoted
        regex = regex.replace("[@", "\\[@");
        // make sure that there are no capturing groups (beyond what we generate)
        regex = regex.replace("(", "(?:");
        // Allow negative lookahead
        regex = regex.replace("(?:?!", "(?!");
        // for now, force a complete match
        regex = "^//ldml/" + regex + "$";

        String variableType = null;
        variableMatcher.reset(regex);
        if (variableMatcher.find()) {
            variableType = variableMatcher.group();
            String beforeVariable = regex.substring(0, variableMatcher.start());
            String afterVariable = regex.substring(variableMatcher.end());
            regex = beforeVariable + "([^\"]*)" + afterVariable;
            if (variableMatcher.find()) {
                throw new IllegalArgumentException(
                        "We can only handle a single variable on a line");
            }
        }
        result.add(new CoverageLevel2.MyRegexFinder(regex, variableType, ci), ci.value);
    }
    return result;
}
/**
 * Reads a coverage rule file and builds the lookup from it.
 *
 * @param file the rule file to read
 * @return the lookup produced by {@link #makeCoverageLookup()} from the rules in the file
 */
public RegexLookup<Level> load(String file) {
    final MyHandler handler = new MyHandler();
    new XMLFileReader().setHandler(handler).read(file, -1, true);
    handler.cleanup();
    return makeCoverageLookup();
}
}
// Run these from first to last to get the approval info. Built on first use from
// approvalRequirements.
volatile List<ApprovalRequirementMatcher> approvalMatchers = null;
/**
 * Gets the preliminary number of required votes for the given locale and PathHeader. The
 * matchers are tried in order and the first one that matches decides.
 *
 * <p>Important: this number may not agree with VoteResolver.getRequiredVotes since VoteResolver
 * also takes the baseline status into account.
 *
 * <p>Called by VoteResolver, ShowStarredCoverage, TestCoverage, and TestCoverageLevel.
 *
 * @param loc the CLDRLocale
 * @param ph the PathHeader - which path this is applied to, or null if unknown.
 * @return a number such as 4 or 8
 * @throws RuntimeException if no matcher matches
 */
public int getRequiredVotes(CLDRLocale loc, PathHeader ph) {
    List<ApprovalRequirementMatcher> matchers = approvalMatchers;
    if (matchers == null) {
        matchers = ApprovalRequirementMatcher.buildAll(approvalRequirements);
        approvalMatchers = matchers;
    }
    for (ApprovalRequirementMatcher candidate : matchers) {
        if (!candidate.matches(loc, ph)) {
            continue;
        }
        return candidate.getRequiredVotes();
    }
    throw new RuntimeException(
            "Error: " + loc + " " + ph + " ran off the end of the approvalMatchers.");
}
// TODO: move to separate tool
/**
 * Quick test during development to compare old to new coverageLevels, for the locales "root"
 * and "de".
 *
 * @param args not used
 */
public static void main(String[] args) {
    final String[] localesToCheck = {"root", "de"};
    for (String locale : localesToCheck) {
        checkCoverage(locale);
    }
}
/**
 * Compares, for every path of the resolved file of a locale, the level from the standard
 * lookup with the level from the rule file supplemental-temp/coverageLevels2.xml, and prints
 * two tab-separated tables to System.out: first the paths where both agree on a level below
 * MODERATE, then the paths where the two levels differ.
 *
 * @param locale the locale to check
 */
private static void checkCoverage(String locale) {
    final CLDRConfig config = ToolConfig.getToolInstance();
    final SupplementalDataInfo supplementalData = config.getSupplementalDataInfo();
    final CoverageLevel2 oldCoverage = CoverageLevel2.getInstance(supplementalData, locale);
    final String newRuleFile =
            CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/coverageLevels2.xml";
    final CoverageLevel2 newCoverage =
            CoverageLevel2.getInstance(supplementalData, locale, newRuleFile);

    CLDRFile resolvedFile = config.getCldrFactory().make(locale, true);
    Set<String> sortedPaths = Builder.with(new TreeSet<String>()).addAll(resolvedFile).get();
    PathHeader.Factory headerFactory = PathHeader.getFactory();

    Map<PathHeader, String> differing = new TreeMap<>();
    Map<PathHeader, String> agreeing = new TreeMap<>();
    for (String path : sortedPaths) {
        Level before = oldCoverage.getLevel(path);
        Level after = newCoverage.getLevel(path);
        if (before == after) {
            // identical results are only reported for levels below MODERATE
            if (before.compareTo(Level.MODERATE) < 0) {
                agreeing.put(headerFactory.fromPath(path), locale + "\t" + path);
            }
            continue;
        }
        differing.put(
                headerFactory.fromPath(path),
                locale + "\t" + before + "\t" + after + "\t" + path);
    }

    System.out.println("\nLocale\tPath\tPathHeader");
    for (Entry<PathHeader, String> row : agreeing.entrySet()) {
        System.out.println(row.getValue() + "\t" + row.getKey());
    }
    System.out.println("\nLocale\tOld\tNew\tPath\tPathHeader");
    for (Entry<PathHeader, String> row : differing.entrySet()) {
        System.out.println(row.getValue() + "\t" + row.getKey());
    }
}
}