| package org.unicode.cldr.test; |
| |
| import java.util.Arrays; |
| import java.util.Calendar; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CldrUtility; |
| import org.unicode.cldr.util.Factory; |
| import org.unicode.cldr.util.PathHeader; |
| import org.unicode.cldr.util.PatternCache; |
| import org.unicode.cldr.util.SimpleXMLSource; |
| import org.unicode.cldr.util.XMLSource; |
| import org.unicode.cldr.util.XPathParts; |
| |
| import com.google.common.collect.ImmutableMap; |
| import com.google.common.collect.ImmutableSet; |
| |
| public class CheckDisplayCollisions extends FactoryCheckCLDR { |
/**
 * Path fragment used only by the debugging hook at the top of getPathsWithValue:
 * when the path being checked contains this fragment, execution enters an otherwise
 * empty branch that is a convenient place for a breakpoint.
 */
private static final String DEBUG_PATH_PART = "-mass"; // example: "//ldml/dates/fields/field[@type=\"sun-narrow\"]/relative[@type=\"-1\"]";
/**
 * Set to true to get verbose logging of path removals (printed by log()).
 */
private static final boolean LOG_PATH_REMOVALS = false;

/**
 * Set to true to prevent "Turkey" from being used for both 🇹🇷 -name and 🦃 -name.
 * (Means clients need to use the "flag: Turkey" format.)
 * While false, the annotation-versus-territory comparison in handleCheck is skipped.
 */
private static final boolean CHECK_FLAG_AND_EMOJI = false;
| |
| |
| // Get Date-Time in milliseconds |
| private static long getDateTimeinMillis(int year, int month, int date) { |
| Calendar cal = Calendar.getInstance(); |
| cal.set(year, month, date); |
| return cal.getTimeInMillis(); |
| } |
| |
// TODO probably need to fix this to be more accurate over time
// Approximate time constants, all in milliseconds. None of them is read within this class.
// year: 365.2425 days * 86400 seconds * 1000 ms.
static long year = (long) (365.2425 * 86400 * 1000); // can be approximate
// startDate / endDate: 15 January 1995 and 15 January 2011 (the month argument 1 - 1 is zero-based January).
static long startDate = getDateTimeinMillis(1995, 1 - 1, 15); // can be approximate
static long endDate = getDateTimeinMillis(2011, 1 - 1, 15); // can be approximate
| |
/**
 * How the base string of a {@link Type} is compared against an xpath:
 * PREFIX means the xpath must start with the literal string,
 * REGEX means the string is a regular expression that the xpath must match.
 */
private static enum MatchType {
    PREFIX, REGEX
}
| |
| private static enum Type { |
| LANGUAGE("//ldml/localeDisplayNames/languages/language", MatchType.PREFIX), |
| SCRIPT("//ldml/localeDisplayNames/scripts/script", MatchType.PREFIX), |
| TERRITORY("//ldml/localeDisplayNames/(territories/territory|subdivisions/subdivision\\[@type=\"gb(eng|sct|wls)\")", MatchType.REGEX), |
| VARIANT("//ldml/localeDisplayNames/variants/variant", MatchType.PREFIX), |
| CURRENCY("//ldml/numbers/currencies/currency", MatchType.PREFIX), |
| ZONE("//ldml/dates/timeZoneNames/zone", MatchType.PREFIX), |
| METAZONE("//ldml/dates/timeZoneNames/metazone", MatchType.PREFIX), |
| DECIMAL_FORMAT("//ldml/numbers/decimalFormats", MatchType.PREFIX), |
| UNIT_PREFIX("//ldml/units/unitLength.*/unitPrefixPattern", MatchType.REGEX), |
| UNITS_COMPOUND_LONG("//ldml/units/unitLength[@type=\"long\"]/compoundUnit", MatchType.PREFIX), |
| UNITS_COMPOUND_SHORT("//ldml/units/unitLength[@type=\"short\"]/compoundUnit", MatchType.PREFIX), |
| UNITS_COORDINATE( "//ldml/units/unitLength\\[@type=\".*\"\\]/coordinateUnit/", MatchType.REGEX), |
| UNITS_IGNORE("//ldml/units/unitLength[@type=\"narrow\"]", MatchType.PREFIX), |
| UNITS("//ldml/units/unitLength.*/(displayName|unitPattern|perUnitPattern)", MatchType.REGEX), |
| FIELDS_NARROW("//ldml/dates/fields/field\\[@type=\"(sun|mon|tue|wed|thu|fri|sat)-narrow\"\\]/relative", MatchType.REGEX), |
| FIELDS_RELATIVE("//ldml/dates/fields/field\\[@type=\".*\"\\]/relative\\[@type=\"(-1|0|1)\"\\]", MatchType.REGEX), |
| ANNOTATIONS("//ldml/annotations/annotation\\[@cp=\".*\"\\]\\[@type=\"tts\"\\]", MatchType.REGEX), |
| CARDINAL_MINIMAL("//ldml/numbers/minimalPairs/pluralMinimalPairs", MatchType.PREFIX), |
| ORDINAL_MINIMAL("//ldml/numbers/minimalPairs/ordinalMinimalPairs", MatchType.PREFIX), |
| TYPOGRAPHIC_AXIS("//ldml/typographicNames/axisName", MatchType.PREFIX), |
| TYPOGRAPHIC_FEATURE("//ldml/typographicNames/featureName", MatchType.PREFIX), |
| TYPOGRAPHIC_STYLE("//ldml/typographicNames/styleName", MatchType.PREFIX), |
| ; |
| |
| private MatchType matchType; |
| private String basePrefix; |
| private Pattern basePattern; |
| |
| private Type(String basePrefix, MatchType matchType) { |
| this.matchType = matchType; |
| this.basePrefix = basePrefix; |
| this.basePattern = PatternCache.get("^" + basePrefix + ".*"); |
| } |
| |
| /** |
| * @return the prefix that all XPaths of this type should start with |
| */ |
| public String getPrefix() { |
| return basePrefix; |
| } |
| |
| /** |
| * @return the regex that matches all XPaths of this type |
| */ |
| public Pattern getPattern() { |
| return basePattern; |
| } |
| |
| /** |
| * @param path |
| * the path to find the type of |
| * @return the type of the path |
| */ |
| public static Type getType(String path) { |
| for (Type type : values()) { |
| if (type==Type.FIELDS_NARROW) continue; // skip FIELDS_NARROW so the corresponding paths are included in FIELDS_RELATIVE |
| if (type.matchType == MatchType.PREFIX) { |
| if (path.startsWith(type.getPrefix())) { |
| return type; |
| } |
| } else { |
| Matcher m = type.getPattern().matcher(path); |
| if (m.matches()) { |
| return type; |
| } |
| } |
| } |
| return null; |
| } |
| } |
| |
// When true, handleCheck reports each colliding path via getPathReferenceForMessage and
// skips the older logic that extracted and compared the [@type="..."] attribute values.
static final boolean SKIP_TYPE_CHECK = true;

// Finds ="narrow"] in a path. Paths that contain it are skipped by handleCheck and
// getPathsWithValue, except when their type is UNITS_COORDINATE.
private final Matcher exclusions = PatternCache.get("=\"narrow\"]").matcher(""); // initially reset to the empty string
// Captures the value of a [@type="..."] attribute in group 1.
private final Matcher typePattern = PatternCache.get("\\[@type=\"([^\"]*+)\"]").matcher("");
// The matchers below find attributes that are deleted from paths before the paths are compared.
// This one removes the count, alt, gender and case attributes.
private final Matcher ignoreAltAndCountAttributes = PatternCache.get("\\[@(?:count|alt|gender|case)=\"[^\"]*+\"]").matcher("");
// Removes any alt attribute.
private final Matcher ignoreAltAttributes = PatternCache.get("\\[@(?:alt)=\"[^\"]*+\"]").matcher("");
// Removes only alt="short" and alt="variant".
private final Matcher ignoreAltShortOrVariantAttributes = PatternCache.get("\\[@(?:alt)=\"(?:short|variant)\"]").matcher("");
// Finds the text "compoundUnitPattern" in a path; such paths never count as unit collisions.
private final Matcher compoundUnitPatterns = PatternCache.get("compoundUnitPattern").matcher("");

// map unique path fragment to set of unique fragments for other
// paths with which it is OK to have a value collision
private static final Map<String, Set<String>> mapPathPartsToSetsForDupOK = createMapPathPartsToSets();
| |
| private static Map<String, Set<String>> createMapPathPartsToSets() { |
| Map<String, Set<String>> mapPathPartsToSets = new HashMap<>(); |
| |
| // Add OK collisions for /unit[@type=\"energy-calorie\"] |
| Set<String> set1 = new HashSet<>(); |
| set1.add("/unit[@type=\"energy-foodcalorie\"]"); |
| set1.add("/unit[@type=\"length-inch\"]"); // #11292 |
| mapPathPartsToSets.put("/unit[@type=\"energy-calorie\"]", set1); |
| |
| // Add OK collisions for /unit[@type=\"energy-foodcalorie\"] |
| Set<String> set2 = new HashSet<>(); |
| set2.add("/unit[@type=\"energy-calorie\"]"); |
| set2.add("/unit[@type=\"energy-kilocalorie\"]"); |
| set2.add("/unit[@type=\"length-inch\"]"); // #11292 |
| mapPathPartsToSets.put("/unit[@type=\"energy-foodcalorie\"]", set2); |
| |
| // Add OK collisions for /unit[@type=\"energy-kilocalorie\"] |
| Set<String> set3 = new HashSet<>(); |
| set3.add("/unit[@type=\"energy-foodcalorie\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"energy-kilocalorie\"]", set3); |
| |
| // Add OK collisions for /unit[@type=\"mass-carat\"] |
| Set<String> set4 = new HashSet<>(); |
| set4.add("/unit[@type=\"concentr-karat\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"mass-carat\"]", set4); |
| |
| // Add OK collisions for /unit[@type=\"concentr-karat\"] |
| Set<String> set5 = new HashSet<>(); |
| set5.add("/unit[@type=\"mass-carat\"]"); |
| set5.add("/unit[@type=\"temperature-kelvin\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"concentr-karat\"]", set5); |
| |
| // Add OK collisions for /unit[@type=\"digital-byte\"] |
| Set<String> set6 = new HashSet<>(); |
| set6.add("/unit[@type=\"mass-tonne\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"digital-byte\"]", set6); |
| |
| // Add OK collisions for /unit[@type=\"mass-tonne\"] |
| Set<String> set7 = new HashSet<>(); |
| set7.add("/unit[@type=\"digital-byte\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"mass-tonne\"]", set7); |
| |
| // delete the exceptions allowing acceleration-g-force and mass-gram to have the same symbol, see #7561 |
| |
| // Add OK collisions for /unit[@type=\"length-inch\"] |
| Set<String> set9 = new HashSet<>(); |
| set9.add("/unit[@type=\"energy-calorie\"]"); |
| set9.add("/unit[@type=\"energy-foodcalorie\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"length-inch\"]", set9); |
| |
| // Add OK collisions for /unit[@type=\"length-foot\"] |
| Set<String> set10 = new HashSet<>(); |
| set10.add("/unit[@type=\"angle-arc-minute\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"length-foot\"]", set10); |
| |
| // Add OK collisions for /unit[@type=\"angle-arc-minute\"] |
| Set<String> set11 = new HashSet<>(); |
| set11.add("/unit[@type=\"length-foot\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"angle-arc-minute\"]", set11); |
| |
| // Add OK collisions for /unit[@type=\"temperature-kelvin\"] |
| Set<String> set12 = new HashSet<>(); |
| set12.add("/unit[@type=\"concentr-karat\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"temperature-kelvin\"]", set12); |
| |
| // Add OK collisions for /unit[@type=\"temperature-generic\"] |
| Set<String> set13 = new HashSet<>(); |
| set13.add("/unit[@type=\"angle-degree\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"temperature-generic\"]", set13); |
| |
| // Add OK collisions for /unit[@type=\"angle-degree\"] |
| Set<String> set14 = new HashSet<>(); |
| set14.add("/unit[@type=\"temperature-generic\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"angle-degree\"]", set14); |
| |
| // Add OK collisions for /unit[@type=\"length-point\"] |
| Set<String> set15 = new HashSet<>(); |
| set15.add("/unit[@type=\"volume-pint\"]"); |
| set15.add("/unit[@type=\"mass-pound\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"length-point\"]", set15); |
| |
| // Add OK collisions for /unit[@type=\"volume-pint\"] |
| Set<String> set16 = new HashSet<>(); |
| set16.add("/unit[@type=\"length-point\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"volume-pint\"]", set16); |
| |
| // Add OK collisions for /unit[@type=\"pressure-hectopascal\"] |
| Set<String> set17 = new HashSet<>(); |
| set17.add("/unit[@type=\"pressure-millibar\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"pressure-hectopascal\"]", set17); |
| |
| // Add OK collisions for /unit[@type=\"pressure-millibar\"] |
| Set<String> set18 = new HashSet<>(); |
| set18.add("/unit[@type=\"pressure-hectopascal\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"pressure-millibar\"]", set18); |
| |
| // Add OK collisions for /unit[@type=\"mass-pound\"] |
| Set<String> set19 = new HashSet<>(); |
| set19.add("/unit[@type=\"length-point\"]"); |
| mapPathPartsToSets.put("/unit[@type=\"mass-pound\"]", set19); |
| |
| // Add OK collisions for /unit[@type=\"duration-century\"] |
| Set<String> set20 = new HashSet<>(); |
| set20.add("/unitLength[@type=\"short\"]/unit[@type=\"duration-second\"]"); |
| mapPathPartsToSets.put("/unitLength[@type=\"short\"]/unit[@type=\"duration-century\"]", set20); |
| // Add OK collisions for /unit[@type=\"duration-second\"] |
| Set<String> set21 = new HashSet<>(); |
| set21.add("/unitLength[@type=\"short\"]/unit[@type=\"duration-century\"]"); |
| mapPathPartsToSets.put("/unitLength[@type=\"short\"]/unit[@type=\"duration-second\"]", set21); |
| |
| // Add OK collisions for dot and pixel |
| addNonColliding(mapPathPartsToSets, "[@type=\"graphics-pixel\"]", "[@type=\"graphics-dot\"]"); |
| addNonColliding(mapPathPartsToSets, "[@type=\"graphics-pixel-per-inch\"]", "[@type=\"graphics-dot-per-inch\"]"); |
| addNonColliding(mapPathPartsToSets, "[@type=\"graphics-dot-per-centimeter\"]", "[@type=\"graphics-pixel-per-centimeter\"]"); |
| |
| addNonColliding(mapPathPartsToSets, "[@type=\"duration-year-person\"]", "[@type=\"duration-year\"]"); |
| addNonColliding(mapPathPartsToSets, "[@type=\"duration-month-person\"]", "[@type=\"duration-month\"]"); |
| addNonColliding(mapPathPartsToSets, "[@type=\"duration-week-person\"]", "[@type=\"duration-week\"]"); |
| addNonColliding(mapPathPartsToSets, "[@type=\"duration-day-person\"]", "[@type=\"duration-day\"]"); |
| |
| // all done, return immutable version |
| return ImmutableMap.copyOf(mapPathPartsToSets); |
| } |
| |
| // TODO Clean up the mapPathPartsToSets; clumsy to build and probably not speedy to use. |
| |
| public static void addNonColliding(Map<String, Set<String>> mapPathPartsToSets, String... alternatives) { |
| LinkedHashSet<String> items = new LinkedHashSet<>(Arrays.asList(alternatives)); |
| for (String item : items) { |
| LinkedHashSet<String> others = new LinkedHashSet<>(items); |
| others.remove(item); |
| mapPathPartsToSets.put(item, ImmutableSet.copyOf(others)); |
| } |
| } |
| |
/**
 * Creates the check, passing the factory on to the FactoryCheckCLDR superclass.
 *
 * @param factory
 *            the factory handed to the superclass constructor
 */
public CheckDisplayCollisions(Factory factory) {
    super(factory);
}
| |
| @Override |
| @SuppressWarnings("unused") |
| public CheckCLDR handleCheck(String path, String fullPath, String value, Options options, |
| List<CheckStatus> result) { |
| if (fullPath == null) { |
| return this; // skip paths that we don't have |
| } |
| |
| // get the paths with the same value. If there aren't duplicates, continue; |
| if (value == null || value.length() == 0) { |
| return this; |
| } |
| if (value.equals(CldrUtility.NO_INHERITANCE_MARKER) || value.equals(CldrUtility.INHERITANCE_MARKER)) { |
| return this; |
| } |
| |
| // find my type; bail if I don't have one. |
| Type myType = Type.getType(path); |
| if (myType == null || myType == Type.UNITS_IGNORE) { |
| return this; |
| } |
| String myPrefix = myType.getPrefix(); |
| |
| if (exclusions.reset(path).find() && myType != Type.UNITS_COORDINATE) { |
| return this; |
| } |
| |
| Matcher matcher = null; |
| String message = "Can't have same translation as {0}. Please change either this name or the other one. " |
| + "See <a target='doc' href='http://cldr.unicode.org/translation/short-names-and-keywords#TOC-Unique-Names'>Unique-Names</a>."; |
| Matcher currentAttributesToIgnore = ignoreAltAndCountAttributes; |
| Set<String> paths; |
| if (myType == Type.DECIMAL_FORMAT) { |
| if (!path.contains("[@count=") || "0".equals(value)) { |
| return this; |
| } |
| XPathParts parts = XPathParts.getFrozenInstance(path).cloneAsThawed(); // not frozen, for removeElement |
| String type = parts.getAttributeValue(-1, "type"); |
| myPrefix = parts.removeElement(-1).toString(); |
| matcher = PatternCache.get(myPrefix.replaceAll("\\[", "\\\\[") + |
| "/pattern\\[@type=(?!\"" + type + "\")\"\\d+\"].*").matcher(path); |
| currentAttributesToIgnore = ignoreAltAttributes; |
| message = "Can't have same number pattern as {0}"; |
| paths = getPathsWithValue(getResolvedCldrFileToCheck(), path, value, myType, myPrefix, matcher, currentAttributesToIgnore, Equivalence.exact); |
| } else if (myType == Type.UNITS || myType == Type.UNIT_PREFIX) { |
| currentAttributesToIgnore = ignoreAltAttributes; |
| paths = getPathsWithValue(getResolvedCldrFileToCheck(), path, value, myType, myPrefix, matcher, currentAttributesToIgnore, Equivalence.unit); |
| } else if (myType == Type.CARDINAL_MINIMAL || myType == Type.ORDINAL_MINIMAL) { |
| if (value.equals("{0}?")) { |
| return this; // special root 'other' value |
| } |
| currentAttributesToIgnore = ignoreAltAttributes; |
| paths = getPathsWithValue(getResolvedCldrFileToCheck(), path, value, myType, myPrefix, matcher, currentAttributesToIgnore, Equivalence.normal); |
| } else if (myType == Type.SCRIPT) { |
| currentAttributesToIgnore = ignoreAltShortOrVariantAttributes; // i.e. do NOT ignore alt="stand-alone" |
| paths = getPathsWithValue(getResolvedCldrFileToCheck(), path, value, myType, myPrefix, matcher, currentAttributesToIgnore, Equivalence.normal); |
| } else { |
| paths = getPathsWithValue(getResolvedCldrFileToCheck(), path, value, myType, myPrefix, matcher, currentAttributesToIgnore, Equivalence.normal); |
| } |
| |
| // Group exemplar cities and territories together for display collisions. |
| if (myType == Type.TERRITORY || myType == Type.ZONE) { |
| Type otherType = myType == Type.TERRITORY ? Type.ZONE : Type.TERRITORY; |
| Set<String> duplicatePaths = getPathsWithValue( |
| getResolvedCldrFileToCheck(), path, value, otherType, |
| otherType.getPrefix(), null, currentAttributesToIgnore, Equivalence.normal); |
| String exceptionRegion = getRegionException(getRegion(myType, path)); |
| if (exceptionRegion != null) { |
| for (String duplicatePath : duplicatePaths) { |
| String duplicateRegion = getRegion(otherType, duplicatePath); |
| if (exceptionRegion.equals(duplicateRegion)) { |
| duplicatePaths.remove(duplicatePath); |
| log("Removed duplicate path: '" + duplicatePath + "'"); |
| } |
| } |
| } |
| // account for collisions with England and the UK. Error message is a bit off for now. |
| // String subdivisionPath = nameToSubdivisionId.get(value); |
| // if (subdivisionPath != null) { |
| // paths.add(subdivisionPath); |
| // } |
| paths.addAll(duplicatePaths); |
| } else if (CHECK_FLAG_AND_EMOJI && myType == Type.ANNOTATIONS) { |
| // make sure that annotations don't have same value as regions, eg “日本” for 🇯🇵 & 🗾 |
| // NOTE: this is an asymmetric test; we presume the name of the region is ok. |
| Set<String> duplicatePaths = getPathsWithValue( |
| getResolvedCldrFileToCheck(), path, value, Type.TERRITORY, |
| Type.TERRITORY.getPrefix(), null, currentAttributesToIgnore, Equivalence.normal); |
| if (!duplicatePaths.isEmpty()) { |
| paths.addAll(duplicatePaths); |
| } |
| } |
| |
| if (paths.isEmpty()) { |
| // System.out.println("Paths is empty"); |
| // log("Paths is empty"); |
| return this; |
| } |
| |
| // Collisions between display names and symbols for the same currency are allowed. |
| if (myType == Type.CURRENCY) { |
| if (path.contains("/decimal") || path.contains("/group")) { |
| return this; |
| } |
| XPathParts parts = XPathParts.getFrozenInstance(path); |
| String currency = parts.getAttributeValue(-2, "type"); |
| Iterator<String> iterator = paths.iterator(); |
| while (iterator.hasNext()) { |
| String curVal = iterator.next(); |
| parts = XPathParts.getFrozenInstance(curVal); |
| if (currency.equals(parts.getAttributeValue(-2, "type")) || |
| curVal.contains("/decimal") || curVal.contains("/group")) { |
| iterator.remove(); |
| log("Removed '" + curVal + "': COLLISON WITH CURRENCY " + currency); |
| } |
| } |
| } |
| |
| // Collisions between different lengths and counts of the same unit are allowed |
| // Collisions between 'narrow' forms are allowed (the current is filtered by UNITS_IGNORE) |
| //ldml/units/unitLength[@type="narrow"]/unit[@type="duration-day-future"]/unitPattern[@count="one"] |
| if (myType == Type.UNITS || myType == Type.UNIT_PREFIX) { |
| XPathParts parts = XPathParts.getFrozenInstance(path); |
| int typeLocation = 3; |
| String myUnit = parts.getAttributeValue(typeLocation, "type"); |
| boolean isDuration = myUnit.startsWith("duration"); |
| Iterator<String> iterator = paths.iterator(); |
| while (iterator.hasNext()) { |
| String curVal = iterator.next(); |
| parts = XPathParts.getFrozenInstance(curVal); |
| String unit = parts.getAttributeValue(typeLocation, "type"); |
| // we also break the units into two groups: durations and others. Also never collide with a compoundUnitPattern. |
| if (unit == null || myUnit.equals(unit) || isDuration != unit.startsWith("duration") || compoundUnitPatterns.reset(curVal).find()) { |
| iterator.remove(); |
| log("Removed '" + curVal + "': COLLISON WITH UNIT " + unit); |
| } else { |
| // Remove allowed collisions, such as between carats and karats (same in many languages), or |
| // between foodcalories and either calories or kilocalories, or |
| // between hectopascal and millibar (physically the same unit, see #10425) |
| for (Map.Entry<String, Set<String>> mapPathPartToSet : mapPathPartsToSetsForDupOK.entrySet()) { |
| if (path.contains(mapPathPartToSet.getKey())) { |
| for (String pathPart : mapPathPartToSet.getValue()) { |
| if (curVal.contains(pathPart)) { |
| iterator.remove(); |
| log("Removed '" + curVal + "': COLLISON WITH UNIT " + unit); |
| break; |
| } |
| } |
| break; |
| } |
| } |
| } |
| } |
| } |
| // Collisions between different lengths and counts of the same field are allowed |
| if (myType == Type.FIELDS_RELATIVE) { |
| XPathParts parts = XPathParts.getFrozenInstance(path); |
| String myFieldType = parts.getAttributeValue(3, "type").split("-")[0]; |
| Iterator<String> iterator = paths.iterator(); |
| while (iterator.hasNext()) { |
| String curVal = iterator.next(); |
| parts = XPathParts.getFrozenInstance(curVal); |
| String fieldType = parts.getAttributeValue(3, "type").split("-")[0]; |
| if (myFieldType.equals(fieldType)) { |
| iterator.remove(); |
| log("Removed '" + curVal + "': COLLISON WITH FIELD " + fieldType); |
| } |
| } |
| } |
| // Collisions between different lengths of the same field are allowed |
| if (myType == Type.UNITS_COORDINATE) { |
| XPathParts parts = XPathParts.getFrozenInstance(path); |
| String myFieldType = (parts.containsElement("displayName"))? "displayName": parts.findAttributeValue("coordinateUnitPattern", "type"); |
| Iterator<String> iterator = paths.iterator(); |
| while (iterator.hasNext()) { |
| String curVal = iterator.next(); |
| parts = XPathParts.getFrozenInstance(curVal); |
| String fieldType = (parts.containsElement("displayName"))? "displayName": parts.findAttributeValue("coordinateUnitPattern", "type"); |
| if (myFieldType.equals(fieldType)) { |
| iterator.remove(); |
| log("Removed '" + curVal + "': COLLISON WITH FIELD " + fieldType); |
| } |
| } |
| } |
| |
| // removeMatches(myType); |
| // check again on size |
| if (paths.isEmpty()) { |
| return this; |
| } |
| |
| // ok, we probably have a collision! Extract the types |
| Set<String> collidingTypes = new TreeSet<>(); |
| |
| if (SKIP_TYPE_CHECK) { |
| for (String pathName : paths) { |
| currentAttributesToIgnore.reset(pathName); |
| collidingTypes.add(getPathReferenceForMessage(pathName, false)); |
| } |
| } else { |
| for (String dpath : paths) { |
| if (!typePattern.reset(dpath).find()) { |
| throw new IllegalArgumentException("Internal error: " + dpath + " doesn't match " |
| + typePattern.pattern()); |
| } |
| collidingTypes.add(typePattern.group(1)); |
| } |
| |
| // remove my type, and check again |
| if (!typePattern.reset(path).find()) { |
| throw new IllegalArgumentException("Internal error: " + path + " doesn't match " |
| + typePattern.pattern()); |
| } else { |
| collidingTypes.remove(typePattern.group(1)); |
| } |
| |
| // check one last time... |
| if (collidingTypes.isEmpty()) { |
| log("CollidingTypes is empty"); |
| return this; |
| } |
| } |
| |
| log("CollidingTypes has a size of " + collidingTypes.size()); |
| CheckStatus.Type thisErrorType; |
| // Also only do warnings during the build phase, so that SmokeTest will build. |
| if (getPhase() == Phase.BUILD) { |
| thisErrorType = CheckStatus.warningType; |
| } else { |
| thisErrorType = CheckStatus.errorType; |
| } |
| |
| // Check to see if we're colliding between standard and generic within the same metazone. |
| // If so, then it should be a warning instead of an error, since such collisions are acceptable |
| // as long as the context ( generic/recurring vs. specific time ) is known. |
| // ( JCE: 8/7/2012 ) |
| |
| // When long/short standard names for Etc/UTC is added to locale's <zone> items, |
| // a collision between exemplarCity and short-standard format was detected. |
| // |
| // CLDR tool code automatically generate exemplarCity value from zone ID algorithmically, |
| // but it should not be used for zones not associated with a location. For Etc/UTC, |
| // exemplarCity should be undefined. The value is generated by TimeZoneFormatter#getFallbackName(). |
| // There are many calling sites in CLDR tool code, and it looks all of them expect non-null |
| // value is returned. So, at this point, it's dangerous to touch the code to return |
| // null, or throw an exception. |
| // |
| // In addition to above, collisions between exemplarCity and other zone display name |
| // values should be accepted, because exmemplarCity is always formatted with <regionFormat> |
| // pattern. However, the collision issue only occurs for the special case - Etc/UTC, |
| // we handle the specific case as exception. We may revisit this issue later. |
| // (Yoshito 2017-01-27) |
| |
| if (path.contains("timeZoneNames") && collidingTypes.size() == 1) { |
| PathHeader pathHeader = getPathHeaderFactory().fromPath(path); |
| String thisZone = pathHeader.getHeader(); |
| String thisZoneType = pathHeader.getCode(); |
| String collisionString = collidingTypes.toString(); |
| int csStart, csEnd; |
| if (collisionString.startsWith("[<a")) { |
| csStart = collisionString.indexOf('>') + 1; |
| csEnd = collisionString.indexOf('<', csStart); |
| } else { |
| csStart = collisionString.indexOf('[') + 1; |
| csEnd = collisionString.indexOf(']', csStart); |
| } |
| collisionString = collisionString.substring(csStart, csEnd); |
| int delimiter_index = collisionString.indexOf(':'); |
| String collidingZone = collisionString.substring(0, delimiter_index); |
| String collidingZoneType = collisionString.substring(delimiter_index + 2); |
| if (thisZone.equals(collidingZone)) { |
| if (thisZone.startsWith("Etc/") |
| && (thisZoneType.equals("exemplarCity") || collidingZoneType.equals("exemplarCity"))) { |
| log("Ignore a collision between exemplarCity and another name for Etc/* zones"); |
| return this; |
| } |
| Set<String> collidingZoneTypes = new TreeSet<>(); |
| collidingZoneTypes.add(thisZoneType); |
| collidingZoneTypes.add(collidingZoneType); |
| if (collidingZoneTypes.size() == 2 && |
| collidingZoneTypes.contains("standard-short") && |
| collidingZoneTypes.contains("generic-short")) { |
| thisErrorType = CheckStatus.warningType; |
| } |
| } |
| } else if (myType == Type.SCRIPT && collidingTypes.size() == 1) { |
| String collisionString = collidingTypes.toString(); |
| if (path.contains("stand-alone") || collisionString.contains("stand-alone")) { |
| thisErrorType = CheckStatus.warningType; |
| } |
| } |
| CheckStatus item = new CheckStatus().setCause(this) |
| .setMainType(thisErrorType) |
| .setSubtype(Subtype.displayCollision) |
| .setCheckOnSubmit(false) |
| .setMessage(message, new Object[] { collidingTypes.toString() }); |
| result.add(item); |
| return this; |
| } |
| |
| /* |
| * Log a message |
| */ |
| private void log(String string) { |
| if (LOG_PATH_REMOVALS) { |
| System.out.println(string); |
| } |
| } |
| |
/**
 * How strictly getPathsWithValue compares the value of a candidate path with the value
 * being checked, after the candidate has been retrieved from the file:
 * normal - no further comparison is made;
 * exact - the candidate's winning value must equal the value;
 * unit - both values are passed through SimpleXMLSource.normalizeCaseSensitive and must then be equal.
 */
enum Equivalence {
    normal, exact, unit
}
| |
| private Set<String> getPathsWithValue(CLDRFile file, String path, |
| String value, Type myType, |
| String myPrefix, Matcher matcher, |
| Matcher currentAttributesToIgnore, |
| Equivalence equivalence) { |
| |
| if (DEBUG_PATH_PART != null & path.contains(DEBUG_PATH_PART)) { |
| int debug = 0; |
| } |
| |
| Set<String> retrievedPaths = new HashSet<>(); |
| if (myType.matchType == MatchType.PREFIX) { |
| file.getPathsWithValue(value, myPrefix, matcher, retrievedPaths); |
| } else { |
| file.getPathsWithValue(value, "//ldml", myType.getPattern().matcher(""), retrievedPaths); |
| } |
| |
| String normValue = null; |
| if (equivalence == Equivalence.unit) { |
| normValue = SimpleXMLSource.normalizeCaseSensitive(value); |
| // System.out.println("DEBUG:\t" + "units"); |
| // for (String s : retrievedPaths) { |
| // System.out.println("DEBUG:\t" + file.getStringValue(s) + "\t" + s); |
| // } |
| } |
| |
| // Do first cleanup |
| // remove paths with "alt/count" per currentAttributesToIgnore; they can be duplicates |
| Set<String> paths = new HashSet<>(); |
| for (String pathName : retrievedPaths) { |
| Type thisPathType = Type.getType(pathName); |
| // If the colliding path is of a different type than the original, |
| // then it can't be a collision we care about. |
| if (myType != thisPathType) { |
| continue; |
| } |
| if (exclusions.reset(pathName).find() && thisPathType != Type.UNITS_COORDINATE) { |
| continue; |
| } |
| // we only care about winning paths |
| if (!getResolvedCldrFileToCheck().isWinningPath(path)) { |
| continue; |
| } |
| // special cases: don't look at CODE_FALLBACK |
| if (myType == Type.CURRENCY && isCodeFallback(path)) { |
| continue; |
| } |
| if (equivalence == Equivalence.exact) { |
| String otherValue = file.getWinningValue(pathName); |
| if (!otherValue.equals(value)) { |
| continue; |
| } |
| } else if (equivalence == Equivalence.unit) { |
| String otherValue = SimpleXMLSource.normalizeCaseSensitive(file.getWinningValue(pathName)); |
| if (!otherValue.equals(normValue)) { |
| continue; |
| } |
| } |
| // clean up the pat |
| String newPath = currentAttributesToIgnore.reset(pathName).replaceAll(""); |
| paths.add(newPath); |
| } |
| // System.out.println("Paths has a size of:"+paths.size()); |
| String cleanPath = currentAttributesToIgnore.reset(path).replaceAll(""); |
| paths.remove(cleanPath); |
| // System.out.println("Removed path: '"+cleanPath+"'"); |
| //System.out.println("Paths returned has a size of "+paths.size()); |
| return paths; |
| } |
| |
| private boolean isCodeFallback(String dpath) { |
| String locale = getResolvedCldrFileToCheck().getSourceLocaleID(dpath, null); |
| return locale.equals(XMLSource.CODE_FALLBACK_ID); |
| } |
| |
| // private Map<String,String> nameToSubdivisionId = Collections.emptyMap(); |
| |
| @Override |
| public CheckCLDR setCldrFileToCheck(CLDRFile cldrFileToCheck, Options options, |
| List<CheckStatus> possibleErrors) { |
| if (cldrFileToCheck == null) return this; |
| super.setCldrFileToCheck(cldrFileToCheck, options, possibleErrors); |
| // pick up the 3 subdivisions |
| // nameToSubdivisionId = EmojiSubdivisionNames.getNameToSubdivisionPath(cldrFileToCheck.getLocaleID()); |
| return this; |
| } |
| |
| /** |
| * @param type |
| * the type of the xpath |
| * @param xpath |
| * @return the region code of the xpath |
| */ |
| private String getRegion(Type type, String xpath) { |
| int index = type == Type.ZONE ? -2 : -1; |
| return XPathParts.getFrozenInstance(xpath).getAttributeValue(index, "type"); |
| } |
| |
/**
 * Map with the exceptions: each exemplar-city zone ID maps to the territory code it may
 * share a name with, and each such territory code maps back to the zone ID.
 * Stays null until getRegionException builds it on first use.
 */
private Map<String, String> exceptions;
| |
| /** |
| * Checks if the specified region code has any exceptions to the requirement |
| * that all exemplar cities and territory names have to be unique. |
| * |
| * @param regionCode |
| * the region code to be checked |
| * @return the corresponding region code that can have a value identical to |
| * the specified region code |
| */ |
| public String getRegionException(String regionCode) { |
| if (exceptions != null) { |
| String lookup = exceptions.get(regionCode); |
| return lookup; |
| } |
| |
| CLDRFile english = getEnglishFile(); |
| // Pick up all instances in English where the exemplarCity and territory match |
| // and include them as exceptions. |
| exceptions = new HashMap<>(); |
| for (Iterator<String> it = english.iterator(Type.ZONE.getPrefix()); it.hasNext();) { |
| String xpath = it.next(); |
| if (!xpath.endsWith("/exemplarCity")) continue; |
| String value = english.getStringValue(xpath); |
| Set<String> duplicates = getPathsWithValue(english, xpath, value, |
| Type.TERRITORY, Type.TERRITORY.getPrefix(), null, ignoreAltAndCountAttributes, Equivalence.normal); |
| if (duplicates.size() > 0) { |
| // Assume only 1 duplicate. |
| String duplicatePath = duplicates.iterator().next(); |
| String exemplarCity = getRegion(Type.ZONE, xpath); |
| String territory = getRegion(Type.TERRITORY, duplicatePath); |
| addRegionException(exemplarCity, territory); |
| } |
| } |
| |
| // Add hardcoded exceptions |
| addRegionException("America/Antigua", "AG"); // Antigua and Barbados |
| addRegionException("Atlantic/Canary", "IC"); // Canary Islands |
| addRegionException("America/Cayman", "KY"); // Cayman Islands |
| addRegionException("Indian/Christmas", "CX"); // Christmas Islands |
| addRegionException("Indian/Cocos", "CC"); // Cocos [Keeling] Islands |
| addRegionException("Indian/Comoro", "KM"); // Comoros Islands, Eastern Africa |
| addRegionException("Atlantic/Faeroe", "FO"); // Faroe Islands |
| addRegionException("Pacific/Pitcairn", "PN"); // Pitcairn Islands |
| addRegionException("Atlantic/St_Helena", "SH"); // Saint Helena |
| addRegionException("America/St_Kitts", "KN"); // Saint Kitts and Nevis |
| addRegionException("America/St_Lucia", "LC"); // Saint Lucia |
| addRegionException("Europe/Vatican", "VA"); // Vatican City |
| addRegionException("Pacific/Norfolk", "NF"); // Norfolk Island |
| // Some languages don't distinguish between the following city/territory |
| // pairs because the city is in the territory and sounds too similar. |
| addRegionException("Africa/Algiers", "DZ"); // Algeria |
| addRegionException("Africa/Tunis", "TN"); // Tunisia |
| return exceptions.get(regionCode); |
| } |
| |
| /** |
| * Adds an exemplarCity/territory pair to the list of region exceptions. |
| */ |
| private void addRegionException(String exemplarCity, String territory) { |
| exceptions.put(exemplarCity, territory); |
| exceptions.put(territory, exemplarCity); |
| } |
| } |