// blob: b64ad69a6d3c2a3c68d9547922c716c5db5c5900 (repository-browser page header: [file] [log] [blame])
package org.unicode.cldr.unittest;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.ChainedMap;
import org.unicode.cldr.util.ChainedMap.M3;
import org.unicode.cldr.util.ChainedMap.M4;
import org.unicode.cldr.util.ChainedMap.M5;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdData.ElementType;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.Factory;
import org.unicode.cldr.util.PathHeader.PageId;
import org.unicode.cldr.util.PathHeader.SectionId;
import org.unicode.cldr.util.PathStarrer;
import org.unicode.cldr.util.XMLFileReader;
import org.unicode.cldr.util.XPathParts;
import com.google.common.collect.ImmutableSet;
public class TestPaths extends TestFmwkPlus {
/** Shared CLDR configuration; the tests below obtain the root and English files and the CLDR factory from it. */
static CLDRConfig testInfo = CLDRConfig.getInstance();
/**
 * Command-line entry point: creates this test class and runs it with the given arguments.
 *
 * @param args arguments handed unchanged to the test's {@code run} method
 */
public static void main(String[] args) {
    TestPaths test = new TestPaths();
    test.run(args);
}
/**
 * Reports paths that are present in English but missing from root.
 *
 * <p>Paths are skipped when their value is found at a different path (aliases),
 * when they contain {@code [@count="one"]}, when their PathHeader is in the
 * Special section, or when their code ends with {@code -name-other}.
 * Of the remaining paths, those containing {@code @alt} whose code is not in the
 * skip list and whose page is not Alphabetic_Information are reported as errors;
 * all others are only logged as suspicious.
 */
public void VerifyEnglishVsRoot() {
    Set<String> rootPaths = new HashSet<>();
    testInfo.getRoot().forEach(rootPaths::add);
    Set<String> englishPaths = new HashSet<>();
    testInfo.getEnglish().forEach(englishPaths::add);
    englishPaths.removeAll(rootPaths);
    if (englishPaths.isEmpty()) {
        return;
    }
    Factory phf = PathHeader.getFactory(testInfo.getEnglish());
    Status status = new Status();
    Set<PathHeader> suspiciousPaths = new TreeSet<>();
    Set<PathHeader> errorPaths = new TreeSet<>();
    ImmutableSet<String> skipVariant = ImmutableSet.of(
        "ps-variant", "ug-variant", "ky-variant", "az-short",
        "Arab-variant", "am-variant", "pm-variant");
    for (String path : englishPaths) {
        // Fill in status.pathWhereFound for this path. Without this call the
        // status object is never populated and the alias check below cannot work.
        testInfo.getEnglish().getSourceLocaleID(path, status);
        // skip aliases, other counts
        if (!path.equals(status.pathWhereFound)
            || path.contains("[@count=\"one\"]")) {
            continue;
        }
        PathHeader ph = phf.fromPath(path);
        if (ph.getSectionId() == SectionId.Special
            || ph.getCode().endsWith("-name-other")) {
            continue;
        }
        if (path.contains("@alt") && !skipVariant.contains(ph.getCode())
            && ph.getPageId() != PageId.Alphabetic_Information) {
            errorPaths.add(ph);
        } else {
            suspiciousPaths.add(ph);
        }
    }
    if (!errorPaths.isEmpty()) {
        errln("Error: paths in English but not root:"
            + getPaths(errorPaths));
    }
    logln("Suspicious: paths in English but not root:"
        + getPaths(suspiciousPaths));
}
/**
 * Formats the given path headers for a log message: one entry per header, each
 * starting on a new line with two tabs, followed by the header, a colon and tab,
 * and the English string value of the header's original path.
 *
 * @param altPaths the path headers to list
 * @return the concatenated listing (empty if there are no headers)
 */
private String getPaths(Set<PathHeader> altPaths) {
    final StringBuilder listing = new StringBuilder();
    for (final PathHeader header : altPaths) {
        listing.append("\n\t\t").append(header).append(":\t");
        final String englishValue = testInfo.getEnglish().getStringValue(header.getOriginalPath());
        listing.append(englishValue);
    }
    return listing.toString();
}
/**
 * For every locale under test, visits all paths of the file and then its
 * "extra" paths, running checkFullpathValue on each one and checkPrettyPaths
 * on each path the first time it is encountered.
 */
public void TestPathHeadersAndValues() {
    // checkPrettyPaths isn't really locale-dependent, so each path is checked
    // only once, for the first locale in which it occurs; this set remembers
    // the paths already handled.
    final Set<String> prettyChecked = new HashSet<>();
    final CLDRFile english = testInfo.getCldrFactory().make("en", true);
    final PathHeader.Factory headerFactory = PathHeader.getFactory(english);
    final Status status = new Status();
    for (final String locale : getLocalesToTest()) {
        final CLDRFile file = testInfo.getCLDRFile(locale, true);
        logln("Testing path headers and values for locale => " + locale);
        final Collection<String> extraPaths = file.getExtraPaths();

        // Regular paths; anything that is also an extra path is left for the second pass.
        final Iterator<String> regularPaths = file.iterator();
        while (regularPaths.hasNext()) {
            final String path = regularPaths.next();
            if (!extraPaths.contains(path)) {
                checkFullpathValue(path, file, locale, status, false /* not extra path */);
                if (prettyChecked.add(path)) {
                    checkPrettyPaths(path, headerFactory);
                }
            }
        }

        // Extra paths.
        for (final String extraPath : extraPaths) {
            checkFullpathValue(extraPath, file, locale, status, true /* extra path */);
            if (prettyChecked.add(extraPath)) {
                checkPrettyPaths(extraPath, headerFactory);
            }
        }
    }
}
/**
 * Verifies, for one path of one CLDRFile, that the full path, the value, the
 * source locale and the status' pathWhereFound are all non-null, and that the
 * path (and the full path, when it differs) is already in the form produced
 * by XPathParts.
 *
 * A null value is tolerated for the exceptional paths accepted by
 * allowsExtraPath; in that case the source and pathWhereFound checks are
 * not performed.
 *
 * @param path the path, such as '//ldml/dates/fields/field[@type="tue"]/relative[@type="1"]'
 * @param file the CLDRFile being examined
 * @param locale the locale string, used only in messages
 * @param status the Status passed to (and set by) getSourceLocaleID
 * @param isExtraPath true if the path is an "extra" path, else false
 */
private void checkFullpathValue(String path, CLDRFile file, String locale, Status status, boolean isExtraPath) {
    final String fullPath = file.getFullXPath(path);
    final String value = file.getStringValue(path);
    final String source = file.getSourceLocaleID(path, status);
    final String prefix = "Locale: " + locale;

    assertEquals("CanonicalOrder", XPathParts.getFrozenInstance(path).toString(), path);

    if (fullPath == null) {
        errln(prefix + ",\t Null FullPath: " + path);
    } else if (!path.equals(fullPath)) {
        assertEquals("CanonicalOrder (FP)", XPathParts.getFrozenInstance(fullPath).toString(), fullPath);
    }

    if (value == null) {
        final boolean nullIsAcceptable = allowsExtraPath(path, isExtraPath);
        if (nullIsAcceptable) {
            return;
        }
        errln(prefix + ",\t Value=null, \tPath: " + path + ",\t IsExtraPath: " + isExtraPath);
    }

    if (source == null) {
        errln(prefix + ",\t Source=null, \tPath: " + path);
    }

    if (status.pathWhereFound == null) {
        errln(prefix + ",\t Path=null, \tPath: " + path);
    }
}
/**
 * Paths whose value may be null whether or not they are "extra" paths
 * (consulted by allowsExtraPath). Declared static because the set is
 * immutable and identical for every instance.
 */
static final ImmutableSet<String> ALLOWED_NULL = ImmutableSet.of(
    "//ldml/dates/timeZoneNames/zone[@type=\"Australia/Currie\"]/exemplarCity",
    "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Enderbury\"]/exemplarCity"
);
/**
 * Tells whether the given path is allowed to have a null value.
 *
 * @param path the path to test
 * @param isExtraPath true if the path is an "extra" path
 * @return true if the path is listed in ALLOWED_NULL, or if it is an extra
 *         path for which extraPathAllowsNullValue returns true
 */
public boolean allowsExtraPath(String path, boolean isExtraPath) {
    if (ALLOWED_NULL.contains(path)) {
        return true;
    }
    return isExtraPath && extraPathAllowsNullValue(path);
}
/**
 * Tells whether an extra path is one of the exceptional ones for which a null
 * value is acceptable.
 *
 * As of 2019-08-09, null values occur for many "metazone" paths such as
 *   //ldml/dates/timeZoneNames/metazone[@type="Galapagos"]/long/standard
 * in many locales; for some "zone" paths such as
 *   //ldml/dates/timeZoneNames/zone[@type="Pacific/Honolulu"]/short/generic
 * in locales including root, ja, and ar; and for some "dayPeriods" paths such as
 *   //ldml/dates/calendars/calendar[@type="gregorian"]/dayPeriods/dayPeriodContext[@type="stand-alone"]/dayPeriodWidth[@type="wide"]/dayPeriod[@type="midnight"]
 * in exactly six locales: bs_Cyrl, bs_Cyrl_BA, pa_Arab, pa_Arab_PK, uz_Arab, uz_Arab_AF.
 *
 * A JavaScript function of the same name is nearly identical to this one; the
 * two must be kept consistent. Ideally the server would encapsulate this
 * knowledge (for example by sending special fallback values instead of null)
 * so that the client would not need it.
 *
 * Extra paths come from CLDRFile.getRawExtraPathsPrivate; if that function
 * changes, this one may need to allow null for further paths.
 *
 * Reference: https://unicode-org.atlassian.net/browse/CLDR-11238
 *
 * @param path the extra path
 * @return true if a null value is allowed for the path, else false
 */
private boolean extraPathAllowsNullValue(String path) {
    // A path qualifies as soon as it contains any one of these fragments.
    // The Australia/Currie and Pacific/Enderbury exemplarCity paths once listed
    // here individually are in ALLOWED_NULL, which allowsExtraPath consults.
    final String[] nullableFragments = {
        "/timeZoneNames/metazone",
        "/timeZoneNames/zone",
        "/dayPeriods/dayPeriodContext",
        "/unitPattern",
        "/gender",
        "/caseMinimalPairs",
        "/genderMinimalPairs",
    };
    for (final String fragment : nullableFragments) {
        if (path.contains(fragment)) {
            return true;
        }
    }
    return false;
}
/**
 * Verifies that converting the path to a PathHeader and asking that header for
 * its original path gives back the same path. Paths ending in "/alias" are
 * not checked.
 *
 * @param path the path string
 * @param phf the PathHeader.Factory used for the conversion
 */
private void checkPrettyPaths(String path, PathHeader.Factory phf) {
    final boolean isAlias = path.endsWith("/alias");
    if (isAlias) {
        return;
    }
    logln("Testing ==> " + path);
    final String pretty = phf.fromPath(path).toString();
    final String roundTripped = phf.fromPath(path).getOriginalPath();
    if (path.equals(roundTripped)) {
        logln(pretty + "\t" + path);
        return;
    }
    errln("Path Header doesn't roundtrip:\t" + path + "\t" + pretty
        + "\t" + roundTripped);
}
/**
 * Chooses the locales to test from the inclusion level: a fixed list of six
 * locales at level 5 or below, the factory's available languages below
 * level 10, and everything the factory has available otherwise.
 *
 * @return the locale IDs to test
 */
private Collection<String> getLocalesToTest() {
    if (params.inclusion <= 5) {
        return Arrays.asList("root", "en", "ja", "ar", "de", "ru");
    }
    if (params.inclusion < 10) {
        return testInfo.getCldrFactory().getAvailableLanguages();
    }
    return testInfo.getCldrFactory().getAvailable();
}
/**
 * Finds all the items that are deprecated but appear in paths,
 * and the items that aren't deprecated but don't appear in paths.
 */
static final class CheckDeprecated {
    /** Prefix shared by every "missing from data" message emitted by {@link #show(int)}. */
    private static final String MISSING_PREFIX = "Data doesn't contain: ";

    /** Deprecated element names that {@link #check} tolerates without reporting an error. */
    static final Set<String> ALLOWED = new HashSet<>(Arrays.asList("postalCodeData", "postCodeRegex"));

    /** Attribute names that {@link #show(int)} never reports as missing. */
    static final Set<String> OK_IF_MISSING = new HashSet<>(Arrays.asList("alt", "draft", "references"));

    /**
     * What {@link #check} has seen so far, keyed by DTD type, element name,
     * attribute name and attribute value; "*" is stored in the attribute and/or
     * value position to record the element or attribute on its own.
     */
    M5<DtdType, String, String, String, Boolean> data = ChainedMap.of(
        new HashMap<DtdType, Object>(),
        new HashMap<String, Object>(),
        new HashMap<String, Object>(),
        new HashMap<String, Object>(),
        Boolean.class);

    /** The test instance through which errors, warnings and log lines are emitted. */
    private final TestPaths testPaths;

    public CheckDeprecated(TestPaths testPaths) {
        this.testPaths = testPaths;
    }

    /**
     * Examines every element, attribute and attribute value of one path,
     * recording each in {@link #data} and reporting the first deprecated item found.
     *
     * @param dtdData the DTD data used for the deprecation lookups
     * @param parts the parsed path
     * @param fullName the file name, used only in messages
     * @return true if a deprecated item was reported; false otherwise,
     *         including when a deprecated element from {@link #ALLOWED} stops the scan
     */
    public boolean check(DtdData dtdData, XPathParts parts, String fullName) {
        final DtdType dtdType = dtdData.dtdType;
        for (int index = 0; index < parts.size(); ++index) {
            final String element = parts.getElement(index);
            if (dtdData.isDeprecated(element, "*", "*")) {
                if (!ALLOWED.contains(element)) {
                    testPaths.errln("Deprecated element in data: "
                        + dtdType
                        + ":" + element
                        + " \t;" + fullName);
                    return true;
                }
                return false;
            }
            data.put(dtdType, element, "*", "*", true);
            for (Entry<String, String> entry : parts.getAttributes(index).entrySet()) {
                final String attribute = entry.getKey();
                if (dtdData.isDeprecated(element, attribute, "*")) {
                    String message = "Deprecated attribute in data: "
                        + dtdType
                        + ":" + element
                        + ":" + attribute
                        + " \t;" + fullName;
                    if (attribute.equals("draft")) {
                        message += " - consider adding to DtdData.DRAFT_ON_NON_LEAF_ALLOWED if you are sure this is ok.";
                    }
                    testPaths.errln(message);
                    return true;
                }
                final String value = entry.getValue();
                if (dtdData.isDeprecated(element, attribute, value)) {
                    testPaths.errln("Deprecated attribute value in data: "
                        + dtdType
                        + ":" + element
                        + ":" + attribute
                        + ":" + value
                        + " \t;" + fullName);
                    return true;
                }
                data.put(dtdType, element, attribute, "*", true);
                data.put(dtdType, element, attribute, value, true);
            }
        }
        return false;
    }

    /**
     * Reports the non-deprecated DTD items that never showed up in {@link #data}:
     * a warning for a whole DTD type, a log line for an element, attribute or
     * attribute value.
     *
     * @param inclusion the inclusion level; at 5 or below the platform DTD type is skipped
     */
    public void show(int inclusion) {
        for (DtdType dtdType : DtdType.values()) {
            // keyboards/*/_platform.xml won't be in the list for non-exhaustive runs
            final boolean platformNotLoaded = inclusion <= 5 && dtdType == DtdType.platform;
            if (dtdType == DtdType.ldmlICU || platformNotLoaded) {
                continue;
            }
            final M4<String, String, String, Boolean> seenElements = data.get(dtdType);
            if (seenElements == null) {
                testPaths.warnln(MISSING_PREFIX + dtdType);
                continue;
            }
            final DtdData dtdData = DtdData.getInstance(dtdType);
            for (Element element : dtdData.getElements()) {
                if (element.isDeprecated() || element == dtdData.ANY || element == dtdData.PCDATA) {
                    continue;
                }
                final M3<String, String, Boolean> seenAttributes = seenElements.get(element.name);
                if (seenAttributes == null) {
                    logMissing(dtdType + ":" + element.name);
                    continue;
                }
                for (Attribute attribute : element.getAttributes().keySet()) {
                    if (attribute.isDeprecated() || OK_IF_MISSING.contains(attribute.name)) {
                        continue;
                    }
                    final Map<String, Boolean> seenValues = seenAttributes.get(attribute.name);
                    if (seenValues == null) {
                        logMissing(dtdType + ":" + element.name + ":" + attribute.name);
                        continue;
                    }
                    for (String value : attribute.values.keySet()) {
                        if (!attribute.isDeprecatedValue(value) && !seenValues.containsKey(value)) {
                            logMissing(dtdType + ":" + element.name + ":" + attribute.name + ":" + value);
                        }
                    }
                }
            }
        }
    }

    /** Logs that the item described by {@code what} was not found in the data. */
    private void logMissing(String what) {
        testPaths.logln(MISSING_PREFIX + what);
    }
}
/**
 * Reads the XML files in every subdirectory of keyboards/, common/, seed/ and
 * exemplars/ under CLDRPaths.BASE_DIRECTORY (the "properties" subdirectory is
 * skipped; at inclusion level 5 or below at most 20 XML files are read per
 * subdirectory) and, for each path/value pair:
 * <ul>
 * <li>checks that the value is empty exactly when the final element is not PCDATA;</li>
 * <li>checks for deprecated elements, attributes and attribute values
 *     (the first hit ends processing of that file);</li>
 * <li>checks the distinguishing and normalized forms of the path against the DTD
 *     when they differ from the path itself;</li>
 * <li>reports duplicates once non-distinguishing attributes are removed.</li>
 * </ul>
 * Paths whose final element is "version" or "generation" are excluded from the
 * last two checks. Finally, DTD items never seen in the data are reported.
 */
public void TestNonLdml() {
    int maxPerDirectory = getInclusion() <= 5 ? 20 : Integer.MAX_VALUE;
    CheckDeprecated checkDeprecated = new CheckDeprecated(this);
    PathStarrer starrer = new PathStarrer();
    StringBuilder removed = new StringBuilder();
    Set<String> nonFinalValues = new LinkedHashSet<>();
    Set<String> skipLast = new HashSet<>(Arrays.asList("version", "generation"));
    String[] normalizedPath = { "" };
    int counter = 0;
    for (String directory : Arrays.asList("keyboards/", "common/", "seed/", "exemplars/")) {
        String dirPath = CLDRPaths.BASE_DIRECTORY + directory;
        // File.list() returns null when the directory is missing or unreadable.
        String[] subdirectoryNames = new File(dirPath).list();
        if (subdirectoryNames == null) {
            errln("Can't list directory: " + dirPath);
            continue;
        }
        for (String fileName : subdirectoryNames) {
            File dir2 = new File(dirPath + fileName);
            if (!dir2.isDirectory()
                || fileName.equals("properties") // TODO as flat files
            ) {
                continue;
            }
            String[] fileNames = dir2.list();
            if (fileNames == null) {
                errln("Can't list directory: " + dir2);
                continue;
            }
            Set<Pair<String, String>> seen = new HashSet<>();
            Set<String> seenStarred = new HashSet<>();
            int count = 0;
            Set<Element> haveErrorsAlready = new HashSet<>();
            for (String file : fileNames) {
                if (!file.endsWith(".xml")) {
                    continue;
                }
                if (++count > maxPerDirectory) {
                    break;
                }
                // The DTD type is taken from the root element of the first path in the file.
                DtdType type = null;
                DtdData dtdData = null;
                String fullName = dir2 + "/" + file;
                for (Pair<String, String> pathValue : XMLFileReader.loadPathValues(fullName, new ArrayList<Pair<String, String>>(), true)) {
                    String path = pathValue.getFirst();
                    final String value = pathValue.getSecond();
                    XPathParts frozenParts = XPathParts.getFrozenInstance(path);
                    if (dtdData == null) {
                        type = DtdType.valueOf(frozenParts.getElement(0));
                        dtdData = DtdData.getInstance(type);
                    }
                    checkValueMatchesElementType(frozenParts, dtdData, path, value,
                        fileName + "/" + file, haveErrorsAlready);
                    if (checkDeprecated.check(dtdData, frozenParts, fullName)) {
                        break;
                    }
                    if (skipLast.contains(frozenParts.getElement(-1))) {
                        continue;
                    }
                    String dpath = CLDRFile.getDistinguishingXPath(path, normalizedPath);
                    if (!dpath.equals(path)) {
                        checkParts(dpath, dtdData);
                    }
                    // Compare the normalized path itself (element 0), not the array
                    // holding it: an array never equals a String.
                    if (!normalizedPath[0].equals(path) && !normalizedPath[0].equals(dpath)) {
                        checkParts(normalizedPath[0], dtdData);
                    }
                    // removeNonDistinguishing modifies the parts, so it needs a thawed copy.
                    XPathParts thawedParts = frozenParts.cloneAsThawed();
                    counter = removeNonDistinguishing(thawedParts, dtdData, counter, removed, nonFinalValues);
                    String cleaned = thawedParts.toString();
                    Pair<String, String> pair = Pair.of(type == DtdType.ldml ? file : type.toString(), cleaned);
                    if (seen.contains(pair)) {
                        errln("Duplicate: " + file + ", " + path + ", " + cleaned + ", " + value);
                    } else {
                        seen.add(pair);
                        if (!nonFinalValues.isEmpty()) {
                            String starredPath = starrer.set(path);
                            if (!seenStarred.contains(starredPath)) {
                                seenStarred.add(starredPath);
                                logln("Non-node values: " + nonFinalValues + "\t" + path);
                            }
                        }
                        if (isVerbose()) {
                            String starredPath = starrer.set(path);
                            if (!seenStarred.contains(starredPath)) {
                                seenStarred.add(starredPath);
                                logln("@" + "\t" + cleaned + "\t" + removed);
                            }
                        }
                    }
                }
            }
        }
    }
    checkDeprecated.show(getInclusion());
}

/**
 * Checks that a value is empty exactly when the final element of its path is
 * not of type PCDATA, reporting at most one error per final element.
 *
 * @param pathParts the parsed path
 * @param dtdData the DTD data used to look up the final element
 * @param path the path string, used only in the error message
 * @param value the value read for the path
 * @param fileLabel "directory/file" label used in the error message
 * @param haveErrorsAlready final elements already reported; updated on error
 */
private void checkValueMatchesElementType(XPathParts pathParts, DtdData dtdData, String path, String value,
    String fileLabel, Set<Element> haveErrorsAlready) {
    Element finalElement = dtdData.getElementFromName().get(pathParts.getElement(-1));
    if (haveErrorsAlready.contains(finalElement)) {
        return;
    }
    ElementType elementType = finalElement.getType();
    // HACK!! Known exceptions are treated as EMPTY elements.
    if (pathParts.size() > 1 && "identity".equals(pathParts.getElement(1))) {
        elementType = ElementType.EMPTY;
        logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
    } else if (pathParts.size() > 2
        && "validity".equals(pathParts.getElement(2))
        && value.isEmpty()) {
        String typeValue = pathParts.getAttributeValue(-1, "type");
        if ("TODO".equals(typeValue)
            || "locale".equals(typeValue)) {
            elementType = ElementType.EMPTY;
            logKnownIssue("cldrbug:9784", "fix TODO's in Attribute validity tests");
        }
    }
    if ((elementType == ElementType.PCDATA) == (value.isEmpty())
        && !finalElement.name.equals("nameOrderLocales")) {
        errln("PCDATA ≠ emptyValue inconsistency:"
            + "\tfile=" + fileLabel
            + "\telementType=" + elementType
            + "\tvalue=«" + value + "»"
            + "\tpath=" + path);
        haveErrorsAlready.add(finalElement); // suppress all but first error
    }
}
/**
 * Walks the path down from the DTD root, asserting that the first element
 * has the root's name, that every later element is a child of the one before
 * it, and that every attribute on the path is defined for its element.
 * Stops at the first element or attribute that cannot be found.
 *
 * @param path the path to validate
 * @param dtdData the DTD data supplying the root element
 */
private void checkParts(String path, DtdData dtdData) {
    final XPathParts parts = XPathParts.getFrozenInstance(path);
    Element current = dtdData.ROOT;
    for (int level = 0; level < parts.size(); ++level) {
        final String elementName = parts.getElement(level);
        if (level > 0) {
            current = current.getChildNamed(elementName);
            if (!assertNotNull("element", current)) {
                return; // failed
            }
        } else {
            assertEquals("root", current.name, elementName);
        }
        for (final String attributeName : parts.getAttributeKeys(level)) {
            final Attribute found = current.getAttributeNamed(attributeName);
            if (!assertNotNull("attribute", found)) {
                return; // failed
            }
            // later, check values
        }
    }
}
/** Non-distinguishing attributes that removeNonDistinguishing never adds to its nonFinalValues output. */
static final Set<String> SKIP_NON_NODE = new HashSet<>(Arrays.asList("references", "visibility", "access"));
/**
 * Strips every non-distinguishing attribute from the given path parts, and
 * stamps each ordered element with a "_q" attribute holding the running counter.
 *
 * @param parts the thawed XPathParts (can't be frozen, for putAttributeValue); modified in place
 * @param data the DTD data answering isOrdered and isDistinguishing
 * @param counter the next value to use for a "_q" attribute
 * @param removed cleared, then filled with one "/" per element followed by the
 *            attributes removed from that element, written as [@name="value"]
 * @param nonFinalValues cleared, then filled with the removed attributes found on
 *            elements other than the last one, except those in SKIP_NON_NODE and
 *            except "draft" when the second element is "transforms" or "collations"
 * @return the counter, advanced once for each ordered element encountered
 */
private int removeNonDistinguishing(XPathParts parts, DtdData data, int counter, StringBuilder removed, Set<String> nonFinalValues) {
    removed.setLength(0);
    nonFinalValues.clear();
    final HashSet<String> dropped = new HashSet<>();
    final int size = parts.size();
    final int lastIndex = size - 1;
    int nextCounter = counter;
    for (int index = 0; index < size; ++index) {
        removed.append("/");
        final String element = parts.getElement(index);
        if (data.isOrdered(element)) {
            parts.putAttributeValue(index, "_q", String.valueOf(nextCounter));
            nextCounter++;
        }
        // Collect first, remove afterwards, so the attribute keys are not modified while iterating.
        for (final String attribute : parts.getAttributeKeys(index)) {
            if (data.isDistinguishing(element, attribute)) {
                continue;
            }
            dropped.add(attribute);
            if (index == lastIndex || SKIP_NON_NODE.contains(attribute)) {
                continue;
            }
            final boolean draftInTransformsOrCollations = attribute.equals("draft")
                && (parts.getElement(1).equals("transforms") || parts.getElement(1).equals("collations"));
            if (!draftInTransformsOrCollations) {
                nonFinalValues.add(attribute);
            }
        }
        for (final String attribute : dropped) {
            removed.append("[@").append(attribute).append("=\"")
                .append(parts.getAttributeValue(index, attribute))
                .append("\"]");
            parts.removeAttribute(index, attribute);
        }
        dropped.clear();
    }
    return nextCounter;
}
}