| package org.unicode.cldr.unittest; |
| |
| import java.io.File; |
| import java.io.FileInputStream; |
| import java.io.InputStream; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.LinkedHashSet; |
| import java.util.List; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.concurrent.ConcurrentHashMap; |
| import java.util.concurrent.atomic.AtomicInteger; |
| import java.util.regex.Matcher; |
| import java.util.stream.Collectors; |
| import java.util.stream.Stream; |
| |
| import javax.xml.stream.XMLInputFactory; |
| import javax.xml.stream.XMLStreamConstants; |
| import javax.xml.stream.XMLStreamException; |
| import javax.xml.stream.XMLStreamReader; |
| |
| import org.unicode.cldr.tool.VerifyAttributeValues; |
| import org.unicode.cldr.tool.VerifyAttributeValues.Errors; |
| import org.unicode.cldr.util.AttributeValueValidity; |
| import org.unicode.cldr.util.AttributeValueValidity.AttributeValueSpec; |
| import org.unicode.cldr.util.AttributeValueValidity.MatcherPattern; |
| import org.unicode.cldr.util.AttributeValueValidity.Status; |
| import org.unicode.cldr.util.CLDRConfig; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRPaths; |
| import org.unicode.cldr.util.ChainedMap; |
| import org.unicode.cldr.util.ChainedMap.M4; |
| import org.unicode.cldr.util.DtdData; |
| import org.unicode.cldr.util.DtdData.ValueStatus; |
| import org.unicode.cldr.util.DtdType; |
| import org.unicode.cldr.util.LanguageInfo; |
| import org.unicode.cldr.util.Organization; |
| import org.unicode.cldr.util.StackTracker; |
| import org.unicode.cldr.util.StandardCodes; |
| import org.unicode.cldr.util.StandardCodes.LstrField; |
| import org.unicode.cldr.util.StandardCodes.LstrType; |
| import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo; |
| import org.unicode.cldr.util.Validity; |
| import org.unicode.cldr.util.XPathParts; |
| import org.xml.sax.Attributes; |
| |
| import com.google.common.base.Joiner; |
| import com.google.common.base.Splitter; |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableMap; |
| import com.google.common.collect.ImmutableSet; |
| import com.google.common.collect.ImmutableSortedSet; |
| import com.google.common.collect.Multimap; |
| import com.ibm.icu.dev.test.TestFmwk; |
| import com.ibm.icu.impl.Row.R3; |
| import com.ibm.icu.util.Output; |
| |
| public class TestAttributeValues extends TestFmwk { |
/** When true, TestValid checks files with a sequential stream; otherwise it uses a parallel stream. */
private static final boolean SERIAL = false;

/** Shared validity data; PathChecker.show consults it when listing unused codes. */
private static final Validity VALIDITY = Validity.getInstance();
/** Base directory handed to VerifyAttributeValues.findAttributeValues in oldTestCoreValidity. */
private static final File BASE_DIR = new File(CLDRPaths.BASE_DIRECTORY);
/** Joins items with a single space. */
public static final Joiner SPACE_JOINER = Joiner.on(' ');
/** Splits on spaces, trimming each piece and dropping empty pieces. */
public static final Splitter SPACE_SPLITTER = Splitter.on(' ').trimResults().omitEmptyStrings();
/** Splits on semicolons, trimming each piece and dropping empty pieces (used by quickTest). */
static final Splitter SEMI_SPACE = Splitter.on(';').trimResults().omitEmptyStrings();
/** Shared CLDR configuration; used here to obtain the English file for display names. */
private static final CLDRConfig config = CLDRConfig.getInstance();

/** The main directories scanned by TestValid: common and seed. */
static final List<String> COMMON_AND_SEED = ImmutableList.of(CLDRPaths.COMMON_DIRECTORY, CLDRPaths.SEED_DIRECTORY);
| |
| public static void main(String[] args) { |
| new TestAttributeValues().run(args); |
| } |
| |
| public void TestValid() { |
| String dtdTypeArg = params.props == null ? null : (String) params.props.get("dtdtype"); |
| |
| // short- circuits for testing. null means do all |
| Set<DtdType> checkTypes = dtdTypeArg == null ? DtdType.STANDARD_SET |
| : Collections.singleton(DtdType.valueOf(dtdTypeArg)) ; |
| ImmutableSet<ValueStatus> showStatuses = null ; // ImmutableSet.of(ValueStatus.invalid, ValueStatus.unknown); |
| |
| for (DtdType dtdType : checkTypes) { |
| PathChecker pathChecker = new PathChecker(this, DtdData.getInstance(dtdType)); |
| for (String mainDirs : COMMON_AND_SEED) { |
| Set<String> files = new TreeSet<>(); |
| for (String stringDir : dtdType.directories) { |
| addXMLFiles(dtdType, mainDirs + stringDir, files); |
| if (isVerbose()) |
| synchronized (pathChecker.testLog) { |
| warnln(mainDirs + stringDir); |
| } |
| } |
| Stream<String> stream = SERIAL ? files.stream() : files.parallelStream(); |
| stream.forEach(file -> checkFile(pathChecker, file)); |
| |
| // for (String file : files) { |
| // checkFile(pathChecker, file); |
| // } |
| } |
| pathChecker.show(isVerbose(), showStatuses); |
| } |
| // List<String> localesToTest = Arrays.asList("en", "root"); // , "zh", "hi", "ja", "ru", "cy" |
| // Set<String> localesToTest = config.getCommonAndSeedAndMainAndAnnotationsFactory().getAvailable(); |
| // // TODO, add all other files |
| |
| // for (String locale : localesToTest) { |
| // CLDRFile file = config.getCLDRFile(locale, false); |
| // for (String dpath : file) { |
| // String path = file.getFullXPath(dpath); |
| // pathChecker.checkPath(path); |
| // } |
| // } |
| } |
| |
| |
/** File names ({@code <locale>.xml}) of the locales in the cldr organization's coverage list, sorted. */
static final Set<String> CLDR_LOCALES;
static {
    Set<String> fileNames = StandardCodes.make()
        .getLocaleCoverageLocales(Organization.cldr)
        .stream()
        .map(locale -> locale + ".xml")
        .collect(Collectors.toSet());
    CLDR_LOCALES = ImmutableSortedSet.copyOf(fileNames);
}
| |
| private void addXMLFiles(DtdType dtdType, String path, Set<String> files) { |
| File dirFile = new File(path); |
| if (!dirFile.exists()) { |
| return; |
| } |
| if (!dirFile.isDirectory()) { |
| // if (getInclusion() <= 5 |
| // && dtdType == DtdType.ldml) { |
| // if (path.contains("/annotationsDerived/")) { |
| // return; |
| // } |
| // String ending = path.substring(path.lastIndexOf('/')+1); |
| // if (!CLDR_LOCALES.contains(ending)) { |
| // return; |
| // } |
| // } |
| files.add(path); |
| } else { |
| for (String file : dirFile.list()) { |
| addXMLFiles(dtdType, path + "/" + file, files); |
| } |
| } |
| } |
| |
| |
| private void checkFile(PathChecker pathChecker, String fullFile) { |
| if (!fullFile.endsWith(".xml")) { |
| return; |
| } |
| pathChecker.fileCount.incrementAndGet(); |
| // if (isVerbose()) synchronized (this) { |
| // logln(fullFile); |
| // } |
| XMLInputFactory f = XMLInputFactory.newInstance(); |
| // XMLInputFactory f = XMLInputFactory.newFactory("org.apache.xerces.jaxp.SAXParserFactoryImpl", |
| // ClassLoader.getSystemClassLoader()); |
| |
| int _elementCount = 0; |
| int _attributeCount = 0; |
| String lastElement = null; |
| |
| try { |
| XMLStreamReader r = null; |
| try (InputStream fis = new FileInputStream(fullFile)) { |
| r = f.createXMLStreamReader(fullFile, fis); |
| String element = null; |
| while(r.hasNext()) { |
| try { |
| switch(r.next()){ |
| case XMLStreamConstants.START_ELEMENT: |
| element = r.getLocalName(); |
| lastElement = element; |
| ++_elementCount; |
| int attributeSize = r.getAttributeCount(); |
| for (int i = 0; i < attributeSize; ++i) { |
| ++_attributeCount; |
| String attribute = r.getAttributeLocalName(i); |
| String attributeValue = r.getAttributeValue(i); |
| pathChecker.checkAttribute(element, attribute, attributeValue); |
| } |
| break; |
| } |
| } catch (XMLStreamException e) { |
| synchronized (pathChecker.testLog) { |
| pathChecker.testLog.errln(fullFile + "error"); |
| } |
| e.printStackTrace(pathChecker.testLog.getLogPrintWriter()); |
| } |
| } |
| //XMLFileReader.read("noId", inputStreamReader, -1, true, myHandler); |
| } catch (XMLStreamException e) { |
| if (!logKnownIssue("cldrbug 10120", "XML reading issue")) { |
| warnln("Can't read " + fullFile); |
| } else { |
| throw (IllegalArgumentException) new IllegalArgumentException("Can't read " + fullFile).initCause(e); |
| } |
| } catch (Throwable e) { |
| if(r == null) throw e; |
| throw (IllegalArgumentException) new IllegalArgumentException(" at " + r.getLocation(), e); |
| } |
| } catch (Exception e) { |
| e.printStackTrace(this.getErrorLogPrintWriter()); |
| errln("Exception occured in " + fullFile + " after parsing " + lastElement + " - " + e); |
| } |
| pathChecker.elementCount.addAndGet(_elementCount); |
| pathChecker.attributeCount.addAndGet(_attributeCount); |
| } |
| |
| static class PathChecker { |
/** Recorded values keyed by status, element, attribute and value; checkAttribute synchronizes on this map when writing. */
private final ChainedMap.M5<ValueStatus, String, String, String, Boolean> valueStatusInfo
    = ChainedMap.of(new TreeMap(), new TreeMap(), new TreeMap(), new TreeMap(), Boolean.class);
/** Paths already handled by checkPath. */
private final Set<String> seen = new HashSet<>();
/** Element -> attribute -> value combinations already handled by checkAttribute. */
private final Map<String,Map<String,Map<String,Boolean>>> seenEAV = new ConcurrentHashMap<>();
/** Test framework instance used for logging; also used as the lock around log output. */
private final TestFmwk testLog;
private final DtdData dtdData;
/** (element, attribute) pairs that checkAttribute examines; all other pairs are skipped. */
private final Multimap<String, String> needsTesting;
/** Match expressions looked up in show() by the key {@code element + "\t" + attribute}. */
private final Map<String,String> matchValues;

private final AtomicInteger fileCount = new AtomicInteger();
private final AtomicInteger elementCount = new AtomicInteger();
private final AtomicInteger attributeCount = new AtomicInteger();

/**
 * Creates a checker for one DTD.
 *
 * @param testLog test framework instance that receives the log output
 * @param dtdData DTD data used to determine which attributes need testing and to
 *                compute the status of each value
 */
public PathChecker(TestFmwk testLog, DtdData dtdData) {
    this.testLog = testLog;
    this.dtdData = dtdData;
    // The map is handed to getNonEnumerated (presumably filled there -- verify),
    // then frozen into an immutable copy.
    Map<String, String> collectedMatchValues = new TreeMap<>();
    this.needsTesting = dtdData.getNonEnumerated(collectedMatchValues);
    this.matchValues = ImmutableMap.copyOf(collectedMatchValues);
}
| |
| private void checkPath(String path) { |
| if (seen.contains(path)) { |
| return; |
| } |
| seen.add(path); |
| if (path.contains("length-point")) { |
| int debug = 0; |
| } |
| XPathParts parts = XPathParts.getFrozenInstance(path); |
| for (int elementIndex = 0; elementIndex < parts.size(); ++elementIndex) { |
| String element = parts.getElement(elementIndex); |
| for (Entry<String, String> entry : parts.getAttributes(elementIndex).entrySet()) { |
| String attribute = entry.getKey(); |
| String attrValue = entry.getValue(); |
| checkAttribute(element, attribute, attrValue); |
| } |
| } |
| } |
| |
| public void checkElement(String element, Attributes atts) { |
| int length = atts.getLength(); |
| for (int i = 0; i < length; ++i) { |
| checkAttribute(element, atts.getQName(i), atts.getValue(i)); |
| } |
| } |
| |
| private void checkAttribute(String element, String attribute, String attrValue) { |
| // skip cases we know we don't need to test |
| if (!needsTesting.containsEntry(element, attribute)) { |
| return; |
| } |
| // check if we've seen the EAV yet |
| // we don't need to synchronize because a miss isn't serious |
| Map<String, Map<String, Boolean>> sub = seenEAV.get(element); |
| if (sub == null) { |
| Map<String, Map<String, Boolean>> subAlready = seenEAV.putIfAbsent(element, sub = new ConcurrentHashMap<>()); |
| if (subAlready != null) { |
| sub = subAlready; // discards empty map |
| } |
| } |
| Map<String, Boolean> set = sub.get(attribute); |
| if (set == null) { |
| Map<String, Boolean> setAlready = sub.putIfAbsent(attribute, set = new ConcurrentHashMap<>()); |
| if (setAlready != null) { |
| set = setAlready; // discards empty map |
| } |
| } |
| if (set.putIfAbsent(attrValue, Boolean.TRUE) != null) { |
| return; |
| } |
| |
| // get the status & store |
| ValueStatus valueStatus = dtdData.getValueStatus(element, attribute, attrValue); |
| if (valueStatus != ValueStatus.valid) { |
| // Set breakpoint here for debugging (referenced from http://cldr.unicode.org/development/testattributevalues) |
| dtdData.getValueStatus(element, attribute, attrValue); |
| } |
| synchronized (valueStatusInfo) { |
| valueStatusInfo.put(valueStatus, element, attribute, attrValue, Boolean.TRUE); |
| } |
| } |
| |
| void show(boolean verbose, ImmutableSet<ValueStatus> retain) { |
| boolean haveProblems = false; |
| // if (testLog.logKnownIssue("cldrbug 10120", "Don't enable error until complete")) { |
| // testLog.warnln("Counts: " + counter.toString()); |
| // } else |
| for (ValueStatus valueStatus : ValueStatus.values()) { |
| if (valueStatus == ValueStatus.valid) { |
| continue; |
| } |
| M4<String, String, String, Boolean> info = valueStatusInfo.get(valueStatus); |
| if (info != null) { |
| haveProblems = true; |
| } |
| } |
| |
| if (!verbose && !haveProblems) { |
| return; |
| } |
| StringBuilder out = new StringBuilder(); |
| out.append("\nIf the test fails, look at https://cldr.unicode.org/development/cldr-development-site/testattributevalues\n"); |
| |
| out.append("file\tCount:\t" + dtdData.dtdType + "\t" + fileCount + "\n"); |
| out.append("element\tCount:\t" + dtdData.dtdType + "\t" + elementCount + "\n"); |
| out.append("attribute\tCount:\t" + dtdData.dtdType + "\t" + attributeCount + "\n"); |
| |
| out.append("\nStatus\tDtdType\tElement\tAttribute\tMatch expression\t#Failures\tFailing values\n"); |
| |
| for (Entry<ValueStatus, Map<String, Map<String, Map<String, Boolean>>>> entry : valueStatusInfo) { |
| ValueStatus valueStatus = entry.getKey(); |
| if (retain != null && !retain.contains(valueStatus)) { |
| continue; |
| } |
| if (!verbose && haveProblems && valueStatus == ValueStatus.valid) { |
| continue; |
| } |
| for (Entry<String, Map<String, Map<String, Boolean>>> entry2 : entry.getValue().entrySet()) { |
| String elementName = entry2.getKey(); |
| for (Entry<String, Map<String, Boolean>> entry3 : entry2.getValue().entrySet()) { |
| String attributeName = entry3.getKey(); |
| Set<String> validFound = entry3.getValue().keySet(); |
| String matchValue = matchValues.get(elementName + "\t" + attributeName); |
| out.append( |
| valueStatus |
| + "\t" + dtdData.dtdType |
| + "\t" + elementName |
| + "\t" + attributeName |
| + "\t" + (matchValue == null ? "" : matchValue) |
| + "\t" + validFound.size() |
| + "\t" + Joiner.on(", ").join(validFound) |
| + "\n" |
| ); |
| if (valueStatus == ValueStatus.valid) try { |
| LstrType lstr = LstrType.fromString(elementName); |
| Map<String, Validity.Status> codeToStatus = VALIDITY.getCodeToStatus(lstr); |
| Set<String> missing = new TreeSet<>(codeToStatus.keySet()); |
| if (lstr == LstrType.variant) { |
| for (String item : validFound) { |
| missing.remove(item.toLowerCase(Locale.ROOT)); |
| } |
| } else { |
| missing.removeAll(validFound); |
| } |
| Set<String> deprecated = VALIDITY.getStatusToCodes(lstr).get(LstrField.Deprecated); |
| if (deprecated != null) { |
| missing.removeAll(deprecated); |
| } |
| if (!missing.isEmpty()) { |
| out.append( |
| "unused" |
| + "\t" + dtdData.dtdType |
| + "\t" + elementName |
| + "\t" + attributeName |
| + "\t" + "" |
| + "\t" + "" |
| + "\t" + Joiner.on(", ").join(missing) |
| + "\n" |
| ); |
| } |
| } catch (Exception e) {} |
| } |
| } |
| } |
| synchronized (testLog) { |
| testLog.errln(out.toString()); |
| } |
| } |
| } |
| |
| public void xTestA() { |
| MatcherPattern mp = AttributeValueValidity.getMatcherPattern("$language"); |
| for (String language : LanguageInfo.getAvailable()) { |
| if (mp.matches(language, null)) { |
| LanguageInfo languageInfo = LanguageInfo.get(language); |
| show(language, languageInfo); |
| } |
| } |
| } |
| |
| private void show(String language, LanguageInfo languageInfo) { |
| logln(language |
| + "\t" + config.getEnglish().getName(CLDRFile.LANGUAGE_NAME, language) |
| + "\t" + languageInfo); |
| } |
| |
| // public void TestAttributeValueValidity() { |
| // for (String test : Arrays.asList( |
| // "supplementalData; territoryAlias; replacement; AA")) { |
| // quickTest(test); |
| // } |
| // } |
| |
| private Status quickTest(String test) { |
| List<String> parts = SEMI_SPACE.splitToList(test); |
| Output<String> reason = new Output<>(); |
| Status value = AttributeValueValidity.check(DtdData.getInstance(DtdType.valueOf(parts.get(0))), parts.get(1), parts.get(2), parts.get(3), reason); |
| if (value != Status.ok) { |
| errln(test + "\t" + value + "\t" + reason); |
| } |
| return value; |
| } |
| |
| public void oldTestSingleFile() { |
| Errors errors = new Errors(); |
| Set<AttributeValueSpec> missing = new TreeSet<>(); |
| VerifyAttributeValues.check(CLDRPaths.MAIN_DIRECTORY + "en.xml", errors, missing); |
| for (AttributeValueSpec entry1 : missing) { |
| errln("Missing Tests: " + entry1); |
| } |
| for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { |
| errln(item.get0() + "; \t" + item.get2() + "; \t" + item.get1()); |
| } |
| } |
| |
| public void oldTestCoreValidity() { |
| int maxPerDirectory = |
| // getInclusion() <= 5 ? 20 : |
| Integer.MAX_VALUE; |
| Matcher fileMatcher = null; |
| Set<AttributeValueSpec> missing = new LinkedHashSet<>(); |
| Errors errors = new Errors(); |
| VerifyAttributeValues.findAttributeValues(BASE_DIR, maxPerDirectory, fileMatcher, errors, missing, isVerbose() ? getErrorLogPrintWriter() : null); |
| |
| int count = 0; |
| for (Entry<AttributeValidityInfo, String> entry : AttributeValueValidity.getReadFailures().entrySet()) { |
| errln("Read error: " + ++count + "\t" + entry.getKey() + " => " + entry.getValue()); |
| } |
| |
| count = 0; |
| for (R3<DtdType, String, String> entry1 : AttributeValueValidity.getTodoTests()) { |
| warnln("Unfinished Test: " + ++count + "\t" + new AttributeValueSpec(entry1.get0(), entry1.get1(), entry1.get2(), "").toString()); |
| } |
| |
| count = 0; |
| for (AttributeValueSpec entry1 : missing) { |
| errln("Missing Test: " + entry1); |
| } |
| |
| count = 0; |
| for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { |
| if ("deprecated".equals(item.get2())) |
| errln("Deprecated: " + ++count |
| + "; \t" + item.get0() |
| + "; \t" + item.get1().type |
| + "; \t" + item.get1().element |
| + "; \t" + item.get1().attribute |
| + "; \t" + item.get1().attributeValue |
| + "; \t" + item.get2()); |
| } |
| |
| count = 0; |
| for (R3<String, AttributeValueSpec, String> item : errors.getRows()) { |
| if (!"deprecated".equals(item.get2())) |
| errln("Invalid: " + ++count |
| + "; \t" + item.get0() |
| + "; \t" + item.get1().type |
| + "; \t" + item.get1().element |
| + "; \t" + item.get1().attribute |
| + "; \t" + item.get1().attributeValue |
| + "; \t" + item.get2()); |
| } |
| } |
| } |