| package org.unicode.cldr.tool; |
| |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.TreeMap; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.unicode.cldr.draft.FileUtilities; |
| import org.unicode.cldr.test.DisplayAndInputProcessor; |
| import org.unicode.cldr.test.SubmissionLocales; |
| import org.unicode.cldr.tool.FormattedFileWriter.Anchors; |
| import org.unicode.cldr.tool.Option.Options; |
| import org.unicode.cldr.tool.Option.Params; |
| import org.unicode.cldr.util.CLDRConfig; |
| import org.unicode.cldr.util.CLDRFile; |
| import org.unicode.cldr.util.CLDRFile.Status; |
| import org.unicode.cldr.util.CLDRPaths; |
| import org.unicode.cldr.util.CldrUtility; |
| import org.unicode.cldr.util.Counter; |
| import org.unicode.cldr.util.DtdData; |
| import org.unicode.cldr.util.DtdType; |
| import org.unicode.cldr.util.Factory; |
| import org.unicode.cldr.util.LanguageTagParser; |
| import org.unicode.cldr.util.Level; |
| import org.unicode.cldr.util.LocaleIDParser; |
| import org.unicode.cldr.util.Organization; |
| import org.unicode.cldr.util.Pair; |
| import org.unicode.cldr.util.PathHeader; |
| import org.unicode.cldr.util.PathHeader.PageId; |
| import org.unicode.cldr.util.PathStarrer; |
| import org.unicode.cldr.util.PatternCache; |
| import org.unicode.cldr.util.SimpleXMLSource; |
| import org.unicode.cldr.util.StandardCodes; |
| import org.unicode.cldr.util.SupplementalDataInfo; |
| import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; |
| import org.unicode.cldr.util.TransliteratorUtilities; |
| import org.unicode.cldr.util.XMLFileReader; |
| import org.unicode.cldr.util.XPathParts; |
| |
| import com.google.common.base.Joiner; |
| import com.google.common.base.Splitter; |
| import com.google.common.collect.Multimap; |
| import com.google.common.collect.TreeMultimap; |
| import com.ibm.icu.impl.Relation; |
| import com.ibm.icu.impl.Row.R2; |
| import com.ibm.icu.impl.Row.R3; |
| import com.ibm.icu.impl.Row.R4; |
| import com.ibm.icu.text.NumberFormat; |
| import com.ibm.icu.text.UnicodeSet; |
| import com.ibm.icu.util.ICUUncheckedIOException; |
| import com.ibm.icu.util.Output; |
| |
| public class ChartDelta extends Chart { |
| private static final boolean verbose_skipping = false; /* when true (and verbose is set), writeLdml also prints each file rejected by the file filter */ |
| /* Default output directory names; main() switches from the first to the second for high-level-only runs. */ |
| private static final String DEFAULT_DELTA_DIR_NAME = "delta"; |
| private static final String DEFAULT_CHURN_DIR_NAME = "churn"; |
| /* When true, getReformattedPath does no annotation-path rewriting; set when PREV_CHART_VERSION.compareTo("30") is non-negative. */ |
| private static final boolean SKIP_REFORMAT_ANNOTATIONS = ToolConstants.PREV_CHART_VERSION.compareTo("30") >= 0; |
| /* Page whose headers are echoed to System.out by the Multimap-based writeDiffs. */ |
| private static final PageId DEBUG_PAGE_ID = PageId.DayPeriod; |
| /* Supplemental data used here for coverage-level lookups (allowPath and the per-locale writeDiffs). */ |
| private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CLDRConfig.getInstance().getSupplementalDataInfo(); |
| |
| private enum MyOptions { |
| fileFilter(new Params().setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en").setMatch(".*")), |
| orgFilter(new Params().setHelp("filter files by organization").setMatch(".*")), |
| Vxml(new Params().setHelp("use cldr-aux for the base directory")), |
| coverageFilter(new Params().setHelp("filter files by coverage").setMatch(".*")), |
| directory(new Params().setHelp("Set the output directory name").setDefault(DEFAULT_DELTA_DIR_NAME).setMatch(".*")), |
| verbose(new Params().setHelp("verbose debugging messages")), |
| highLevelOnly(new Params().setHelp("check high-level paths (churn) only").setFlag('H')), |
| ; |
| |
| // BOILERPLATE TO COPY |
| final Option option; |
| |
| private MyOptions(Params params) { |
| option = new Option(this, params); |
| } |
| |
| private static Options myOptions = new Options(); |
| static { |
| for (MyOptions option : MyOptions.values()) { |
| myOptions.add(option, option.option); |
| } |
| } |
| |
| private static Set<String> parse(String[] args) { |
| return myOptions.parse(MyOptions.values()[0], args, true); |
| } |
| } |
| |
| private final Matcher fileFilter; /* applied with find() to "<source dir name>/<locale>" in writeLdml; null means no filtering */ |
| private final String dirName; // "delta" or "churn" or set as option |
| private final String chartNameCap; // capitalized, e.g., "Delta" or "Churn" |
| private final String DIR; // full path of output folder |
| private final Level minimumPathCoverage; /* null disables the coverage check in allowPath */ |
| private final boolean verbose; /* when true, writeLdml prints each "<source dir name>/<locale>" it processes */ |
| |
| /** |
| * If true, check only high-level paths, i.e., paths for which any changes |
| * have high potential to cause disruptive "churn". Set from the boolean passed |
| * to main(String[], boolean) or from the highLevelOnly command-line option. |
| */ |
| private final boolean highLevelOnly; |
| |
| public static void main(String[] args) { |
| main(args, false); |
| } |
| |
| public static void main(String[] args, boolean highLevelOnly) { |
| System.out.println("use -DCHART_VERSION=36.0 -DPREV_CHART_VERSION=34.0 to generate the differences between v36 and v34."); |
| MyOptions.parse(args); |
| Matcher fileFilter = !MyOptions.fileFilter.option.doesOccur() ? null : PatternCache.get(MyOptions.fileFilter.option.getValue()).matcher(""); |
| if (MyOptions.orgFilter.option.doesOccur()) { |
| if (MyOptions.fileFilter.option.doesOccur()) { |
| throw new IllegalArgumentException("Can't have both fileFilter and orgFilter"); |
| } |
| String rawOrg = MyOptions.orgFilter.option.getValue(); |
| Organization org = Organization.fromString(rawOrg); |
| Set<String> locales = StandardCodes.make().getLocaleCoverageLocales(org); |
| fileFilter = PatternCache.get("^(main|annotations)/(" + Joiner.on("|").join(locales) + ")$").matcher(""); |
| } |
| Level coverage = !MyOptions.coverageFilter.option.doesOccur() ? null : Level.fromString(MyOptions.coverageFilter.option.getValue()); |
| boolean verbose = MyOptions.verbose.option.doesOccur(); |
| if (MyOptions.highLevelOnly.option.doesOccur()) { |
| highLevelOnly = true; |
| } |
| String dirName = MyOptions.directory.option.getValue(); |
| if (highLevelOnly && DEFAULT_DELTA_DIR_NAME.equals(dirName)) { |
| System.out.println("For highLevelOnly, changing directory from " + DEFAULT_DELTA_DIR_NAME |
| + " to " + DEFAULT_CHURN_DIR_NAME); |
| dirName = DEFAULT_CHURN_DIR_NAME; |
| } |
| ChartDelta temp = new ChartDelta(fileFilter, coverage, dirName, verbose, highLevelOnly); |
| temp.writeChart(null); |
| temp.showTotals(); |
| if (highLevelOnly) { |
| HighLevelPaths.reportHighLevelPathUsage(); |
| } |
| System.out.println("Finished. Files may have been created in these directories:"); |
| System.out.println(temp.DIR); |
| System.out.println(getTsvDir(temp.DIR, temp.dirName)); |
| } |
| |
/**
 * Creates a chart generator.
 *
 * @param fileFilter matcher applied to "dir/locale" names, or null for no filtering
 * @param coverage minimum path coverage, or null for no coverage filtering
 * @param dirName name of the output directory below {@code CLDRPaths.CHART_DIRECTORY};
 *        its capitalized form is used in chart titles
 * @param verbose whether to print progress messages
 * @param highLevelOnly whether to chart only high-level ("churn") paths
 * @throws IllegalArgumentException if {@code dirName} is null or empty
 */
private ChartDelta(Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly) {
    if (dirName == null || dirName.isEmpty()) {
        // substring(0, 1) below would otherwise fail with an opaque index error
        throw new IllegalArgumentException("Output directory name must not be empty");
    }
    this.fileFilter = fileFilter;
    this.verbose = verbose;
    this.highLevelOnly = highLevelOnly;
    this.dirName = dirName;
    // Locale.ROOT keeps the capitalization independent of the JVM default locale
    // (e.g. "index" must not become "İndex" under a Turkish default).
    this.chartNameCap = dirName.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + dirName.substring(1);
    this.DIR = CLDRPaths.CHART_DIRECTORY + dirName;
    this.minimumPathCoverage = coverage;
}
| |
| private static final String SEP = "\u0001"; /* separator between the old and the new value inside one diff string (see the Multimap-based writeDiffs) */ |
| private static final boolean DEBUG = false; /* when true, addPathDiff prints every attribute diff row it creates */ |
| private static final String DEBUG_FILE = null; // "windowsZones.xml"; |
| static Pattern fileMatcher = PatternCache.get(".*"); /* locale-name pattern applied in writeLdml; the default accepts every name */ |
| /* Factory that maps paths to PathHeaders, built from ENGLISH. */ |
| static PathHeader.Factory phf = PathHeader.getFactory(ENGLISH); |
| static final Set<String> DONT_CARE = new HashSet<>(Arrays.asList("draft", "standard", "reference")); /* attribute names that addPathDiff never reports */ |
| |
| @Override |
| public String getDirectory() { |
| return DIR; |
| } |
| |
| @Override |
| public String getTitle() { |
| return chartNameCap + " Charts"; |
| } |
| |
| @Override |
| public String getFileName() { |
| return "index"; |
| } |
| |
| @Override |
| public String getExplanation() { |
| return "<p>Charts showing the differences from the last version. " |
| + "Titles prefixed by ¤ are special: either the locale data summary or supplemental data. " |
| + "Not all changed data is charted yet. For details see each chart.</p>"; |
| } |
| |
| @Override |
| public void writeContents(FormattedFileWriter pw) throws IOException { |
| FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors(); |
| FileUtilities.copyFile(ChartDelta.class, "index.css", getDirectory()); |
| FormattedFileWriter.copyIncludeHtmls(getDirectory(), true); |
| counter.clear(); |
| fileCounters.clear(); |
| writeNonLdmlPlain(anchors); |
| writeLdml(anchors); |
| pw.setIndex("Main Chart Index", "../index.html"); |
| pw.write(anchors.toString()); |
| } |
| |
| private static class PathHeaderSegment extends R3<PathHeader, Integer, String> { |
| public PathHeaderSegment(PathHeader b, int elementIndex, String attribute) { |
| super(b, elementIndex, attribute); |
| } |
| } |
| |
| private static class PathDiff extends R4<PathHeaderSegment, String, String, String> { |
| public PathDiff(String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue) { |
| super(pathHeaderSegment, locale, oldValue, newValue); |
| } |
| } |
| |
| private static final CLDRFile EMPTY_CLDR = new CLDRFile(new SimpleXMLSource("und").freeze()); /* shared placeholder returned by makeWithFallback when no file can be loaded */ |
| /* CLDR base directory as reported by CLDRConfig. */ |
| private static final File CLDR_BASE_DIR = CLDRConfig.getInstance().getCldrBaseDirectory(); |
| |
| private enum ChangeType { |
| added, deleted, changed, same; |
| public static ChangeType get(String oldValue, String currentValue) { |
| return oldValue == null ? added |
| : currentValue == null ? deleted |
| : oldValue.equals(currentValue) ? same |
| : changed; |
| } |
| } |
| |
| private Counter<ChangeType> counter = new Counter<>(); /* overall tally of changes by type, updated by addChange */ |
| private Map<String, Counter<ChangeType>> fileCounters = new TreeMap<>(); /* per-file tallies, keyed by the file name passed to addChange */ |
| private Set<String> badHeaders = new TreeSet<>(); /* results of starrer.set(path) for paths that getPathHeader rejected */ |
| |
| /** |
| * Add the count of changed items |
| */ |
| private void addChange(String file, ChangeType changeType, int count) { |
| counter.add(changeType, count); // unified add |
| Counter<ChangeType> fileCounter = fileCounters.get(file); |
| if (fileCounter == null) { |
| fileCounters.put(file, fileCounter = new Counter<>()); |
| } |
| fileCounter.add(changeType, count); |
| } |
| |
| private void showTotals() { |
| try (PrintWriter pw = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_summary.tsv")) { |
| // pw.println("# percentages are of *new* total"); |
| pw.print("# dir\tfile"); |
| for (ChangeType item : ChangeType.values()) { |
| pw.print("\t" + (item == ChangeType.same ? "total" : item.toString())); |
| } |
| pw.println(); |
| showTotal(pw, "TOTAL/", counter); |
| |
| for (Entry<String, Counter<ChangeType>> entry : fileCounters.entrySet()) { |
| showTotal(pw, entry.getKey(), entry.getValue()); |
| } |
| for (String s : badHeaders) { |
| pw.println(s); |
| } |
| // pw.println("# EOF"); |
| } catch (IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| private void showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2) { |
| long total = counter2.getTotal(); |
| NumberFormat pf = NumberFormat.getPercentInstance(); |
| pf.setMinimumFractionDigits(2); |
| NumberFormat nf = NumberFormat.getIntegerInstance(); |
| pw.print(title2.replace("/", "\t")); |
| for (ChangeType item : ChangeType.values()) { |
| if (item == ChangeType.same) { |
| pw.print("\t" + nf.format(total)); |
| } else { |
| final long current = counter2.getCount(item); |
| pw.print("\t" + nf.format(current)); |
| } |
| } |
| pw.println(); |
| } |
| |
| /** |
| * Compares the LDML locale data under the base directory of ToolConstants.CHART_VERSION |
| * with that of ToolConstants.PREV_CHART_VERSION, writes one chart per locale group via |
| * writeDiffs, and writes two TSV files: dirName + ".tsv" and dirName + "_count.tsv". |
| * <p> |
| * Steps, in order: |
| * <ol> |
| * <li>Build a current/old Factory pair for every directory in DtdType.ldml.directories |
| * except "annotationsDerived" and "casing". A directory whose current factory cannot be |
| * created is skipped; if only the old factory fails, null is stored in its place.</li> |
| * <li>Take the locales available in the first current factory, drop default-content |
| * locales and names not matching fileMatcher, and group them by base: "root", or the |
| * language-script of the minimized locale.</li> |
| * <li>For each base, factory pair and locale: apply fileFilter, load both files with |
| * makeWithFallback, collect the paths of both that pass allowPath, and for each path |
| * that has a usable PathHeader and is found in this locale itself (in either version) |
| * record attribute differences (addPathDiff) and value differences (addValueDiff).</li> |
| * <li>After each base, write its accumulated diffs; at the end write the counts.</li> |
| * </ol> |
| * |
| * @param anchors passed on to writeDiffs for every base |
| * @throws IOException if a TSV writer cannot be opened |
| * @throws IllegalArgumentException if no factory could be created, or a factory does not |
| * have exactly one source directory |
| * |
| * TODO: shorten the function using subroutines |
| */ |
| private void writeLdml(Anchors anchors) throws IOException { |
| try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + ".tsv"); |
| PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_count.tsv"); |
| ) { |
| tsvFile.println("# Section\tPage\tHeader\tCode\tLocale\tOld\tNew\tLevel"); |
| |
| // set up factories |
| List<Factory> factories = new ArrayList<>(); |
| List<Factory> oldFactories = new ArrayList<>(); |
| |
| Counter<PathHeader> counts = new Counter<>(); |
| |
| String dirBase = ToolConstants.getBaseDirectory(ToolConstants.CHART_VERSION); |
| String prevDirBase = ToolConstants.getBaseDirectory(ToolConstants.PREV_CHART_VERSION); |
| /* One current/old factory pair per LDML directory; the two lists stay index-aligned. */ |
| for (String dir : DtdType.ldml.directories) { |
| if (dir.equals("annotationsDerived") || dir.equals("casing")) { |
| continue; |
| } |
| String current = dirBase + "common/" + dir; |
| String past = prevDirBase + "common/" + dir; |
| try { |
| factories.add(Factory.make(current, ".*")); |
| } catch (Exception e1) { |
| System.out.println("Skipping: " + dir + "\t" + e1.getMessage()); |
| continue; // skip where the directories don't exist in old versions |
| } |
| try { |
| oldFactories.add(Factory.make(past, ".*")); |
| } catch (Exception e) { |
| System.out.println("Couldn't open factory: " + past); |
| past = null; |
| oldFactories.add(null); |
| } |
| System.out.println("Will compare: " + dir + "\t\t" + current + "\t\t" + past); |
| } |
| if (factories.isEmpty()) { |
| throw new IllegalArgumentException("No factories found for " |
| + dirBase + ": " + DtdType.ldml.directories); |
| } |
| // get a list of all the locales to cycle over |
| /* Only the first factory's locale list is consulted; locales are grouped by base. */ |
| Relation<String, String> baseToLocales = Relation.of(new TreeMap<String, Set<String>>(), HashSet.class); |
| Matcher m = fileMatcher.matcher(""); |
| Set<String> defaultContents = SDI.getDefaultContentLocales(); |
| LanguageTagParser ltp = new LanguageTagParser(); |
| LikelySubtags ls = new LikelySubtags(); |
| for (String file : factories.get(0).getAvailable()) { |
| if (defaultContents.contains(file)) { |
| continue; |
| } |
| if (!m.reset(file).matches()) { |
| continue; |
| } |
| String base = file.equals("root") ? "root" : ltp.set(ls.minimize(file)).getLanguageScript(); |
| baseToLocales.put(base, file); |
| } |
| |
| // do keyboards later |
| /* Scratch objects reused for every locale and path below. */ |
| Status currentStatus = new Status(); |
| Status oldStatus = new Status(); |
| Set<PathDiff> diff = new TreeSet<>(); |
| Set<String> paths = new HashSet<>(); |
| /* Locales (across all bases) in which each PathHeader has a value difference; filled by addValueDiff. */ |
| Relation<PathHeader, String> diffAll = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class); |
| for (Entry<String, Set<String>> baseNLocale : baseToLocales.keyValuesSet()) { |
| String base = baseNLocale.getKey(); |
| for (int i = 0; i < factories.size(); ++i) { |
| Factory factory = factories.get(i); |
| Factory oldFactory = oldFactories.get(i); |
| List<File> sourceDirs = Arrays.asList(factory.getSourceDirectories()); |
| if (sourceDirs.size() != 1) { |
| throw new IllegalArgumentException("Internal error: expect single source dir"); |
| } |
| File sourceDir = sourceDirs.get(0); |
| String sourceDirLeaf = sourceDir.getName(); |
| boolean resolving = !sourceDirLeaf.contains("subdivisions") |
| && !sourceDirLeaf.contains("transforms"); |
| /* Files are requested unresolved for directories whose name contains "subdivisions" or "transforms". */ |
| for (String locale : baseNLocale.getValue()) { |
| String nameAndLocale = sourceDirLeaf + "/" + locale; |
| if (fileFilter != null && !fileFilter.reset(nameAndLocale).find()) { |
| if (verbose && verbose_skipping) { |
| System.out.println("SKIPPING: " + nameAndLocale); |
| } |
| continue; |
| } |
| if (verbose) { |
| System.out.println(nameAndLocale); |
| } |
| CLDRFile current = makeWithFallback(factory, locale, resolving); |
| CLDRFile old = makeWithFallback(oldFactory, locale, resolving); |
| DisplayAndInputProcessor daip = new DisplayAndInputProcessor(old); |
| /* Skip locales that came back as root in both versions, have no file in either, or are not high-level in churn mode. */ |
| if (!locale.equals("root") && current.getLocaleID().equals("root") && old.getLocaleID().equals("root")) { |
| continue; |
| } |
| if (old == EMPTY_CLDR && current == EMPTY_CLDR) { |
| continue; |
| } |
| if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(locale)) { |
| continue; |
| } |
| paths.clear(); |
| for (String path : current.fullIterable()) { |
| if (allowPath(locale, path)) { |
| paths.add(path); |
| } |
| } |
| for (String path : old.fullIterable()) { |
| if (!paths.contains(path) && allowPath(locale, path)) { |
| paths.add(path); |
| } |
| } |
| /* Out-parameters of getReformattedPath, reused for every path of this locale. */ |
| Output<String> reformattedValue = new Output<>(); |
| Output<Boolean> hasReformattedValue = new Output<>(); |
| |
| for (String path : paths) { |
| if (path.startsWith("//ldml/identity") |
| || path.endsWith("/alias") |
| || path.startsWith("//ldml/segmentations") // do later |
| || path.startsWith("//ldml/rbnf") // do later |
| ) { |
| continue; |
| } |
| PathHeader ph = getPathHeader(path); |
| if (ph == null) { |
| continue; |
| } |
| |
| String oldValue; |
| String currentValue; |
| |
| { |
| String sourceLocaleCurrent = current.getSourceLocaleID(path, currentStatus); |
| String sourceLocaleOld = getReformattedPath(oldStatus, old, path, reformattedValue, hasReformattedValue); |
| |
| // filter out stuff that differs at a higher level |
| if (!sourceLocaleCurrent.equals(locale) |
| && !sourceLocaleOld.equals(locale)) { |
| continue; |
| } |
| if (!path.equals(currentStatus.pathWhereFound) |
| && !path.equals(oldStatus.pathWhereFound)) { |
| continue; |
| } |
| // fix some incorrect cases? |
| |
| currentValue = current.getStringValue(path); |
| if (CldrUtility.INHERITANCE_MARKER.equals(currentValue)) { |
| currentValue = current.getBaileyValue(path, null, null); |
| } |
| /* For the old side, use the value produced by getReformattedPath when it reported one. */ |
| String oldRawValue = hasReformattedValue.value ? reformattedValue.value : old.getStringValue(path); |
| if (CldrUtility.INHERITANCE_MARKER.equals(oldRawValue)) { |
| oldRawValue = old.getBaileyValue(path, null, null); |
| } |
| // ignore differences due to old DAIP |
| oldValue = dontDaipValue(oldRawValue, path) ? oldRawValue : daip.processInput(path, oldRawValue, null); |
| } |
| if (highLevelOnly && new SuspiciousChange(oldValue, currentValue, path, locale).isDisruptive() == false) { |
| continue; |
| } |
| // handle non-distinguishing attributes |
| addPathDiff(sourceDir, old, current, locale, ph, diff); |
| |
| addValueDiff(sourceDir, oldValue, currentValue, locale, ph, diff, diffAll); |
| } |
| } |
| } |
| writeDiffs(anchors, base, diff, tsvFile, counts); |
| diff.clear(); |
| } |
| writeDiffs(diffAll); |
| |
| writeCounter(tsvCountFile, "Count", counts); |
| } |
| } |
| |
| public boolean dontDaipValue(String oldRawValue, String path) { |
| return oldRawValue == null || path.startsWith("//ldml/collations"); |
| } |
| |
| private boolean allowPath(String locale, String path) { |
| if (minimumPathCoverage != null) { |
| Level pathLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale); |
| if (minimumPathCoverage.compareTo(pathLevel) < 0) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private String getReformattedPath(Status oldStatus, CLDRFile old, String path, Output<String> value, Output<Boolean> hasReformattedValue) { |
| if (SKIP_REFORMAT_ANNOTATIONS || !path.startsWith("//ldml/annotations/")) { |
| hasReformattedValue.value = Boolean.FALSE; |
| return old.getSourceLocaleID(path, oldStatus); |
| } |
| // OLD: <annotation cp='[😀]' tts='grinning face'>face; grin</annotation> |
| // NEW: <annotation cp="😀">face | grin</annotation> |
| // <annotation cp="😀" type="tts">grinning face</annotation> |
| // from the NEW paths, get the OLD values |
| XPathParts parts = XPathParts.getFrozenInstance(path).cloneAsThawed(); // not frozen, for removeAttribute |
| boolean isTts = parts.getAttributeValue(-1, "type") != null; |
| if (isTts) { |
| parts.removeAttribute(-1, "type"); |
| } |
| String cp = parts.getAttributeValue(-1, "cp"); |
| parts.setAttribute(-1, "cp", "[" + cp + "]"); |
| |
| String oldStylePath = parts.toString(); |
| String temp = old.getStringValue(oldStylePath); |
| if (temp == null) { |
| hasReformattedValue.value = Boolean.FALSE; |
| } else if (isTts) { |
| String temp2 = old.getFullXPath(oldStylePath); |
| value.value = XPathParts.getFrozenInstance(temp2).getAttributeValue(-1, "tts"); |
| hasReformattedValue.value = Boolean.TRUE; |
| } else { |
| value.value = temp.replaceAll("\\s*;\\s*", " | "); |
| hasReformattedValue.value = Boolean.TRUE; |
| } |
| return old.getSourceLocaleID(oldStylePath, oldStatus); |
| } |
| |
| PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A"); /* used by getPathHeader: the result of starrer.set(path) is recorded in badHeaders for rejected paths */ |
| |
| private PathHeader getPathHeader(String path) { |
| try { |
| PathHeader ph = phf.fromPath(path); |
| if (ph.getPageId() == PageId.Unknown) { |
| String star = starrer.set(path); |
| badHeaders.add(star); |
| return null; |
| } |
| return ph; |
| } catch (Exception e) { |
| String star = starrer.set(path); |
| badHeaders.add(star); |
| // System.err.println("Skipping path with bad PathHeader: " + path); |
| return null; |
| } |
| } |
| |
| private CLDRFile makeWithFallback(Factory oldFactory, String locale, boolean resolving) { |
| if (oldFactory == null) { |
| return EMPTY_CLDR; |
| } |
| CLDRFile old; |
| String oldLocale = locale; |
| while (true) { // fall back for old, maybe to root |
| try { |
| old = oldFactory.make(oldLocale, resolving); |
| break; |
| } catch (Exception e) { |
| oldLocale = LocaleIDParser.getParent(oldLocale); |
| if (oldLocale == null) { |
| return EMPTY_CLDR; |
| } |
| } |
| } |
| return old; |
| } |
| |
| private void addPathDiff(File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2) { |
| String path = ph.getOriginalPath(); |
| String fullPathCurrent = current.getFullXPath(path); |
| String fullPathOld = old.getFullXPath(path); |
| if (Objects.equals(fullPathCurrent, fullPathOld)) { |
| return; |
| } |
| XPathParts pathPlain = XPathParts.getFrozenInstance(path); |
| XPathParts pathCurrent = fullPathCurrent == null ? pathPlain : XPathParts.getFrozenInstance(fullPathCurrent); |
| XPathParts pathOld = fullPathOld == null ? pathPlain : XPathParts.getFrozenInstance(fullPathOld); |
| TreeSet<String> fullAttributes = null; |
| int size = pathCurrent.size(); |
| String parentAndName = parentAndName(sourceDir, locale); |
| for (int elementIndex = 0; elementIndex < size; ++elementIndex) { // will have same size |
| Collection<String> distinguishing = pathPlain.getAttributeKeys(elementIndex); |
| Collection<String> attributesCurrent = pathCurrent.getAttributeKeys(elementIndex); |
| Collection<String> attributesOld = pathCurrent.getAttributeKeys(elementIndex); |
| if (attributesCurrent.isEmpty() && attributesOld.isEmpty()) { |
| continue; |
| } |
| if (fullAttributes == null) { |
| fullAttributes = new TreeSet<>(); |
| } else { |
| fullAttributes.clear(); |
| } |
| fullAttributes.addAll(attributesCurrent); |
| fullAttributes.addAll(attributesOld); |
| fullAttributes.removeAll(distinguishing); |
| fullAttributes.removeAll(DONT_CARE); |
| |
| // at this point we only have non-distinguishing |
| for (String attribute : fullAttributes) { |
| String attributeValueOld = pathOld.getAttributeValue(elementIndex, attribute); |
| String attributeValueCurrent = pathCurrent.getAttributeValue(elementIndex, attribute); |
| if (Objects.equals(attributeValueOld, attributeValueCurrent)) { |
| addChange(parentAndName, ChangeType.same, 1); |
| continue; |
| } |
| addChange(parentAndName, ChangeType.get(attributeValueOld, attributeValueCurrent), 1); |
| |
| PathDiff row = new PathDiff( |
| locale, |
| new PathHeaderSegment(ph, size - elementIndex - 1, attribute), |
| attributeValueOld, |
| attributeValueCurrent); |
| if (DEBUG) { |
| System.out.println(row); |
| } |
| diff2.add(row); |
| } |
| } |
| } |
| |
| private String parentAndName(File sourceDir, String locale) { |
| return sourceDir.getName() + "/" + locale + ".xml"; |
| } |
| |
| private void addValueDiff(File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff, |
| Relation<PathHeader, String> diffAll) { |
| // handle stuff that can be split specially |
| Splitter splitter = getSplitter(ph.getOriginalPath(), valueOld, valueCurrent); |
| int count = 1; |
| String parentAndName = parentAndName(sourceDir, locale); |
| if (Objects.equals(valueCurrent, valueOld)) { |
| if (splitter != null && valueCurrent != null) { |
| count = splitHandlingNull(splitter, valueCurrent).size(); |
| } |
| addChange(parentAndName, ChangeType.same, count); |
| } else { |
| if (splitter != null) { |
| List<String> setOld = splitHandlingNull(splitter, valueOld); |
| List<String> setNew = splitHandlingNull(splitter, valueCurrent); |
| int[] sameAndNotInSecond = new int[2]; |
| valueOld = getFilteredValue(setOld, setNew, sameAndNotInSecond); |
| addChange(parentAndName, ChangeType.same, sameAndNotInSecond[0]); |
| addChange(parentAndName, ChangeType.deleted, sameAndNotInSecond[1]); |
| sameAndNotInSecond[0] = sameAndNotInSecond[1] = 0; |
| valueCurrent = getFilteredValue(setNew, setOld, sameAndNotInSecond); |
| addChange(parentAndName, ChangeType.added, sameAndNotInSecond[1]); |
| } else if (hasUnicodeSetValue(ph.getOriginalPath())) { |
| UnicodeSet usOld = valueOld == null ? UnicodeSet.EMPTY : new UnicodeSet(valueOld); |
| UnicodeSet usCurrent = valueCurrent == null ? UnicodeSet.EMPTY : new UnicodeSet(valueCurrent); |
| UnicodeSet oldOnly = new UnicodeSet(usOld).removeAll(usCurrent); |
| UnicodeSet currentOnly = new UnicodeSet(usCurrent).removeAll(usOld); |
| addChange(parentAndName, ChangeType.same, usOld.size()-oldOnly.size()); |
| addChange(parentAndName, ChangeType.deleted, oldOnly.size()); |
| addChange(parentAndName, ChangeType.added, currentOnly.size()); |
| valueOld = usOld.size()==oldOnly.size() ? oldOnly.toPattern(false) : "…" + oldOnly + "…"; |
| valueCurrent = usCurrent.size()==currentOnly.size() ? currentOnly.toPattern(false) : "…" + currentOnly + "…"; |
| } else { |
| addChange(parentAndName, ChangeType.get(valueOld, valueCurrent), count); |
| } |
| PathDiff row = new PathDiff(locale, new PathHeaderSegment(ph, -1, ""), valueOld, valueCurrent); |
| diff.add(row); |
| diffAll.put(ph, locale); |
| } |
| } |
| |
| private boolean hasUnicodeSetValue(String xpath) { |
| return xpath.startsWith("//ldml/characters/exemplar"); |
| } |
| |
| private List<String> splitHandlingNull(Splitter splitter, String value) { |
| return value == null ? null : splitter.splitToList(value); |
| } |
| |
| private Splitter getSplitter(String path, String valueOld, String valueCurrent) { |
| if (path.contains("/annotation") && !path.contains("tts")) { |
| return DtdData.BAR_SPLITTER; |
| } else if (valueOld != null && valueOld.contains("\n") || valueCurrent != null && valueCurrent.contains("\n")) { |
| return DtdData.CR_SPLITTER; |
| } else { |
| return null; |
| } |
| } |
| |
| /** |
| * Return string with all lines from linesToRemove removed |
| * @param toGetStringFor |
| * @param linesToRemove |
| * @return |
| */ |
| private String getFilteredValue(Collection<String> toGetStringFor, Collection<String> linesToRemove, |
| int[] sameAndDiff) { |
| if (toGetStringFor == null) { |
| return null; |
| } |
| StringBuilder buf = new StringBuilder(); |
| Set<String> toRemove = linesToRemove == null ? Collections.emptySet() : new HashSet<>(linesToRemove); |
| boolean removed = false; |
| for (String old : toGetStringFor) { |
| if (toRemove.contains(old)) { |
| removed = true; |
| sameAndDiff[0]++; |
| } else { |
| sameAndDiff[1]++; |
| if (removed) { |
| buf.append("…\n"); |
| removed = false; |
| } |
| buf.append(old).append('\n'); |
| } |
| } |
| if (removed) { |
| buf.append("…"); |
| } else if (buf.length() > 0) { |
| buf.setLength(buf.length() - 1); // remove final \n |
| } |
| return buf.toString(); |
| } |
| |
| private void writeDiffs(Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile) { |
| if (bcp.isEmpty()) { |
| System.out.println("\tDeleting: " + DIR + "/" + file); |
| new File(DIR + file).delete(); |
| return; |
| } |
| TablePrinter tablePrinter = new TablePrinter() |
| .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)//.setRepeatDivider(true) |
| .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Code", "class='source'", null, "class='source'", false) |
| .addColumn("Old", "class='target'", null, "class='target'", false) // width='20%' |
| .addColumn("New", "class='target'", null, "class='target'", false); // width='20%' |
| PathHeader ph1 = phf.fromPath("//supplementalData/metadata/alias/subdivisionAlias[@type=\"TW-TXQ\"]/_reason"); |
| PathHeader ph2 = phf.fromPath("//supplementalData/metadata/alias/subdivisionAlias[@type=\"LA-XN\"]/_replacement"); |
| ph1.compareTo(ph2); |
| for (Entry<PathHeader, Collection<String>> entry : bcp.asMap().entrySet()) { |
| PathHeader ph = entry.getKey(); |
| if (ph.getPageId() == DEBUG_PAGE_ID) { |
| System.out.println(ph + "\t" + ph.getOriginalPath()); |
| } |
| for (String value : entry.getValue()) { |
| String[] oldNew = value.split(SEP); |
| tablePrinter.addRow() |
| .addCell(ph.getSectionId()) |
| .addCell(ph.getPageId()) |
| .addCell(ph.getHeader()) |
| .addCell(ph.getCode()) |
| .addCell(oldNew[0]) |
| .addCell(oldNew[1]) |
| .finishRow(); |
| } |
| } |
| writeTable(anchors, file, tablePrinter, title, tsvFile); |
| } |
| |
| private void writeDiffs(Relation<PathHeader, String> diffAll) { |
| TablePrinter tablePrinter = new TablePrinter() |
| .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Code", "class='source'", null, "class='source'", true) |
| .addColumn("Locales where different", "class='target'", null, "class='target'", true); |
| for (Entry<PathHeader, Set<String>> row : diffAll.keyValuesSet()) { |
| PathHeader ph = row.getKey(); |
| Set<String> locales = row.getValue(); |
| tablePrinter.addRow() |
| .addCell(ph.getSectionId()) |
| .addCell(ph.getPageId()) |
| .addCell(ph.getHeader()) |
| .addCell(ph.getCode()) |
| .addCell(Joiner.on(" ").join(locales)) |
| .finishRow(); |
| } |
| } |
| |
| private void writeDiffs(Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts) { |
| if (diff.isEmpty()) { |
| return; |
| } |
| TablePrinter tablePrinter = new TablePrinter() |
| .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true) |
| .addColumn("Code", "class='source'", null, "class='source'", true) |
| .addColumn("Locale", "class='source'", null, "class='source'", true) |
| .addColumn("Old", "class='target'", null, "class='target'", true) // width='20%' |
| .addColumn("New", "class='target'", null, "class='target'", true) // width='20%' |
| .addColumn("Level", "class='target'", null, "class='target'", true); |
| |
| for (PathDiff row : diff) { |
| PathHeaderSegment phs = row.get0(); |
| counts.add(phs.get0(), 1); |
| String locale = row.get1(); |
| String oldValue = row.get2(); |
| String currentValue = row.get3(); |
| |
| PathHeader ph = phs.get0(); |
| Integer pathIndex = phs.get1(); |
| String attribute = phs.get2(); |
| String specialCode = ph.getCode(); |
| |
| if (!attribute.isEmpty()) { |
| specialCode += "_" + attribute; |
| if (pathIndex != 0) { |
| specialCode += "|" + pathIndex; |
| } |
| } |
| Level coverageLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(ph.getOriginalPath(), locale); |
| String fixedOldValue = oldValue == null ? "▷missing◁" : TransliteratorUtilities.toHTML.transform(oldValue); |
| String fixedNewValue = currentValue == null ? "▷removed◁" : TransliteratorUtilities.toHTML.transform(currentValue); |
| |
| tablePrinter.addRow() |
| .addCell(ph.getSectionId()) |
| .addCell(ph.getPageId()) |
| .addCell(ph.getHeader()) |
| .addCell(specialCode) |
| .addCell(locale) |
| .addCell(fixedOldValue) |
| .addCell(fixedNewValue) |
| .addCell(coverageLevel) |
| .finishRow(); |
| |
| } |
| String title = ENGLISH.getName(file) + " " + chartNameCap; |
| writeTable(anchors, file, tablePrinter, title, tsvFile); |
| |
| diff.clear(); |
| } |
| |
| private class ChartDeltaSub extends Chart { |
| private String title; |
| private String file; |
| private TablePrinter tablePrinter; |
| private PrintWriter tsvFile; |
| |
| private ChartDeltaSub(String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile) { |
| super(); |
| this.title = title; |
| this.file = file; |
| this.tablePrinter = tablePrinter; |
| this.tsvFile = tsvFile; |
| } |
| |
| @Override |
| public String getDirectory() { |
| return DIR; |
| } |
| |
| @Override |
| public boolean getShowDate() { |
| return false; |
| } |
| |
| @Override |
| public String getTitle() { |
| return title; |
| } |
| |
| @Override |
| public String getFileName() { |
| return file; |
| } |
| |
| @Override |
| public String getExplanation() { |
| return "<p>Lists data fields that differ from the last major version (see versions above)." |
| + " Inherited differences in locales are suppressed, except where the source locales are different. " |
| + "<p>"; |
| } |
| |
| @Override |
| public void writeContents(FormattedFileWriter pw) throws IOException { |
| pw.write(tablePrinter.toTable()); |
| tablePrinter.toTsv(tsvFile); |
| } |
| } |
| |
| private void writeTable(Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile) { |
| ChartDeltaSub chartDeltaSub = new ChartDeltaSub(title, file, tablePrinter, tsvFile); |
| chartDeltaSub.writeChart(anchors); |
| } |
| |
| private void writeNonLdmlPlain(Anchors anchors) throws IOException { |
| try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp.tsv"); |
| PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp_count.tsv"); |
| ) { |
| tsvFile.println("# Section\tPage\tHeader\tCode\tOld\tNew"); |
| |
| Multimap<PathHeader, String> bcp = TreeMultimap.create(); |
| Multimap<PathHeader, String> supplemental = TreeMultimap.create(); |
| Multimap<PathHeader, String> transforms = TreeMultimap.create(); |
| |
| Counter<PathHeader> countSame = new Counter<>(); |
| Counter<PathHeader> countAdded = new Counter<>(); |
| Counter<PathHeader> countDeleted = new Counter<>(); |
| |
| for (String dir : new File(CLDRPaths.BASE_DIRECTORY + "common/").list()) { |
| if (DtdType.ldml.directories.contains(dir) |
| || dir.equals(".DS_Store") |
| || dir.equals("dtd") // TODO as flat files |
| || dir.equals("properties") // TODO as flat files |
| || dir.equals("uca") // TODO as flat files |
| ) { |
| continue; |
| } |
| File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir); |
| System.out.println("\tLast dir: " + dirOld); |
| File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir); |
| System.out.println("\tCurr dir: " + dir2); |
| |
| for (String file : dir2.list()) { |
| if (!file.endsWith(".xml")) { |
| continue; |
| } |
| String parentAndFile = dir + "/" + file; |
| String base = file.substring(0, file.length() - 4); |
| if (fileFilter != null && !fileFilter.reset(dir + "/" + base).find()) { |
| if (verbose) { // && verbose_skipping |
| System.out.println("SKIPPING: " + dir + "/" + base); |
| } |
| continue; |
| } |
| if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(base)) { |
| continue; |
| } |
| if (verbose) { |
| System.out.println(file); |
| } |
| Relation<PathHeader, String> contentsOld = fillData(dirOld.toString() + "/", file, base); |
| Relation<PathHeader, String> contents2 = fillData(dir2.toString() + "/", file, base); |
| |
| Set<PathHeader> keys = new TreeSet<>(CldrUtility.ifNull(contentsOld.keySet(), Collections.<PathHeader> emptySet())); |
| keys.addAll(CldrUtility.ifNull(contents2.keySet(), Collections.<PathHeader> emptySet())); |
| DtdType dtdType = null; |
| for (PathHeader key : keys) { |
| String originalPath = key.getOriginalPath(); |
| if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(originalPath, base)) { |
| continue; |
| } |
| boolean isTransform = originalPath.contains("/tRule"); |
| if (dtdType == null) { |
| dtdType = DtdType.fromPath(originalPath); |
| } |
| Multimap<PathHeader, String> target = dtdType == DtdType.ldmlBCP47 ? bcp |
| : isTransform ? transforms |
| : supplemental; |
| Set<String> setOld = contentsOld.get(key); |
| Set<String> set2 = contents2.get(key); |
| |
| if (Objects.equals(setOld, set2)) { |
| if (file.equals(DEBUG_FILE)) { // for debugging |
| System.out.println("**Same: " + key + "\t" + setOld); |
| } |
| addChange(parentAndFile, ChangeType.same, setOld.size()); |
| countSame.add(key, 1); |
| continue; |
| } |
| if (setOld == null) { |
| addChange(parentAndFile, ChangeType.added, set2.size()); |
| for (String s : set2) { |
| addRow(target, key, "▷missing◁", s); |
| countAdded.add(key, 1); |
| } |
| } else if (set2 == null) { |
| addChange(parentAndFile, ChangeType.deleted, setOld.size()); |
| for (String s : setOld) { |
| addRow(target, key, s, "▷removed◁"); |
| countDeleted.add(key, 1); |
| } |
| } else { |
| Set<String> s1MOld = setOld; |
| Set<String> s2M1 = set2; |
| if (s1MOld.isEmpty()) { |
| addRow(target, key, "▷missing◁", Joiner.on(", ").join(s2M1)); |
| addChange(parentAndFile, ChangeType.added, s2M1.size()); |
| countAdded.add(key, 1); |
| } else if (s2M1.isEmpty()) { |
| addRow(target, key, Joiner.on(", ").join(s1MOld), "▷removed◁"); |
| addChange(parentAndFile, ChangeType.deleted, s1MOld.size()); |
| countDeleted.add(key, 1); |
| } else { |
| String valueOld; |
| String valueCurrent; |
| |
| int[] sameAndNotInSecond = new int[2]; |
| valueOld = getFilteredValue(s1MOld, s1MOld, sameAndNotInSecond); |
| addChange(parentAndFile, ChangeType.same, sameAndNotInSecond[0]); |
| countSame.add(key, 1); |
| addChange(parentAndFile, ChangeType.deleted, sameAndNotInSecond[1]); |
| sameAndNotInSecond[1] = 0; |
| countDeleted.add(key, 1); |
| valueCurrent = getFilteredValue(s2M1, s1MOld, sameAndNotInSecond); |
| addChange(parentAndFile, ChangeType.added, sameAndNotInSecond[1]); |
| addRow(target, key, valueOld, valueCurrent); |
| countAdded.add(key, 1); |
| } |
| } |
| } |
| } |
| } |
| writeDiffs(anchors, "bcp47", "¤¤BCP47 " + chartNameCap, bcp, tsvFile); |
| writeDiffs(anchors, "supplemental-data", "¤¤Supplemental " + chartNameCap, supplemental, tsvFile); |
| writeDiffs(anchors, "transforms", "¤¤Transforms " + chartNameCap, transforms, tsvFile); |
| |
| writeCounter(tsvCountFile, "CountSame", countSame); |
| tsvCountFile.println(); |
| writeCounter(tsvCountFile, "CountAdded", countAdded); |
| tsvCountFile.println(); |
| writeCounter(tsvCountFile, "CountDeleted", countDeleted); |
| |
| //tsvFile.println("# EOF"); |
| //tsvCountFile.println("# EOF"); |
| } |
| } |
| |
| private void writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted) { |
| tsvFile.append("# " |
| + title |
| + "\tSection\tPage\tSubhead\tCode\n\n"); |
| for (R2<Long, PathHeader> entry : countDeleted.getEntrySetSortedByCount(false, null)) { |
| tsvFile.println(entry.get0() + "\t" + entry.get1()); |
| } |
| } |
| |
| private void addRow(Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem) { |
| if (oldItem.isEmpty() || newItem.isEmpty()) { |
| throw new IllegalArgumentException(); |
| } |
| target.put(key, oldItem + SEP + newItem); |
| } |
| |
| /** |
| * Fill in the chart data for the specified file |
| * |
| * @param directory |
| * @param file like "xx.xml" where "xx" may be a locale name |
| * @param fileBase like "xx", same as file without ".xml" |
| * @return the Relation |
| */ |
| private Relation<PathHeader, String> fillData(String directory, String file, String fileBase) { |
| Relation<PathHeader, String> results = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class); |
| |
| List<Pair<String, String>> contents1; |
| try { |
| contents1 = XMLFileReader.loadPathValues(directory + file, new ArrayList<Pair<String, String>>(), true); |
| } catch (Exception e) { |
| /* |
| * This happens with e = ICUException, file = grammaticalFeatures.xml in cldr-36.0 |
| */ |
| return results; |
| } |
| DtdType dtdType = null; |
| DtdData dtdData = null; |
| Multimap<String, String> extras = TreeMultimap.create(); |
| |
| for (Pair<String, String> s : contents1) { |
| String path = s.getFirst(); |
| if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(path, fileBase /* locale, or not */)) { |
| continue; |
| } |
| String value = s.getSecond(); |
| if (dtdType == null) { |
| /* |
| * Note: although dtdType and dtdData depend on path, they are the same for all paths |
| * in the same file, so they only need to be set the first time through this loop. |
| * |
| * Note: the current DTD in CLDR_BASE_DIR is supposed to be backward-compatible, that is, to support |
| * paths from all archived versions. Any exception to that rule (e.g., for "grammaticalState") is a bug. |
| */ |
| dtdType = DtdType.fromPath(path); |
| dtdData = DtdData.getInstance(dtdType, CLDR_BASE_DIR); |
| } |
| XPathParts pathPlain = XPathParts.getFrozenInstance(path); |
| try { |
| if (dtdData.isMetadata(pathPlain)) { |
| continue; |
| } |
| } catch (NullPointerException e) { |
| /* |
| * TODO: this happens for "grammaticalState" in this path from version 37: |
| * //supplementalData/grammaticalData/grammaticalFeatures[@targets="nominal"][@locales="he"]/grammaticalState[@values="definite indefinite construct"] |
| * Reference: https://unicode-org.atlassian.net/browse/CLDR-13306 |
| */ |
| System.out.println("Caught NullPointerException in fillData calling isMetadata, path = " + path); |
| continue; |
| } |
| Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras); |
| if (pathForValues != null) { |
| for (String pathForValue : pathForValues) { |
| PathHeader pathHeader = phf.fromPath(pathForValue); |
| if (pathHeader.getPageId() == PageId.Suppress) { |
| continue; |
| } |
| Splitter splitter = DtdData.getValueSplitter(pathPlain); |
| for (String line : splitter.split(value)) { |
| // special case # in transforms |
| if (isComment(pathPlain, line)) { |
| continue; |
| } |
| results.put(pathHeader, line); |
| } |
| } |
| } |
| for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) { |
| final String extraPath = entry.getKey(); |
| final PathHeader pathHeaderExtra = phf.fromPath(extraPath); |
| if (pathHeaderExtra.getPageId() == PageId.Suppress) { |
| continue; |
| } |
| final Collection<String> extraValue = entry.getValue(); |
| if (isExtraSplit(extraPath)) { |
| for (String items : extraValue) { |
| results.putAll(pathHeaderExtra, DtdData.SPACE_SPLITTER.splitToList(items)); |
| } |
| } else { |
| results.putAll(pathHeaderExtra, extraValue); |
| } |
| } |
| if (pathForValues == null && !value.isEmpty()) { |
| System.err.println("Shouldn't happen"); |
| } |
| } |
| return results; |
| } |
| |
| private boolean isExtraSplit(String extraPath) { |
| if (extraPath.endsWith("/_type") && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) { |
| return true; |
| } |
| return false; |
| } |
| |
| private static boolean isComment(XPathParts pathPlain, String line) { |
| if (pathPlain.contains("transform")) { |
| if (line.startsWith("#")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Determine when changes to the values for paths should be treated as |
| * potentially "disruptive" for the purpose of "churn" reporting |
| */ |
| private class SuspiciousChange { |
| /** |
| * the old and new values, such as "HH:mm–HH:mm v" and "HH:mm – HH:mm v" |
| */ |
| private String oldValue, newValue; |
| |
| /** |
| * the path, such as //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id="Hmv"]/greatestDifference[@id="H"] |
| */ |
| private String path; |
| |
| /** |
| * the locale (such as "doi") in which the path was found, or null, or possibly |
| * the base file name without extension, like "xx" if the file name is "xx.xml", |
| * where "xx" may or may not be a locale; e.g., "supplementalData" |
| */ |
| private String locale; |
| |
| SuspiciousChange(String oldValue, String newValue, String path, String locale) { |
| this.oldValue = oldValue; |
| this.newValue = newValue; |
| this.path = path; |
| this.locale = locale; |
| } |
| |
| /** |
| * Is the change from the old value to the new value, for this path and locale, potentially disruptive? |
| * |
| * @return true or false |
| */ |
| public boolean isDisruptive() { |
| /* |
| * OR, not AND: certain changes in value are disruptive even for paths not |
| * otherwise treated as high-level, and changes for high-level paths are |
| * disruptive even if the changes in values themselves are not identified |
| * as disruptive. |
| */ |
| return valueChangeIsDisruptive() || HighLevelPaths.pathIsHighLevel(path, locale); |
| } |
| |
| /** |
| * Is the change from the old value to the current value potentially disruptive, based (primarily) on |
| * the values themselves? |
| * |
| * @return true or false |
| */ |
| private boolean valueChangeIsDisruptive() { |
| if (oldValue == null || newValue == null || oldValue.equals(newValue)) { |
| return false; |
| } |
| if (valueChangeIsDisruptiveWhitespaceOnly()) { |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Is the change disruptive whitespace only? |
| * Per design doc, "Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP |
| * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none." |
| * |
| * @return true or false |
| */ |
| private boolean valueChangeIsDisruptiveWhitespaceOnly() { |
| /* |
| * annotations often have changes like "pop gorn", "popgorn", not treated as disruptive |
| */ |
| if (path.startsWith("//ldml/annotations")) { |
| return false; |
| } |
| if (removeWhitespace(oldValue).equals(removeWhitespace(newValue))) { |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Remove whitespace from the given string |
| * |
| * Remove whitespace as defined by regex \s, and also |
| * U+00A0 NO-BREAK SPACE |
| * U+2007 FIGURE SPACE |
| * U+202F NARROW NO-BREAK SPACE |
| * |
| * @param s the string |
| * @return the modified string |
| */ |
| private String removeWhitespace(String s) { |
| return s.replaceAll("[\\s\\u00A0\\u2007\\u202F]", ""); |
| } |
| } |
| |
| /** |
| * Determine which paths are considered "high-level" paths, i.e., |
| * paths for which any changes have high potential to cause disruptive "churn". |
| * Whether a path is high-level sometimes depends on the locale or xml file in |
| * which it occurs. |
| * Some paths are high-level regardless of the locale in which they are located. |
| * Other paths are high-level for some locales but not others. For example, |
| * //ldml/localeDisplayNames/languages/language[@type="xx"] |
| * is high level in locale "xx", and maybe "en", but not for all locales. |
| */ |
| private static class HighLevelPaths { |
| /** |
| * A set of paths to be treated as "high-level". |
| * These are complete paths to be matched exactly. |
| * Other paths are recognized by special functions like isHighLevelTerritoryName. |
| * |
| * The ordering and comments are based on the design spec. |
| */ |
| final private static Set<String> highLevelPaths = new HashSet<>(Arrays.asList( |
| /* |
| * Core data |
| */ |
| "//ldml/characters/exemplarCharacters", |
| "//ldml/numbers/defaultNumberingSystem", |
| "//ldml/numbers/otherNumberingSystems/native", |
| /* |
| * Territory and Language names |
| * Country/Region names (English and Native names) -- see isHighLevelTerritoryName |
| * //ldml/localeDisplayName/territories/territory/... |
| * Language names (English and Native) -- see isHighLevelLangName |
| * //ldml/localeDisplayNames/languages/language/... |
| */ |
| /* |
| * Date |
| * Note: "year", "month", etc., below, form a subset (eight) of all possible values for type, |
| * excluding, for example, "fri" and "zone". If we use starred paths, we would need further complication |
| * to filter out "fri", "zone", etc. |
| */ |
| "//ldml/dates/fields/field[@type=\"year\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"month\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"week\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"day\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"hour\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"era\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"minute\"]/displayName", |
| "//ldml/dates/fields/field[@type=\"second\"]/displayName", |
| /* |
| * First day of week: firstDay in supplementalData.xml; see isHighLevelFirstDay |
| * First week of year: see isHighLevelWeekOfPreference |
| */ |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"full\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"long\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"medium\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"short\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MMMEd\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MEd\"]", |
| /* |
| * Time |
| */ |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"full\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"long\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"medium\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"short\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"am\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"pm\"]", |
| "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"pm\"]", |
| /* |
| * Currency (English and Native) -- see isHighLevelCurrencyName |
| * E.g., //ldml/numbers/currencies/currency[@type=\"KRW\"]/displayName" |
| * |
| * ISO Currency Code: SupplementalData.xml match <region iso3166> -- see isHighLevelCurrencyCode |
| */ |
| /* |
| * Currency Formats |
| * a. Currency thousand separator |
| * b. Currency decimal separator |
| * c. Currency Symbol //ldml/numbers/currencies/currency[@type="CNY"]/symbol |
| * d. Currency Symbol Narrow //ldml/numbers/currencies/currency[@type=\"CNY\"]/symbol[@alt=\"narrow\"]" |
| * |
| * See isHighLevelCurrencySeparatorOrSymbol |
| */ |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/numbers/currencyFormats[@numberSystem=\"arab\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| /* |
| * Number Symbols |
| */ |
| "//ldml/numbers/minimumGroupingDigits", |
| "//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal", |
| "//ldml/numbers/symbols[@numberSystem=\"latn\"]/group", |
| "//ldml/numbers/symbols[@numberSystem=\"arab\"]/decimal", |
| "//ldml/numbers/symbols[@numberSystem=\"arab\"]/group", |
| /* |
| * Number formats |
| */ |
| "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/numbers/percentFormats[@numberSystem=\"latn\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"accounting\"]/pattern[@type=\"standard\"]", |
| "//ldml/numbers/decimalFormats[@numberSystem=\"arab\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", |
| "//ldml/numbers/percentFormats[@numberSystem=\"arab\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]" |
| /* |
| * "Complementary Observations" |
| */ |
| /* |
| * Changes to language aliases (supplementalMetaData) -- see isHighLevelLangAlias |
| * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"] |
| */ |
| /* |
| * Changes in the containment graph -- see isHighLevelTerritoryContainment |
| * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control. |
| * Users use the containment graph in a variety of ways. |
| * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"] |
| */ |
| /* |
| * Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP |
| * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none. |
| * -- see SuspiciousChange.valueChangeIsDisruptiveWhitespaceOnly |
| */ |
| /* |
| * TODO: per design doc, "Adding a timezone" |
| * TODO: per design doc, "Changes of symbols or codes that are cross-locale in some way such as the unknown |
| * currency symbol change '???' -> '¤'." |
| * TODO: per design doc, "Change in character properties (not a CLDR but a Unicode change), and here especially |
| * newly adding or removing punctuation. Frequently irritates parsers." |
| */ |
| )); |
| |
| static Pattern currencyPattern = Pattern.compile("^//ldml/numbers/currencies/currency.*/displayName.*"); |
| |
| /** |
| * Should the given path in the given locale be taken into account for generating "churn" reports? |
| * |
| * @param path the path of interest |
| * @param locale the locale in which the path was found, or null, or possibly |
| * the base file name without extension, like "xx" if the file name is "xx.xml", |
| * where "xx" may or may not be a locale; e.g., "supplementalData" |
| * @return true if it counts, else false to ignore |
| */ |
| private static boolean pathIsHighLevel(String path, String locale) { |
| if (path == null || locale == null) { |
| return false; |
| } |
| if (!localeIsHighLevel(locale)) { // for efficiency, this should be caught at a higher level |
| System.out.println("locale [" + locale + "] failed localeIsHighLevel in pathIsHighLevel; path = " + path); |
| return false; |
| } |
| if (pathIsReallyHighLevel(path, locale)) { |
| if (verboseHighLevelReporting) { |
| recordHighLevelMatch(path); |
| } |
| return true; |
| } |
| return false; |
| } |
| |
| private static boolean pathIsReallyHighLevel(String path, String locale) { |
| if (highLevelPaths.contains(path)) { |
| return true; |
| } else if (isHighLevelTerritoryName(path, locale)) { |
| return true; |
| } else if (isHighLevelLangName(path, locale)) { |
| return true; |
| } else if (isHighLevelCurrencyName(path, locale)) { |
| return true; |
| } else if (isHighLevelCurrencyCode(path, locale)) { |
| return true; |
| } else if (isHighLevelCurrencySeparatorOrSymbol(path, locale)) { |
| return true; |
| } else if (isHighLevelLangAlias(path, locale)) { |
| return true; |
| } else if (isHighLevelTerritoryContainment(path, locale)) { |
| return true; |
| } else if (isHighLevelFirstDay(path, locale)) { |
| return true; |
| } else if (isHighLevelWeekOfPreference(path, locale)) { |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given locale, or base name, to be considered for "high level" churn report? |
| * |
| * @param locale the locale string, or base name like "supplementalData" as in "supplementalData.xml" |
| * @return true or false |
| */ |
| private static boolean localeIsHighLevel(String locale) { |
| return SubmissionLocales.CLDR_OR_HIGH_LEVEL_LOCALES.contains(locale) |
| || "supplementalData".equals(locale); |
| } |
| |
| /** |
| * Changes to language aliases (supplemental metadata) |
| * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"] |
| * |
| * @param path |
| * @param locale must be "supplementalData" to match |
| * @return true or false |
| */ |
| private static boolean isHighLevelLangAlias(String path, String locale) { |
| if ("supplementalData".equals(locale)) { |
| if (path.startsWith("//supplementalData/metadata/alias/languageAlias")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Changes in the containment graph |
| * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control. |
| * Users use the containment graph in a variety of ways. |
| * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"] |
| * |
| * @param path |
| * @param locale must be "supplementalData" to match |
| * @return true or false |
| */ |
| private static boolean isHighLevelTerritoryContainment(String path, String locale) { |
| if ("supplementalData".equals(locale)) { |
| if (path.startsWith("//supplementalData/territoryContainment")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level territory name path in the given locale? |
| * |
| * E.g., //ldml/localeDisplayNames/territories/territory[@type="NNN"] |
| * if type "NNN" CORRESPONDS TO the locale or the locale is "en" |
| * |
| * English names (en.xml): match all types |
| * Native: check each territory type NNN corresponding to the given locale |
| * |
| * Exclude "alt" |
| * |
| * @param path |
| * @param locale |
| * @return true or false |
| */ |
| private static boolean isHighLevelTerritoryName(String path, String locale) { |
| if (path.startsWith("//ldml/localeDisplayNames/territories/territory") |
| && !path.contains("[@alt=")) { |
| if ("en".equals(locale)) { |
| return true; |
| } |
| CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale); |
| if (cvi != null) { |
| for (String type : cvi.targetTerritories) { |
| if (path.contains("[@type=\"" + type + "\"]")) { |
| return true; |
| } |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level language name path in the given locale? |
| * |
| * E.g., //ldml/localeDisplayNames/languages/language[@type="xx"] |
| * if type "xx" matches the locale or the locale is "en" |
| * |
| * Exclude "alt" |
| * |
| * @param path |
| * @param locale |
| * @return true or false |
| */ |
| private static boolean isHighLevelLangName(String path, String locale) { |
| if (path.startsWith("//ldml/localeDisplayNames/languages/language") |
| && !path.contains("[@alt=")) { |
| if ("en".equals(locale)) { |
| /* |
| * English names (en.xml): match all types |
| */ |
| return true; |
| } else if (path.contains("[@type=\"" + locale + "\"]")) { |
| /* |
| * Native names: match the type=”xx” of each xml file to identify the Native. E.g., type=ko if ko.xml |
| */ |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level currency name path in the given locale? |
| * |
| * E.g., //ldml/numbers/currencies/currency[@type=\"AAA\"]/displayName |
| * if type "AAA" CORRESPONDS TO the locale or the locale is "en" |
| * |
| * English names (en.xml): match all types |
| * Native: check each currency type AAA corresponding to the given locale |
| * |
| * Do NOT exclude "alt"; e.g., |
| * //ldml/numbers/currencies/currency[@type="ADP"]/displayName[@alt="proposed-u167-1"] |
| * |
| * @param path |
| * @param locale |
| * @return true or false |
| */ |
| private static boolean isHighLevelCurrencyName(String path, String locale) { |
| if (currencyPattern.matcher(path).matches()) { |
| if ("en".equals(locale)) { |
| return true; |
| } |
| CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale); |
| if (cvi != null) { |
| for (String type : cvi.targetCurrencies) { |
| if (path.contains("[@type=\"" + type + "\"]")) { |
| return true; |
| } |
| } |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level currency code path in the given locale? |
| * |
| * E.g., //supplementalData/currencyData/region[@iso3166="AC"]/currency[@iso4217="SHP"][@from="1976-01-01"] |
| * |
| * @param path |
| * @param locale must be "supplementalData" to match |
| * @return true or false |
| */ |
| private static boolean isHighLevelCurrencyCode(String path, String locale) { |
| if ("supplementalData".equals(locale)) { |
| if (path.contains("iso3166")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level currency thousands-separator or decimal-separator path in the given locale? |
| * |
| * E.g., //ldml/numbers/currencies/currency[@type="ESP"]/group |
| * //ldml/numbers/currencies/currency[@type="ESP"]/decimal |
| * //ldml/numbers/currencies/currency[@type="CNY"]/symbol |
| * //ldml/numbers/currencies/currency[@type="CNY"]/symbol[@alt="narrow"]" |
| * |
| * @param path |
| * @param locale |
| * @return true or false |
| */ |
| private static boolean isHighLevelCurrencySeparatorOrSymbol(String path, String locale) { |
| if (path.startsWith("//ldml/numbers/currencies/currency") |
| && (path.contains("group") || path.contains("decimal") || path.contains("symbol"))) { |
| return true; |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level weekData/firstDay in the given locale? |
| * |
| * E.g.,//supplementalData/weekData/firstDay[@day="fri"][@territories="MV"] |
| * |
| * @param path |
| * @param locale must be "supplementalData" to match |
| * @return true or false |
| */ |
| private static boolean isHighLevelFirstDay(String path, String locale) { |
| if ("supplementalData".equals(locale)) { |
| if (path.startsWith("//supplementalData/weekData/firstDay")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Is the given path a high-level weekOfPreference in the given locale? |
| * |
| * E.g., //supplementalData/weekData/weekOfPreference[@ordering="weekOfYear"][@locales="und"] |
| * |
| * @param path |
| * @param locale must be "supplementalData" to match |
| * @return true or false |
| */ |
| private static boolean isHighLevelWeekOfPreference(String path, String locale) { |
| if ("supplementalData".equals(locale)) { |
| if (path.startsWith("//supplementalData/weekData/weekOfPreference")) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * For debugging, testing |
| */ |
| private static Set<String> highLevelPathMatched = null; |
| private static boolean verboseHighLevelReporting = false; |
| |
| private static void recordHighLevelMatch(String path) { |
| if (highLevelPathMatched == null) { |
| highLevelPathMatched = new HashSet<>(); |
| } |
| highLevelPathMatched.add(path); |
| } |
| |
| /** |
| * For debugging, report on any paths in highLevelPaths that never matched |
| */ |
| private static void reportHighLevelPathUsage() { |
| if (!verboseHighLevelReporting) { |
| return; |
| } |
| if (highLevelPathMatched == null) { |
| System.out.println("Zero high-level paths were matched!"); |
| return; |
| } |
| for (String path : highLevelPaths) { |
| if (!highLevelPathMatched.contains(path)) { |
| System.out.println("Unmatched high-level path: " + path); |
| } |
| } |
| for (String path : highLevelPathMatched) { |
| if (!highLevelPaths.contains(path)) { |
| System.out.println("Special matched high-level path: " + path); |
| } |
| } |
| } |
| } |
| } |