 // tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java - platform/external/cldr - Git at Google

 package org.unicode.cldr.tool;

 import java.io.File;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.unicode.cldr.draft.FileUtilities;
 import org.unicode.cldr.test.DisplayAndInputProcessor;
 import org.unicode.cldr.test.SubmissionLocales;
 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
 import org.unicode.cldr.tool.Option.Options;
 import org.unicode.cldr.tool.Option.Params;
 import org.unicode.cldr.util.CLDRConfig;
 import org.unicode.cldr.util.CLDRFile;
 import org.unicode.cldr.util.CLDRFile.Status;
 import org.unicode.cldr.util.CLDRPaths;
 import org.unicode.cldr.util.CldrUtility;
 import org.unicode.cldr.util.Counter;
 import org.unicode.cldr.util.DtdData;
 import org.unicode.cldr.util.DtdType;
 import org.unicode.cldr.util.Factory;
 import org.unicode.cldr.util.LanguageTagParser;
 import org.unicode.cldr.util.Level;
 import org.unicode.cldr.util.LocaleIDParser;
 import org.unicode.cldr.util.Organization;
 import org.unicode.cldr.util.Pair;
 import org.unicode.cldr.util.PathHeader;
 import org.unicode.cldr.util.PathHeader.PageId;
 import org.unicode.cldr.util.PathStarrer;
 import org.unicode.cldr.util.PatternCache;
 import org.unicode.cldr.util.SimpleXMLSource;
 import org.unicode.cldr.util.StandardCodes;
 import org.unicode.cldr.util.SupplementalDataInfo;
 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
 import org.unicode.cldr.util.TransliteratorUtilities;
 import org.unicode.cldr.util.XMLFileReader;
 import org.unicode.cldr.util.XPathParts;

 import com.google.common.base.Joiner;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.TreeMultimap;
 import com.ibm.icu.impl.Relation;
 import com.ibm.icu.impl.Row.R2;
 import com.ibm.icu.impl.Row.R3;
 import com.ibm.icu.impl.Row.R4;
 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.UnicodeSet;
 import com.ibm.icu.util.ICUUncheckedIOException;
 import com.ibm.icu.util.Output;

 public class ChartDelta extends Chart {
     /** Compile-time switch: together with the verbose option it enables the "SKIPPING:" messages in writeLdml. */
     private static final boolean verbose_skipping = false;

     /** Default value of the "directory" option. */
     private static final String DEFAULT_DELTA_DIR_NAME = "delta";
     /** Directory name that main() substitutes for the default when highLevelOnly is in effect. */
     private static final String DEFAULT_CHURN_DIR_NAME = "churn";

     /**
      * When true, getReformattedPath does not look up old-style annotation paths.
      * NOTE(review): the comparison with "30" is made via compareTo — presumably a
      * lexicographic String comparison; confirm against ToolConstants.
      */
     private static final boolean SKIP_REFORMAT_ANNOTATIONS = ToolConstants.PREV_CHART_VERSION.compareTo("30") >= 0;

     // NOTE(review): not referenced in the part of the file visible here — presumably a debugging aid.
     private static final PageId DEBUG_PAGE_ID = PageId.DayPeriod;

     /** Supplemental data; allowPath uses it to look up the coverage level of a path. */
     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = CLDRConfig.getInstance().getSupplementalDataInfo();

     /**
      * Command-line options understood by this tool. Each constant wraps one
      * {@link Option}; all of them are registered with a shared {@link Options}
      * instance that does the actual argument parsing.
      */
     private enum MyOptions {
         fileFilter(new Params().setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en").setMatch(".*")),
         orgFilter(new Params().setHelp("filter files by organization").setMatch(".*")),
         Vxml(new Params().setHelp("use cldr-aux for the base directory")),
         coverageFilter(new Params().setHelp("filter files by coverage").setMatch(".*")),
         directory(new Params().setHelp("Set the output directory name").setDefault(DEFAULT_DELTA_DIR_NAME).setMatch(".*")),
         verbose(new Params().setHelp("verbose debugging messages")),
         highLevelOnly(new Params().setHelp("check high-level paths (churn) only").setFlag('H')),
         ;

         // BOILERPLATE TO COPY
         /** The option object backing this constant. */
         final Option option;

         MyOptions(Params params) {
             this.option = new Option(this, params);
         }

         /** Parser holding every option declared above; filled by the static block below. */
         private static Options myOptions = new Options();
         static {
             for (MyOptions each : values()) {
                 myOptions.add(each, each.option);
             }
         }

         /**
          * Parse the command line with the shared parser.
          *
          * @param args raw command-line arguments
          * @return whatever the underlying {@code Options.parse} call returns
          */
         private static Set<String> parse(String[] args) {
             final MyOptions firstOption = values()[0];
             return myOptions.parse(firstOption, args, true);
         }
     }

     /** Applied with find() to "dir/locale" names in writeLdml; null means no file filtering. */
     private final Matcher fileFilter;
     private final String dirName; // "delta" or "churn" or set as option
     private final String chartNameCap; // capitalized, e.g., "Delta" or "Churn"
     private final String DIR; // full path of output folder
     /** Coverage threshold consulted by allowPath; null means every path is allowed. */
     private final Level minimumPathCoverage;
     /** When true, writeLdml prints each "dir/locale" name it processes. */
     private final boolean verbose;

     /**
      * If true, check only high-level paths, i.e., paths for which any changes
      * have high potential to cause disruptive "churn"
      */
     private final boolean highLevelOnly;

     /**
      * Command-line entry point. Delegates to {@link #main(String[], boolean)}
      * with high-level-only checking switched off (the -H option can still enable it).
      *
      * @param args command-line arguments
      */
     public static void main(String[] args) {
         final boolean highLevelOnly = false;
         main(args, highLevelOnly);
     }

     /**
      * Parse the options, generate the chart, write the totals and report where
      * output may have been written.
      *
      * @param args command-line arguments
      * @param highLevelOnly true to check only high-level ("churn") paths; the
      *        highLevelOnly option on the command line also turns this on
      * @throws IllegalArgumentException if both fileFilter and orgFilter are given
      */
     public static void main(String[] args, boolean highLevelOnly) {
         System.out.println("use -DCHART_VERSION=36.0 -DPREV_CHART_VERSION=34.0 to generate the differences between v36 and v34.");
         MyOptions.parse(args);

         final Option fileFilterOption = MyOptions.fileFilter.option;
         final Option orgFilterOption = MyOptions.orgFilter.option;
         final Option coverageOption = MyOptions.coverageFilter.option;

         // An explicit file filter is compiled first; an organization filter replaces it.
         Matcher fileFilter = null;
         if (fileFilterOption.doesOccur()) {
             fileFilter = PatternCache.get(fileFilterOption.getValue()).matcher("");
         }
         if (orgFilterOption.doesOccur()) {
             if (fileFilterOption.doesOccur()) {
                 throw new IllegalArgumentException("Can't have both fileFilter and orgFilter");
             }
             Organization org = Organization.fromString(orgFilterOption.getValue());
             Set<String> orgLocales = StandardCodes.make().getLocaleCoverageLocales(org);
             String alternatives = Joiner.on("|").join(orgLocales);
             fileFilter = PatternCache.get("^(main|annotations)/(" + alternatives + ")$").matcher("");
         }

         Level coverage = null;
         if (coverageOption.doesOccur()) {
             coverage = Level.fromString(coverageOption.getValue());
         }
         boolean verbose = MyOptions.verbose.option.doesOccur();
         highLevelOnly |= MyOptions.highLevelOnly.option.doesOccur();

         String dirName = MyOptions.directory.option.getValue();
         if (highLevelOnly && DEFAULT_DELTA_DIR_NAME.equals(dirName)) {
             System.out.println("For highLevelOnly, changing directory from " + DEFAULT_DELTA_DIR_NAME
                     + " to " + DEFAULT_CHURN_DIR_NAME);
             dirName = DEFAULT_CHURN_DIR_NAME;
         }

         ChartDelta chart = new ChartDelta(fileFilter, coverage, dirName, verbose, highLevelOnly);
         chart.writeChart(null);
         chart.showTotals();
         if (highLevelOnly) {
             HighLevelPaths.reportHighLevelPathUsage();
         }
         System.out.println("Finished. Files may have been created in these directories:");
         System.out.println(chart.DIR);
         System.out.println(getTsvDir(chart.DIR, chart.dirName));
     }

     /**
      * Create a chart generator.
      *
      * @param fileFilter matcher applied to "dir/locale" names, or null for no filtering
      * @param coverage minimum path coverage, or null for no coverage filtering
      * @param dirName name of the output folder, appended to CLDRPaths.CHART_DIRECTORY;
      *        its capitalized form is used in the chart title
      * @param verbose true to print progress messages
      * @param highLevelOnly true to check only high-level ("churn") paths
      */
     private ChartDelta(Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly) {
         this.fileFilter = fileFilter;
         this.verbose = verbose;
         this.highLevelOnly = highLevelOnly;
         this.dirName = dirName;
         // Capitalize with a fixed locale so the title does not depend on the JVM's
         // default locale (under Turkish, "i" would become a dotted capital I), and
         // accept an empty name instead of failing inside substring().
         this.chartNameCap = dirName.isEmpty()
             ? dirName
             : dirName.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + dirName.substring(1);
         this.DIR = CLDRPaths.CHART_DIRECTORY + dirName;
         this.minimumPathCoverage = coverage;
     }

     // NOTE(review): SEP and DEBUG_FILE are not referenced in the part of the file visible here.
     private static final String SEP = "\u0001";
     /** Compile-time switch: when true, addPathDiff prints every attribute-difference row it creates. */
     private static final boolean DEBUG = false;
     private static final String DEBUG_FILE = null; // "windowsZones.xml";
     /** A Pattern (despite the name) that locale names must fully match in writeLdml; ".*" accepts everything. */
     static Pattern fileMatcher = PatternCache.get(".*");

     /** Factory that getPathHeader (and writeDiffs) use to turn paths into PathHeaders. */
     static PathHeader.Factory phf = PathHeader.getFactory(ENGLISH);
     /** Attribute names that addPathDiff leaves out when comparing non-distinguishing attributes. */
     static final Set<String> DONT_CARE = new HashSet<>(Arrays.asList("draft", "standard", "reference"));

     /** @return the full path of the output folder for this chart */
     @Override
     public String getDirectory() {
         return this.DIR;
     }

     /** @return the capitalized chart name followed by " Charts" */
     @Override
     public String getTitle() {
         final StringBuilder title = new StringBuilder();
         title.append(chartNameCap).append(" Charts");
         return title.toString();
     }

     /** @return the base name of the chart's main page, always "index" */
     @Override
     public String getFileName() {
         final String indexPageName = "index";
         return indexPageName;
     }

     /** @return the HTML paragraph shown at the top of the chart index */
     @Override
     public String getExplanation() {
         final String intro = "<p>Charts showing the differences from the last version. ";
         final String specialTitles = "Titles prefixed by ¤ are special: either the locale data summary or supplemental data. ";
         final String caveat = "Not all changed data is charted yet. For details see each chart.</p>";
         return intro + specialTitles + caveat;
     }

     /**
      * Write the chart: copy the static support files into the output folder,
      * reset the change counters, generate the non-LDML and LDML comparisons,
      * and finally write the collected anchors as the index contents.
      *
      * @param pw writer for the index page
      * @throws IOException if writing fails
      */
     @Override
     public void writeContents(FormattedFileWriter pw) throws IOException {
         final Anchors anchors = new Anchors();
         final String outputDir = getDirectory();

         // static support files
         FileUtilities.copyFile(ChartDelta.class, "index.css", outputDir);
         FormattedFileWriter.copyIncludeHtmls(outputDir, true);

         // start counting from zero for this run
         counter.clear();
         fileCounters.clear();

         writeNonLdmlPlain(anchors);
         writeLdml(anchors);

         pw.setIndex("Main Chart Index", "../index.html");
         pw.write(anchors.toString());
     }

     /**
      * Row of (path header, element index, attribute name) identifying which
      * part of a path a difference refers to.
      */
     private static class PathHeaderSegment extends R3<PathHeader, Integer, String> {
         public PathHeaderSegment(final PathHeader header, final int elementIndex, final String attribute) {
             super(header, elementIndex, attribute);
         }
     }

     /**
      * Row describing one difference. Note that the row stores the segment
      * first and the locale second, although the constructor takes the locale first.
      */
     private static class PathDiff extends R4<PathHeaderSegment, String, String, String> {
         public PathDiff(final String locale, final PathHeaderSegment segment, final String oldValue, final String newValue) {
             super(segment, locale, oldValue, newValue);
         }
     }

     /** Frozen, empty file for "und"; makeWithFallback returns it when no file can be made, and writeLdml compares against it by identity. */
     private static final CLDRFile EMPTY_CLDR = new CLDRFile(new SimpleXMLSource("und").freeze());

     // NOTE(review): not referenced in the part of the file visible here.
     private static final File CLDR_BASE_DIR = CLDRConfig.getInstance().getCldrBaseDirectory();

     /** Kind of difference between an old value and a current value. */
     private enum ChangeType {
         added, deleted, changed, same;

         /**
          * Classify a pair of values. A null old value means {@code added} (even
          * when the current value is also null); otherwise a null current value
          * means {@code deleted}; otherwise the values are compared with equals().
          */
         public static ChangeType get(String oldValue, String currentValue) {
             if (oldValue == null) {
                 return added;
             }
             if (currentValue == null) {
                 return deleted;
             }
             if (oldValue.equals(currentValue)) {
                 return same;
             }
             return changed;
         }
     }

     /** Change counts over all files; updated by addChange and printed as the TOTAL row by showTotals. */
     private Counter<ChangeType> counter = new Counter<>();
     /** Change counts per file, keyed by the file name passed to addChange; sorted by key. */
     private Map<String, Counter<ChangeType>> fileCounters = new TreeMap<>();
     /** Starred paths for which getPathHeader found no usable PathHeader; printed at the end of the summary file. */
     private Set<String> badHeaders = new TreeSet<>();

     /**
      * Record {@code count} items of the given change type, both in the overall
      * counter and in the counter of the given file (created on first use).
      *
      * @param file key of the per-file counter
      * @param changeType kind of change being counted
      * @param count number of items to add
      */
     private void addChange(String file, ChangeType changeType, int count) {
         counter.add(changeType, count); // overall total
         fileCounters.computeIfAbsent(file, key -> new Counter<>()).add(changeType, count);
     }

     /**
      * Write the summary TSV file: a header line, the overall totals, one line
      * per file, and finally every entry of badHeaders.
      *
      * @throws ICUUncheckedIOException if the file cannot be written
      */
     private void showTotals() {
         final String tsvDir = getTsvDir(DIR, dirName);
         final String summaryFileName = dirName + "_summary.tsv";
         try (PrintWriter pw = FileUtilities.openUTF8Writer(tsvDir, summaryFileName)) {
             // header: the column for "same" carries the total instead
             final StringBuilder header = new StringBuilder("# dir\tfile");
             for (ChangeType type : ChangeType.values()) {
                 header.append("\t").append(type == ChangeType.same ? "total" : type.toString());
             }
             pw.println(header.toString());

             showTotal(pw, "TOTAL/", counter);
             for (Entry<String, Counter<ChangeType>> perFile : fileCounters.entrySet()) {
                 showTotal(pw, perFile.getKey(), perFile.getValue());
             }
             for (String badHeader : badHeaders) {
                 pw.println(badHeader);
             }
         } catch (IOException e) {
             throw new ICUUncheckedIOException(e);
         }
     }

     /**
      * Write one tab-separated summary line: the title (with each "/" turned
      * into a tab) followed by one formatted number per ChangeType. The column
      * for {@code ChangeType.same} holds the counter's total rather than the
      * count of unchanged items, matching the "total" header written by showTotals.
      *
      * @param pw destination
      * @param title2 row title, normally "dir/file"
      * @param counter2 counts to print
      */
     private void showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2) {
         // The percent formatter that used to be created here was never used; it has been removed.
         final long total = counter2.getTotal();
         final NumberFormat nf = NumberFormat.getIntegerInstance();
         pw.print(title2.replace("/", "\t"));
         for (ChangeType item : ChangeType.values()) {
             final long shown = item == ChangeType.same ? total : counter2.getCount(item);
             pw.print("\t" + nf.format(shown));
         }
         pw.println();
     }

     /**
      * Compare the LDML data of the current chart version with that of the
      * previous chart version and write the differences.
      * <p>
      * For every directory in DtdType.ldml.directories (except annotationsDerived
      * and casing) a pair of factories is opened. Locales are grouped by a base
      * key (language+script of the minimized locale, or "root"); for each group
      * the value and attribute differences of all its locales are collected and
      * handed to writeDiffs. Two TSV files (dirName + ".tsv" and dirName +
      * "_count.tsv") are opened here and passed to writeDiffs / writeCounter.
      *
      * @param anchors passed through to writeDiffs
      * @throws IOException if an output file cannot be written
      * @throws IllegalArgumentException if no current factory could be opened, or
      *         a factory does not have exactly one source directory
      *
      * TODO: shorten the function using subroutines
      */
     private void writeLdml(Anchors anchors)  throws IOException {
         try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + ".tsv");
             PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_count.tsv");
             ) {
             tsvFile.println("# Section\tPage\tHeader\tCode\tLocale\tOld\tNew\tLevel");

             // set up factories
             // factories and oldFactories are parallel lists: index i refers to the same directory in both
             List<Factory> factories = new ArrayList<>();
             List<Factory> oldFactories = new ArrayList<>();

             Counter<PathHeader> counts = new Counter<>();

             String dirBase = ToolConstants.getBaseDirectory(ToolConstants.CHART_VERSION);
             String prevDirBase = ToolConstants.getBaseDirectory(ToolConstants.PREV_CHART_VERSION);

             for (String dir : DtdType.ldml.directories) {
                 if (dir.equals("annotationsDerived") || dir.equals("casing")) {
                     continue;
                 }
                 String current = dirBase + "common/" + dir;
                 String past = prevDirBase + "common/" + dir;
                 try {
                     factories.add(Factory.make(current, ".*"));
                 } catch (Exception e1) {
                     System.out.println("Skipping: " + dir + "\t" + e1.getMessage());
                     continue; // skip where the directories don't exist in old versions
                 }
                 try {
                     oldFactories.add(Factory.make(past, ".*"));
                 } catch (Exception e) {
                     System.out.println("Couldn't open factory: " + past);
                     past = null;
                     // keep the lists parallel; makeWithFallback turns a null factory into EMPTY_CLDR
                     oldFactories.add(null);
                 }
                 System.out.println("Will compare: " + dir + "\t\t" + current + "\t\t" + past);
             }
             if (factories.isEmpty()) {
                 throw new IllegalArgumentException("No factories found for "
                     + dirBase + ": " + DtdType.ldml.directories);
             }
             // get a list of all the locales to cycle over
             // (only the locales available in the first factory are considered)

             Relation<String, String> baseToLocales = Relation.of(new TreeMap<String, Set<String>>(), HashSet.class);
             Matcher m = fileMatcher.matcher("");
             Set<String> defaultContents = SDI.getDefaultContentLocales();
             LanguageTagParser ltp = new LanguageTagParser();
             LikelySubtags ls = new LikelySubtags();
             for (String file : factories.get(0).getAvailable()) {
                 if (defaultContents.contains(file)) {
                     continue;
                 }
                 if (!m.reset(file).matches()) {
                     continue;
                 }
                 // group key: "root", or language+script of the minimized locale
                 String base = file.equals("root") ? "root" : ltp.set(ls.minimize(file)).getLanguageScript();
                 baseToLocales.put(base, file);
             }

             // do keyboards later

             Status currentStatus = new Status();
             Status oldStatus = new Status();
             // diff is refilled for each base and cleared after it is written; paths is refilled for each locale
             Set<PathDiff> diff = new TreeSet<>();
             Set<String> paths = new HashSet<>();

             // diffAll accumulates, over all bases, the locales that have a value difference for each PathHeader
             Relation<PathHeader, String> diffAll = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);
             for (Entry<String, Set<String>> baseNLocale : baseToLocales.keyValuesSet()) {
                 String base = baseNLocale.getKey();
                 for (int i = 0; i < factories.size(); ++i) {
                     Factory factory = factories.get(i);
                     Factory oldFactory = oldFactories.get(i);
                     List<File> sourceDirs = Arrays.asList(factory.getSourceDirectories());
                     if (sourceDirs.size() != 1) {
                         throw new IllegalArgumentException("Internal error: expect single source dir");
                     }
                     File sourceDir = sourceDirs.get(0);
                     String sourceDirLeaf = sourceDir.getName();
                     // files are made unresolved for directories whose name contains "subdivisions" or "transforms"
                     boolean resolving = !sourceDirLeaf.contains("subdivisions")
                         && !sourceDirLeaf.contains("transforms");

                     for (String locale : baseNLocale.getValue()) {
                         String nameAndLocale = sourceDirLeaf + "/" + locale;
                         if (fileFilter != null && !fileFilter.reset(nameAndLocale).find()) {
                             if (verbose && verbose_skipping) {
                                 System.out.println("SKIPPING: " + nameAndLocale);
                             }
                             continue;
                         }
                         if (verbose) {
                             System.out.println(nameAndLocale);
                         }
                         CLDRFile current = makeWithFallback(factory, locale, resolving);
                         CLDRFile old = makeWithFallback(oldFactory, locale, resolving);
                         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(old);

                         // both sides fell back all the way to root: nothing locale-specific to compare
                         if (!locale.equals("root") && current.getLocaleID().equals("root") && old.getLocaleID().equals("root")) {
                             continue;
                         }
                         if (old == EMPTY_CLDR && current == EMPTY_CLDR) {
                             continue;
                         }
                         if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(locale)) {
                             continue;
                         }
                         // union of the allowed paths of the current and the old file
                         paths.clear();
                         for (String path : current.fullIterable()) {
                             if (allowPath(locale, path)) {
                                 paths.add(path);
                             }
                         }
                         for (String path : old.fullIterable()) {
                             if (!paths.contains(path) && allowPath(locale, path)) {
                                 paths.add(path);
                             }
                         }

                         // out-parameters filled by getReformattedPath for each path
                         Output<String> reformattedValue = new Output<>();
                         Output<Boolean> hasReformattedValue = new Output<>();

                         for (String path : paths) {
                             if (path.startsWith("//ldml/identity")
                                 || path.endsWith("/alias")
                                 || path.startsWith("//ldml/segmentations") // do later
                                 || path.startsWith("//ldml/rbnf") // do later
                                 ) {
                                 continue;
                             }
                             // getPathHeader returns null (and records the path in badHeaders) for unusable headers
                             PathHeader ph = getPathHeader(path);
                             if (ph == null) {
                                 continue;
                             }

                             String oldValue;
                             String currentValue;

                             {
                                 String sourceLocaleCurrent = current.getSourceLocaleID(path, currentStatus);
                                 String sourceLocaleOld = getReformattedPath(oldStatus, old, path, reformattedValue, hasReformattedValue);

                                 // filter out stuff that differs at a higher level
                                 // (skip when neither side has this locale as the source of the value)
                                 if (!sourceLocaleCurrent.equals(locale)
                                     && !sourceLocaleOld.equals(locale)) {
                                     continue;
                                 }
                                 // skip when neither side found the value at this very path
                                 if (!path.equals(currentStatus.pathWhereFound)
                                     && !path.equals(oldStatus.pathWhereFound)) {
                                     continue;
                                 }
                                 // fix some incorrect cases?

                                 // an inheritance marker is replaced by the value from getBaileyValue
                                 currentValue = current.getStringValue(path);
                                 if (CldrUtility.INHERITANCE_MARKER.equals(currentValue)) {
                                     currentValue = current.getBaileyValue(path, null, null);
                                 }

                                 String oldRawValue = hasReformattedValue.value ? reformattedValue.value : old.getStringValue(path);
                                 if (CldrUtility.INHERITANCE_MARKER.equals(oldRawValue)) {
                                     oldRawValue = old.getBaileyValue(path, null, null);
                                 }
                                 // ignore differences due to old DAIP
                                 oldValue = dontDaipValue(oldRawValue, path) ? oldRawValue : daip.processInput(path, oldRawValue, null);
                             }
                             if (highLevelOnly && new SuspiciousChange(oldValue, currentValue, path, locale).isDisruptive() == false) {
                                 continue;
                             }
                             // handle non-distinguishing attributes
                             addPathDiff(sourceDir, old, current, locale, ph, diff);

                             addValueDiff(sourceDir, oldValue, currentValue, locale, ph, diff, diffAll);
                         }
                     }
                 }
                 // one call per base, covering all directories and all locales of that base
                 writeDiffs(anchors, base, diff, tsvFile, counts);
                 diff.clear();
             }
             writeDiffs(diffAll);

             writeCounter(tsvCountFile, "Count", counts);
         }
     }

     /**
      * Tell whether an old value should be used as-is instead of being passed
      * through the display-and-input processor.
      *
      * @param oldRawValue the old value, possibly null
      * @param path the path of the value
      * @return true if the value is null or the path starts with "//ldml/collations"
      */
     public boolean dontDaipValue(String oldRawValue, String path) {
         if (oldRawValue == null) {
             return true;
         }
         return path.startsWith("//ldml/collations");
     }

     /**
      * Apply the coverage filter to a path.
      *
      * @param locale locale whose coverage level is looked up
      * @param path path to check
      * @return true if no minimum coverage is set, or if the minimum coverage
      *         does not compare as lower than the coverage level of the path
      */
     private boolean allowPath(String locale, String path) {
         if (minimumPathCoverage == null) {
             return true;
         }
         final Level pathLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale);
         return minimumPathCoverage.compareTo(pathLevel) >= 0;
     }

     /**
      * Get the source locale of a path in the old file, mapping new-style
      * annotation paths back to the old annotation format where required.
      * <pre>
      * OLD:     &lt;annotation cp='[😀]' tts='grinning face'&gt;face; grin&lt;/annotation&gt;
      * NEW:     &lt;annotation cp="😀"&gt;face | grin&lt;/annotation&gt;
      *          &lt;annotation cp="😀" type="tts"&gt;grinning face&lt;/annotation&gt;
      * </pre>
      * Given a NEW path, the OLD value is looked up and converted to the new form.
      *
      * @param oldStatus receives the status of the source-locale lookup
      * @param old the old file
      * @param path the (new-style) path
      * @param value receives the converted old value when one was found
      * @param hasReformattedValue set to TRUE when {@code value} was filled, else FALSE
      * @return the source locale ID reported by the old file
      */
     private String getReformattedPath(Status oldStatus, CLDRFile old, String path, Output<String> value, Output<Boolean> hasReformattedValue) {
         final boolean needsOldStyleLookup = !SKIP_REFORMAT_ANNOTATIONS && path.startsWith("//ldml/annotations/");
         if (!needsOldStyleLookup) {
             hasReformattedValue.value = Boolean.FALSE;
             return old.getSourceLocaleID(path, oldStatus);
         }

         // Rebuild the old-style path: no type attribute, and cp wrapped in brackets.
         XPathParts oldStyleParts = XPathParts.getFrozenInstance(path).cloneAsThawed(); // not frozen, for removeAttribute
         final boolean isTts = oldStyleParts.getAttributeValue(-1, "type") != null;
         if (isTts) {
             oldStyleParts.removeAttribute(-1, "type");
         }
         oldStyleParts.setAttribute(-1, "cp", "[" + oldStyleParts.getAttributeValue(-1, "cp") + "]");
         final String oldStylePath = oldStyleParts.toString();

         final String oldKeywords = old.getStringValue(oldStylePath);
         final boolean found = oldKeywords != null;
         if (found) {
             if (isTts) {
                 // the old tts text lives in an attribute of the full path
                 final String oldFullPath = old.getFullXPath(oldStylePath);
                 value.value = XPathParts.getFrozenInstance(oldFullPath).getAttributeValue(-1, "tts");
             } else {
                 // old keyword lists were separated by semicolons
                 value.value = oldKeywords.replaceAll("\\s*;\\s*", " | ");
             }
         }
         hasReformattedValue.value = found ? Boolean.TRUE : Boolean.FALSE;
         return old.getSourceLocaleID(oldStylePath, oldStatus);
     }

     /** Used by getPathHeader to produce the "starred" form of a path (substitution pattern "%A") for badHeaders. */
     PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A");

     /**
      * Look up the PathHeader of a path.
      *
      * @param path the path to look up
      * @return the PathHeader, or null if the lookup threw an exception or the
      *         header's page is {@code PageId.Unknown}; in both of those cases
      *         the starred form of the path is recorded in badHeaders
      */
     private PathHeader getPathHeader(String path) {
         PathHeader header = null;
         boolean usable;
         try {
             header = phf.fromPath(path);
             usable = header.getPageId() != PageId.Unknown;
         } catch (Exception e) {
             // deliberately not reported here; the path ends up in badHeaders instead
             usable = false;
         }
         if (usable) {
             return header;
         }
         badHeaders.add(starrer.set(path));
         return null;
     }

     /**
      * Make the file for a locale, walking up the parent chain until a file
      * can be made.
      *
      * @param oldFactory factory to use; may be null
      * @param locale locale to start with
      * @param resolving passed through to {@code Factory.make}
      * @return the first file that could be made, or EMPTY_CLDR if the factory
      *         is null or no locale in the parent chain could be made
      */
     private CLDRFile makeWithFallback(Factory oldFactory, String locale, boolean resolving) {
         if (oldFactory == null) {
             return EMPTY_CLDR;
         }
         String candidate = locale;
         do { // fall back, maybe to root
             try {
                 return oldFactory.make(candidate, resolving);
             } catch (Exception e) {
                 candidate = LocaleIDParser.getParent(candidate);
             }
         } while (candidate != null);
         return EMPTY_CLDR;
     }

     /**
      * Record differences in the non-distinguishing attributes of a path
      * between the old and the current file.
      * <p>
      * The full paths of both files are compared element by element. For every
      * attribute that is neither distinguishing (i.e. present in the plain path)
      * nor listed in DONT_CARE, the change is counted via addChange, and a
      * PathDiff row is added to {@code diff2} when the values differ.
      *
      * @param sourceDir directory of the file, used for the per-file counter key
      * @param old the old file
      * @param current the current file
      * @param locale the locale being compared
      * @param ph header of the path; its original path is the one compared
      * @param diff2 receives the difference rows
      */
     private void addPathDiff(File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2) {
         String path = ph.getOriginalPath();
         String fullPathCurrent = current.getFullXPath(path);
         String fullPathOld = old.getFullXPath(path);
         if (Objects.equals(fullPathCurrent, fullPathOld)) {
             return;
         }
         XPathParts pathPlain = XPathParts.getFrozenInstance(path);
         // a missing full path is treated as the plain path (no extra attributes)
         XPathParts pathCurrent = fullPathCurrent == null ? pathPlain : XPathParts.getFrozenInstance(fullPathCurrent);
         XPathParts pathOld = fullPathOld == null ? pathPlain : XPathParts.getFrozenInstance(fullPathOld);
         TreeSet<String> fullAttributes = null;
         int size = pathCurrent.size();
         String parentAndName = parentAndName(sourceDir, locale);
         for (int elementIndex = 0; elementIndex < size; ++elementIndex) { // will have same size
             Collection<String> distinguishing = pathPlain.getAttributeKeys(elementIndex);
             Collection<String> attributesCurrent = pathCurrent.getAttributeKeys(elementIndex);
             // Read the old attributes from the OLD path. Taking them from pathCurrent
             // (as was done before) meant attributes present only in the old file were
             // never examined, so removed attributes went unreported.
             Collection<String> attributesOld = pathOld.getAttributeKeys(elementIndex);
             if (attributesCurrent.isEmpty() && attributesOld.isEmpty()) {
                 continue;
             }
             if (fullAttributes == null) {
                 fullAttributes = new TreeSet<>();
             } else {
                 fullAttributes.clear();
             }
             fullAttributes.addAll(attributesCurrent);
             fullAttributes.addAll(attributesOld);
             fullAttributes.removeAll(distinguishing);
             fullAttributes.removeAll(DONT_CARE);

             // at this point we only have non-distinguishing
             for (String attribute : fullAttributes) {
                 String attributeValueOld = pathOld.getAttributeValue(elementIndex, attribute);
                 String attributeValueCurrent = pathCurrent.getAttributeValue(elementIndex, attribute);
                 if (Objects.equals(attributeValueOld, attributeValueCurrent)) {
                     addChange(parentAndName, ChangeType.same, 1);
                     continue;
                 }
                 addChange(parentAndName, ChangeType.get(attributeValueOld, attributeValueCurrent), 1);

                 // the segment index is stored counted from the end of the path
                 PathDiff row = new PathDiff(
                     locale,
                     new PathHeaderSegment(ph, size - elementIndex - 1, attribute),
                     attributeValueOld,
                     attributeValueCurrent);
                 if (DEBUG) {
                     System.out.println(row);
                 }
                 diff2.add(row);
             }
         }
     }

     /**
      * Build the key used for per-file counters.
      *
      * @param sourceDir directory containing the locale file
      * @param locale locale ID
      * @return the last name of the directory, "/", the locale and ".xml"
      */
     private String parentAndName(File sourceDir, String locale) {
         final StringBuilder key = new StringBuilder();
         key.append(sourceDir.getName()).append("/").append(locale).append(".xml");
         return key.toString();
     }

     /**
      * Count the difference between an old and a current value and, if they
      * differ, record a row for it.
      * <p>
      * Values that can be split (see getSplitter) are compared item by item, and
      * exemplar sets are compared as UnicodeSets; in both cases the recorded
      * old/new values contain only the parts that differ. All other values are
      * counted as a single added, deleted or changed item.
      *
      * @param sourceDir directory of the file, used for the per-file counter key
      * @param valueOld old value, possibly null
      * @param valueCurrent current value, possibly null
      * @param locale locale being compared
      * @param ph header of the path
      * @param diff receives a row when the values differ
      * @param diffAll receives (ph, locale) when the values differ
      */
     private void addValueDiff(File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff,
         Relation<PathHeader, String> diffAll) {
         // handle stuff that can be split specially
         final Splitter splitter = getSplitter(ph.getOriginalPath(), valueOld, valueCurrent);
         final String fileKey = parentAndName(sourceDir, locale);

         if (Objects.equals(valueCurrent, valueOld)) {
             int unchanged = 1;
             if (splitter != null && valueCurrent != null) {
                 unchanged = splitHandlingNull(splitter, valueCurrent).size();
             }
             addChange(fileKey, ChangeType.same, unchanged);
             return;
         }

         if (splitter != null) {
             final List<String> oldItems = splitHandlingNull(splitter, valueOld);
             final List<String> newItems = splitHandlingNull(splitter, valueCurrent);
             // tally[0] = items also in the other list, tally[1] = items only in this list
             final int[] tally = new int[2];
             valueOld = getFilteredValue(oldItems, newItems, tally);
             addChange(fileKey, ChangeType.same, tally[0]);
             addChange(fileKey, ChangeType.deleted, tally[1]);
             tally[0] = 0;
             tally[1] = 0;
             valueCurrent = getFilteredValue(newItems, oldItems, tally);
             addChange(fileKey, ChangeType.added, tally[1]);
         } else if (hasUnicodeSetValue(ph.getOriginalPath())) {
             final UnicodeSet oldSet = valueOld == null ? UnicodeSet.EMPTY : new UnicodeSet(valueOld);
             final UnicodeSet currentSet = valueCurrent == null ? UnicodeSet.EMPTY : new UnicodeSet(valueCurrent);
             final UnicodeSet onlyInOld = new UnicodeSet(oldSet).removeAll(currentSet);
             final UnicodeSet onlyInCurrent = new UnicodeSet(currentSet).removeAll(oldSet);
             addChange(fileKey, ChangeType.same, oldSet.size() - onlyInOld.size());
             addChange(fileKey, ChangeType.deleted, onlyInOld.size());
             addChange(fileKey, ChangeType.added, onlyInCurrent.size());
             // ellipses mark that part of the set was left out because it is unchanged
             if (oldSet.size() == onlyInOld.size()) {
                 valueOld = onlyInOld.toPattern(false);
             } else {
                 valueOld = "…" + onlyInOld + "…";
             }
             if (currentSet.size() == onlyInCurrent.size()) {
                 valueCurrent = onlyInCurrent.toPattern(false);
             } else {
                 valueCurrent = "…" + onlyInCurrent + "…";
             }
         } else {
             addChange(fileKey, ChangeType.get(valueOld, valueCurrent), 1);
         }

         diff.add(new PathDiff(locale, new PathHeaderSegment(ph, -1, ""), valueOld, valueCurrent));
         diffAll.put(ph, locale);
     }

     /**
      * @param xpath path to check
      * @return true if the path starts with "//ldml/characters/exemplar", i.e. its
      *         value is compared as a UnicodeSet by addValueDiff
      */
     private boolean hasUnicodeSetValue(String xpath) {
         final String exemplarPrefix = "//ldml/characters/exemplar";
         return xpath.startsWith(exemplarPrefix);
     }

     /**
      * Split a value, passing null through.
      *
      * @param splitter splitter to apply
      * @param value value to split, possibly null
      * @return null if {@code value} is null, otherwise the list produced by the splitter
      */
     private List<String> splitHandlingNull(Splitter splitter, String value) {
         if (value == null) {
             return null;
         }
         return splitter.splitToList(value);
     }

     /**
      * Choose how a value should be split into items for comparison.
      *
      * @param path path of the value
      * @param valueOld old value, possibly null
      * @param valueCurrent current value, possibly null
      * @return {@code DtdData.BAR_SPLITTER} for paths containing "/annotation" but
      *         not "tts"; otherwise {@code DtdData.CR_SPLITTER} if either value
      *         contains a newline; otherwise null (do not split)
      */
     private Splitter getSplitter(String path, String valueOld, String valueCurrent) {
         final boolean isKeywordAnnotation = path.contains("/annotation") && !path.contains("tts");
         if (isKeywordAnnotation) {
             return DtdData.BAR_SPLITTER;
         }
         final boolean oldIsMultiline = valueOld != null && valueOld.contains("\n");
         final boolean anyMultiline = oldIsMultiline || (valueCurrent != null && valueCurrent.contains("\n"));
         return anyMultiline ? DtdData.CR_SPLITTER : null;
     }

     /**
      * Return the items of {@code toGetStringFor} that do not occur in
      * {@code linesToRemove}, one per line. Each run of removed items that is
      * followed by a kept item is replaced by a line holding "…"; a run of
      * removed items at the very end is replaced by a trailing "…".
      *
      * @param toGetStringFor items to filter; may be null
      * @param linesToRemove items to drop; null is treated as empty
      * @param sameAndDiff two-element tally that is incremented, not reset:
      *        [0] counts the dropped items, [1] counts the kept items
      * @return the filtered text, or null if {@code toGetStringFor} is null
      */
     private String getFilteredValue(Collection<String> toGetStringFor, Collection<String> linesToRemove,
         int[] sameAndDiff) {
         if (toGetStringFor == null) {
             return null;
         }
         final Set<String> excluded;
         if (linesToRemove == null) {
             excluded = Collections.emptySet();
         } else {
             excluded = new HashSet<>(linesToRemove);
         }
         final StringBuilder kept = new StringBuilder();
         boolean gapPending = false; // true while dropped items have not yet been marked with an ellipsis
         for (String line : toGetStringFor) {
             if (excluded.contains(line)) {
                 gapPending = true;
                 ++sameAndDiff[0];
                 continue;
             }
             ++sameAndDiff[1];
             if (gapPending) {
                 kept.append("…\n");
                 gapPending = false;
             }
             kept.append(line).append('\n');
         }
         if (gapPending) {
             kept.append("…");
         } else if (kept.length() > 0) {
             kept.setLength(kept.length() - 1); // remove final \n
         }
         return kept.toString();
     }

     /**
      * Write one chart of non-LDML differences, or delete the chart file when there are none.
      * <p>
      * Each value in {@code bcp} is an old/new pair joined with SEP, as stored by addRow.
      *
      * @param anchors passed through to writeTable
      * @param file the chart file name, relative to DIR
      * @param title the chart title
      * @param bcp the differences keyed by PathHeader; despite the name, callers pass
      *        BCP47, supplemental and transforms data alike
      * @param tsvFile passed through to writeTable
      */
     private void writeDiffs(Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile) {
         if (bcp.isEmpty()) {
             System.out.println("\tDeleting: " + DIR + "/" + file);
             // File(parent, child) supplies the separator itself, so the file deleted is the one
             // reported above whether or not DIR ends with a slash.
             new File(DIR, file).delete();
             return;
         }
         TablePrinter tablePrinter = new TablePrinter()
             .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)//.setRepeatDivider(true)
             .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Code", "class='source'", null, "class='source'", false)
             .addColumn("Old", "class='target'", null, "class='target'", false) //  width='20%'
             .addColumn("New", "class='target'", null, "class='target'", false); //  width='20%'
         for (Entry<PathHeader, Collection<String>> entry : bcp.asMap().entrySet()) {
             PathHeader ph = entry.getKey();
             if (ph.getPageId() == DEBUG_PAGE_ID) {
                 System.out.println(ph + "\t" + ph.getOriginalPath());
             }
             for (String value : entry.getValue()) {
                 // oldNew[0] is the old text, oldNew[1] the new text
                 String[] oldNew = value.split(SEP);
                 tablePrinter.addRow()
                 .addCell(ph.getSectionId())
                 .addCell(ph.getPageId())
                 .addCell(ph.getHeader())
                 .addCell(ph.getCode())
                 .addCell(oldNew[0])
                 .addCell(oldNew[1])
                 .finishRow();
             }
         }
         writeTable(anchors, file, tablePrinter, title, tsvFile);
     }

     /**
      * Build a table with one row per path header, listing the locales recorded for it.
      * <p>
      * NOTE(review): the table is only built here; this method neither writes nor returns it.
      *
      * @param diffAll relation from each PathHeader to the set of locales recorded for it
      */
     private void writeDiffs(Relation<PathHeader, String> diffAll) {
         TablePrinter localeTable = new TablePrinter()
             .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Code", "class='source'", null, "class='source'", true)
             .addColumn("Locales where different", "class='target'", null, "class='target'", true);
         for (Entry<PathHeader, Set<String>> headerAndLocales : diffAll.keyValuesSet()) {
             final PathHeader header = headerAndLocales.getKey();
             final Set<String> localeIds = headerAndLocales.getValue();
             localeTable.addRow()
             .addCell(header.getSectionId())
             .addCell(header.getPageId())
             .addCell(header.getHeader())
             .addCell(header.getCode())
             .addCell(Joiner.on(" ").join(localeIds))
             .finishRow();
         }
     }

     /**
      * Write the chart for one set of per-locale differences, then empty the set.
      * Does nothing if the set is already empty.
      *
      * @param anchors passed through to writeTable
      * @param file the chart file name; also passed to ENGLISH.getName to form the title
      * @param diff the differences to chart; cleared after the chart is written
      * @param tsvFile passed through to writeTable
      * @param counts incremented by one for the PathHeader of every row
      */
     private void writeDiffs(Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts) {
         if (diff.isEmpty()) {
             return;
         }
         TablePrinter deltaTable = new TablePrinter()
             .addColumn("Section", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Page", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Header", "class='source'", CldrUtility.getDoubleLinkMsg(), "class='source'", true)
             .addColumn("Code", "class='source'", null, "class='source'", true)
             .addColumn("Locale", "class='source'", null, "class='source'", true)
             .addColumn("Old", "class='target'", null, "class='target'", true) //  width='20%'
             .addColumn("New", "class='target'", null, "class='target'", true) //  width='20%'
             .addColumn("Level", "class='target'", null, "class='target'", true);

         for (PathDiff entry : diff) {
             final PathHeaderSegment segment = entry.get0();
             counts.add(segment.get0(), 1);
             final String localeId = entry.get1();
             final String before = entry.get2();
             final String after = entry.get3();

             final PathHeader header = segment.get0();
             final Integer segmentIndex = segment.get1();
             final String attributeName = segment.get2();

             // The code cell gets an attribute suffix, and an index suffix unless the index is zero
             String codeCell = header.getCode();
             if (!attributeName.isEmpty()) {
                 codeCell = codeCell + "_" + attributeName + (segmentIndex != 0 ? "|" + segmentIndex : "");
             }
             final Level coverage = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(header.getOriginalPath(), localeId);
             final String beforeHtml;
             if (before == null) {
                 beforeHtml = "▷missing◁";
             } else {
                 beforeHtml = TransliteratorUtilities.toHTML.transform(before);
             }
             final String afterHtml;
             if (after == null) {
                 afterHtml = "▷removed◁";
             } else {
                 afterHtml = TransliteratorUtilities.toHTML.transform(after);
             }

             deltaTable.addRow()
             .addCell(header.getSectionId())
             .addCell(header.getPageId())
             .addCell(header.getHeader())
             .addCell(codeCell)
             .addCell(localeId)
             .addCell(beforeHtml)
             .addCell(afterHtml)
             .addCell(coverage)
             .finishRow();
         }
         writeTable(anchors, file, deltaTable, ENGLISH.getName(file) + " " + chartNameCap, tsvFile);

         diff.clear();
     }

     /**
      * A Chart whose contents are a single pre-built table, written both into the chart
      * and, as TSV, into a separate writer.
      */
     private class ChartDeltaSub extends Chart {
         private final String title;
         private final String file;
         private final TablePrinter tablePrinter;
         private final PrintWriter tsvFile;

         /**
          * @param title the value returned by getTitle
          * @param file the value returned by getFileName
          * @param tablePrinter the table to write as the chart contents
          * @param tsvFile the writer that receives the TSV form of the table
          */
         private ChartDeltaSub(String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile) {
             this.tablePrinter = tablePrinter;
             this.tsvFile = tsvFile;
             this.title = title;
             this.file = file;
         }

         @Override
         public String getTitle() {
             return title;
         }

         @Override
         public String getFileName() {
             return file;
         }

         @Override
         public String getDirectory() {
             return DIR;
         }

         @Override
         public boolean getShowDate() {
             return false;
         }

         @Override
         public String getExplanation() {
             final String summary = "<p>Lists data fields that differ from the last major version (see versions above).";
             final String inheritanceNote = " Inherited differences in locales are suppressed, except where the source locales are different. ";
             return summary + inheritanceNote + "<p>";
         }

         /**
          * Write the table to the chart, then write the same table as TSV to tsvFile.
          */
         @Override
         public void writeContents(FormattedFileWriter pw) throws IOException {
             final String tableHtml = tablePrinter.toTable();
             pw.write(tableHtml);
             tablePrinter.toTsv(tsvFile);
         }
     }

     /**
      * Wrap the table in a ChartDeltaSub and write it as a chart.
      *
      * @param anchors passed to writeChart
      * @param file the chart file name
      * @param tablePrinter the table to write
      * @param title the chart title
      * @param tsvFile the writer that receives the TSV form of the table
      */
     private void writeTable(Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile) {
         new ChartDeltaSub(title, file, tablePrinter, tsvFile).writeChart(anchors);
     }

     /**
      * Compare the non-LDML XML files under common/ between the previous and the current chart
      * version, and write the BCP47, supplemental and transforms charts together with the
      * "_supp.tsv" and "_supp_count.tsv" files.
      * <p>
      * Directories listed in DtdType.ldml.directories are skipped, as are "dtd", "properties",
      * "uca" and ".DS_Store".
      *
      * @param anchors passed through to writeDiffs for each chart
      * @throws IOException if a TSV file cannot be opened, or common/ cannot be listed
      */
     private void writeNonLdmlPlain(Anchors anchors) throws IOException {
         try (PrintWriter tsvFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp.tsv");
             PrintWriter tsvCountFile = FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_supp_count.tsv");
             ) {
             tsvFile.println("# Section\tPage\tHeader\tCode\tOld\tNew");

             Multimap<PathHeader, String> bcp = TreeMultimap.create();
             Multimap<PathHeader, String> supplemental = TreeMultimap.create();
             Multimap<PathHeader, String> transforms = TreeMultimap.create();

             Counter<PathHeader> countSame = new Counter<>();
             Counter<PathHeader> countAdded = new Counter<>();
             Counter<PathHeader> countDeleted = new Counter<>();

             final File commonDir = new File(CLDRPaths.BASE_DIRECTORY + "common/");
             final String[] dirNames = commonDir.list();
             if (dirNames == null) {
                 // File.list() returns null when the path is not a directory or cannot be read
                 throw new IOException("Cannot list directory: " + commonDir);
             }
             for (String dir : dirNames) {
                 if (DtdType.ldml.directories.contains(dir)
                     || dir.equals(".DS_Store")
                     || dir.equals("dtd") // TODO as flat files
                     || dir.equals("properties") // TODO as flat files
                     || dir.equals("uca") // TODO as flat files
                     ) {
                     continue;
                 }
                 File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir);
                 System.out.println("\tLast dir: " + dirOld);
                 File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir);
                 System.out.println("\tCurr dir: " + dir2);

                 final String[] fileNames = dir2.list();
                 if (fileNames == null) {
                     // Happens for a plain file in common/, or a directory absent from the current version
                     System.out.println("\tSkipping, not a readable directory: " + dir2);
                     continue;
                 }
                 for (String file : fileNames) {
                     if (!file.endsWith(".xml")) {
                         continue;
                     }
                     String parentAndFile = dir + "/" + file;
                     String base = file.substring(0, file.length() - 4);
                     if (fileFilter != null && !fileFilter.reset(dir + "/" + base).find()) {
                         if (verbose) { //  && verbose_skipping
                             System.out.println("SKIPPING: " + dir + "/" + base);
                         }
                         continue;
                     }
                     if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(base)) {
                         continue;
                     }
                     if (verbose) {
                         System.out.println(file);
                     }
                     Relation<PathHeader, String> contentsOld = fillData(dirOld.toString() + "/", file, base);
                     Relation<PathHeader, String> contents2 = fillData(dir2.toString() + "/", file, base);

                     Set<PathHeader> keys = new TreeSet<>(CldrUtility.ifNull(contentsOld.keySet(), Collections.<PathHeader> emptySet()));
                     keys.addAll(CldrUtility.ifNull(contents2.keySet(), Collections.<PathHeader> emptySet()));
                     // Determined from the first path that gets this far, then reused for the rest of the file
                     DtdType dtdType = null;
                     for (PathHeader key : keys) {
                         String originalPath = key.getOriginalPath();
                         if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(originalPath, base)) {
                             continue;
                         }
                         boolean isTransform = originalPath.contains("/tRule");
                         if (dtdType == null) {
                             dtdType = DtdType.fromPath(originalPath);
                         }
                         Multimap<PathHeader, String> target = dtdType == DtdType.ldmlBCP47 ? bcp
                             : isTransform ? transforms
                                 : supplemental;
                         Set<String> setOld = contentsOld.get(key);
                         Set<String> set2 = contents2.get(key);

                         if (Objects.equals(setOld, set2)) {
                             if (file.equals(DEBUG_FILE)) { // for debugging
                                 System.out.println("**Same: " + key + "\t" + setOld);
                             }
                             addChange(parentAndFile, ChangeType.same, setOld.size());
                             countSame.add(key, 1);
                             continue;
                         }
                         if (setOld == null) {
                             addChange(parentAndFile, ChangeType.added, set2.size());
                             for (String s : set2) {
                                 addRow(target, key, "▷missing◁", s);
                                 countAdded.add(key, 1);
                             }
                         } else if (set2 == null) {
                             addChange(parentAndFile, ChangeType.deleted, setOld.size());
                             for (String s : setOld) {
                                 addRow(target, key, s, "▷removed◁");
                                 countDeleted.add(key, 1);
                             }
                         } else if (setOld.isEmpty()) {
                             addRow(target, key, "▷missing◁", Joiner.on(", ").join(set2));
                             addChange(parentAndFile, ChangeType.added, set2.size());
                             countAdded.add(key, 1);
                         } else if (set2.isEmpty()) {
                             addRow(target, key, Joiner.on(", ").join(setOld), "▷removed◁");
                             addChange(parentAndFile, ChangeType.deleted, setOld.size());
                             countDeleted.add(key, 1);
                         } else {
                             // Old lines filtered against the NEW set: [0] counts lines present in
                             // both, [1] counts lines only in the old set (deleted).
                             int[] sameAndOnlyInOld = new int[2];
                             String valueOld = getFilteredValue(setOld, set2, sameAndOnlyInOld);
                             addChange(parentAndFile, ChangeType.same, sameAndOnlyInOld[0]);
                             countSame.add(key, 1);
                             addChange(parentAndFile, ChangeType.deleted, sameAndOnlyInOld[1]);
                             countDeleted.add(key, 1);

                             // New lines filtered against the OLD set: [1] counts lines only in
                             // the new set (added); [0] repeats the shared count and is not used.
                             int[] sameAndOnlyInNew = new int[2];
                             String valueCurrent = getFilteredValue(set2, setOld, sameAndOnlyInNew);
                             addChange(parentAndFile, ChangeType.added, sameAndOnlyInNew[1]);
                             addRow(target, key, valueOld, valueCurrent);
                             countAdded.add(key, 1);
                         }
                     }
                 }
             }
             writeDiffs(anchors, "bcp47", "¤¤BCP47 " + chartNameCap, bcp, tsvFile);
             writeDiffs(anchors, "supplemental-data", "¤¤Supplemental " + chartNameCap, supplemental, tsvFile);
             writeDiffs(anchors, "transforms", "¤¤Transforms " + chartNameCap, transforms, tsvFile);

             writeCounter(tsvCountFile, "CountSame", countSame);
             tsvCountFile.println();
             writeCounter(tsvCountFile, "CountAdded", countAdded);
             tsvCountFile.println();
             writeCounter(tsvCountFile, "CountDeleted", countDeleted);
         }
     }

     /**
      * Write a heading line followed by one "count TAB path header" line per counter entry,
      * in the order returned by getEntrySetSortedByCount(false, null).
      *
      * @param tsvFile the writer to append to
      * @param title the label placed at the start of the heading line
      * @param countDeleted the counter to write; despite the name, callers pass any of the counters
      */
     private void writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted) {
         final String heading = "# " + title + "\tSection\tPage\tSubhead\tCode\n\n";
         tsvFile.append(heading);
         for (R2<Long, PathHeader> countAndHeader : countDeleted.getEntrySetSortedByCount(false, null)) {
             final String line = countAndHeader.get0() + "\t" + countAndHeader.get1();
             tsvFile.println(line);
         }
     }

     /**
      * Record one old/new pair for a path header, stored as a single string joined with SEP.
      *
      * @param target the multimap to add to
      * @param key the path header the pair belongs to
      * @param oldItem the old text; must not be empty
      * @param newItem the new text; must not be empty
      * @throws IllegalArgumentException if either item is empty
      */
     private void addRow(Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem) {
         // The stored string is later split on SEP and both halves are read; String.split drops
         // trailing empty strings, so an empty item could not be recovered.
         if (oldItem.isEmpty() || newItem.isEmpty()) {
             throw new IllegalArgumentException("Empty old or new value for " + key
                 + ": old=\"" + oldItem + "\", new=\"" + newItem + "\"");
         }
         target.put(key, oldItem + SEP + newItem);
     }

     /**
      * Read one XML file and collect its values, keyed by PathHeader.
      *
      * @param directory the directory containing the file; it is prepended to file as-is
      * @param file like "xx.xml" where "xx" may be a locale name
      * @param fileBase like "xx", same as file without ".xml"
      * @return the relation from PathHeader to value lines; empty if the file could not be loaded
      */
     private Relation<PathHeader, String> fillData(String directory, String file, String fileBase) {
         Relation<PathHeader, String> results = Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);

         List<Pair<String, String>> pathValuePairs;
         try {
             pathValuePairs = XMLFileReader.loadPathValues(directory + file, new ArrayList<Pair<String, String>>(), true);
         } catch (Exception e) {
             /*
              * Seen with e = ICUException, file = grammaticalFeatures.xml in cldr-36.0
              */
             return results;
         }
         DtdType dtdType = null;
         DtdData dtdData = null;
         Multimap<String, String> extras = TreeMultimap.create();

         for (Pair<String, String> pathAndValue : pathValuePairs) {
             final String path = pathAndValue.getFirst();
             if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(path, fileBase /* locale, or not */)) {
                 continue;
             }
             final String value = pathAndValue.getSecond();
             if (dtdType == null) {
                 /*
                  * dtdType and dtdData are derived from the path, but are the same for every path
                  * in one file, so they are set only on the first pass through this loop.
                  *
                  * The current DTD in CLDR_BASE_DIR is supposed to be backward-compatible, that is, to
                  * support paths from all archived versions. Any exception to that rule
                  * (e.g., for "grammaticalState") is a bug.
                  */
                 dtdType = DtdType.fromPath(path);
                 dtdData = DtdData.getInstance(dtdType, CLDR_BASE_DIR);
             }
             final XPathParts parts = XPathParts.getFrozenInstance(path);
             final boolean metadata;
             try {
                 metadata = dtdData.isMetadata(parts);
             } catch (NullPointerException e) {
                 /*
                  * TODO: this happens for "grammaticalState" in this path from version 37:
                  * //supplementalData/grammaticalData/grammaticalFeatures[@targets="nominal"][@locales="he"]/grammaticalState[@values="definite indefinite construct"]
                  * Reference: https://unicode-org.atlassian.net/browse/CLDR-13306
                  */
                 System.out.println("Caught NullPointerException in fillData calling isMetadata, path = " + path);
                 continue;
             }
             if (metadata) {
                 continue;
             }
             final Set<String> regularizedPaths = dtdData.getRegularizedPaths(parts, extras);
             if (regularizedPaths != null) {
                 for (String regularizedPath : regularizedPaths) {
                     final PathHeader header = phf.fromPath(regularizedPath);
                     if (header.getPageId() != PageId.Suppress) {
                         final Splitter valueSplitter = DtdData.getValueSplitter(parts);
                         for (String line : valueSplitter.split(value)) {
                             // lines starting with # in transforms are comments, not data
                             if (!isComment(parts, line)) {
                                 results.put(header, line);
                             }
                         }
                     }
                 }
             }
             for (Entry<String, Collection<String>> extra : extras.asMap().entrySet()) {
                 final String extraPath = extra.getKey();
                 final PathHeader extraHeader = phf.fromPath(extraPath);
                 if (extraHeader.getPageId() == PageId.Suppress) {
                     continue;
                 }
                 final Collection<String> extraValues = extra.getValue();
                 if (!isExtraSplit(extraPath)) {
                     results.putAll(extraHeader, extraValues);
                 } else {
                     for (String spaceSeparated : extraValues) {
                         results.putAll(extraHeader, DtdData.SPACE_SPLITTER.splitToList(spaceSeparated));
                     }
                 }
             }
             if (regularizedPaths == null && !value.isEmpty()) {
                 System.err.println("Shouldn't happen");
             }
         }
         return results;
     }

     /**
      * Tell whether the values of an extra path are space-separated lists to be split.
      *
      * @param extraPath the extra path to test
      * @return true only for paths that start with "//supplementalData/metaZones/mapTimezones"
      *         and end with "/_type"
      */
     private boolean isExtraSplit(String extraPath) {
         return extraPath.endsWith("/_type")
             && extraPath.startsWith("//supplementalData/metaZones/mapTimezones");
     }

     /**
      * Tell whether a value line is a comment line in a transform.
      *
      * @param pathPlain the parsed path the line belongs to
      * @param line the value line; only examined when pathPlain contains "transform"
      * @return true if pathPlain contains "transform" and the line starts with "#"
      */
     private static boolean isComment(XPathParts pathPlain, String line) {
         return pathPlain.contains("transform") && line.startsWith("#");
     }

     /**
      * Decide whether a change of value for a path counts as potentially "disruptive"
      * for the purpose of "churn" reporting.
      */
     private class SuspiciousChange {
         /**
          * the value before the change, such as "HH:mm–HH:mm v"
          */
         private final String oldValue;

         /**
          * the value after the change, such as "HH:mm – HH:mm v"
          */
         private final String newValue;

         /**
          * the path, such as //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id="Hmv"]/greatestDifference[@id="H"]
          */
         private final String path;

         /**
          * the locale (such as "doi") in which the path was found, or null, or possibly
          * the base file name without extension, like "xx" if the file name is "xx.xml",
          * where "xx" may or may not be a locale; e.g., "supplementalData"
          */
         private final String locale;

         SuspiciousChange(String oldValue, String newValue, String path, String locale) {
             this.path = path;
             this.locale = locale;
             this.oldValue = oldValue;
             this.newValue = newValue;
         }

         /**
          * Is this change potentially disruptive, either because of the values
          * or because of the path and locale?
          *
          * @return true or false
          */
         public boolean isDisruptive() {
             /*
              * Deliberately OR rather than AND: some value changes are disruptive even on paths
              * that are not high-level, and any change on a high-level path is disruptive even
              * if the value change itself is not identified as disruptive.
              */
             if (valueChangeIsDisruptive()) {
                 return true;
             }
             return HighLevelPaths.pathIsHighLevel(path, locale);
         }

         /**
          * Is the change potentially disruptive, judging (primarily) by the old and new values?
          * Never true when either value is null or the two values are equal.
          *
          * @return true or false
          */
         private boolean valueChangeIsDisruptive() {
             if (oldValue != null && newValue != null && !oldValue.equals(newValue)) {
                 return valueChangeIsDisruptiveWhitespaceOnly();
             }
             return false;
         }

         /**
          * Do the two values differ only in whitespace, on a path where that matters?
          * Per design doc, "Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP
          * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none."
          *
          * @return true or false
          */
         private boolean valueChangeIsDisruptiveWhitespaceOnly() {
             /*
              * annotations often change like "pop gorn" to "popgorn"; that is not treated as disruptive
              */
             if (path.startsWith("//ldml/annotations")) {
                 return false;
             }
             final String oldStripped = removeWhitespace(oldValue);
             final String newStripped = removeWhitespace(newValue);
             return oldStripped.equals(newStripped);
         }

         /**
          * Strip whitespace from a string.
          *
          * Whitespace here means whatever regex \s matches, plus
          * U+00A0 NO-BREAK SPACE
          * U+2007 FIGURE SPACE
          * U+202F NARROW NO-BREAK SPACE
          *
          * @param s the string
          * @return the string with all such characters removed
          */
         private String removeWhitespace(String s) {
             return s.replaceAll("[\\s\\u00A0\\u2007\\u202F]", "");
         }
     }

     /**
      * Determine which paths are considered "high-level" paths, i.e.,
      * paths for which any changes have high potential to cause disruptive "churn".
      * Whether a path is high-level sometimes depends on the locale or xml file in
      * which it occurs.
      * Some paths are high-level regardless of the locale in which they are located.
      * Other paths are high-level for some locales but not others. For example,
      *    //ldml/localeDisplayNames/languages/language[@type="xx"]
      * is high level in locale "xx", and maybe "en", but not for all locales.
      */
     private static class HighLevelPaths {
         /**
          * A set of paths to be treated as "high-level".
          * These are complete paths to be matched exactly.
          * Other paths are recognized by special functions like isHighLevelTerritoryName.
          *
          * The ordering and comments are based on the design spec.
          */
         final private static Set<String> highLevelPaths = new HashSet<>(Arrays.asList(
             /*
              * Core data
              */
             "//ldml/characters/exemplarCharacters",
             "//ldml/numbers/defaultNumberingSystem",
             "//ldml/numbers/otherNumberingSystems/native",
             /*
              * Territory and Language names
              *  Country/Region names (English and Native names) -- see isHighLevelTerritoryName
              *   //ldml/localeDisplayNames/territories/territory/...
              *  Language names (English and Native) -- see isHighLevelLangName
              *   //ldml/localeDisplayNames/languages/language/...
              */
             /*
              * Date
              * Note: "year", "month", etc., below, form a subset (eight) of all possible values for type,
              * excluding, for example, "fri" and "zone". If we use starred paths, we would need further complication
              * to filter out "fri", "zone", etc.
              */
             "//ldml/dates/fields/field[@type=\"year\"]/displayName",
             "//ldml/dates/fields/field[@type=\"month\"]/displayName",
             "//ldml/dates/fields/field[@type=\"week\"]/displayName",
             "//ldml/dates/fields/field[@type=\"day\"]/displayName",
             "//ldml/dates/fields/field[@type=\"hour\"]/displayName",
             "//ldml/dates/fields/field[@type=\"era\"]/displayName",
             "//ldml/dates/fields/field[@type=\"minute\"]/displayName",
             "//ldml/dates/fields/field[@type=\"second\"]/displayName",
             /*
              * First day of week: firstDay in supplementalData.xml; see isHighLevelFirstDay
              * First week of year: see isHighLevelWeekOfPreference
              */
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"full\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"long\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"medium\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"short\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MMMEd\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MEd\"]",
             /*
              * Time
              */
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"full\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"long\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"medium\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"short\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"am\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"pm\"]",
             "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"pm\"]",
             /*
              * Currency (English and Native) -- see isHighLevelCurrencyName
              * E.g., //ldml/numbers/currencies/currency[@type="KRW"]/displayName
              *
              * ISO Currency Code: SupplementalData.xml match <region iso3166> -- see isHighLevelCurrencyCode
              */
             /*
              * Currency Formats
              *  a. Currency thousand separator
              *  b. Currency decimal separator
              *  c. Currency Symbol //ldml/numbers/currencies/currency[@type="CNY"]/symbol
              *  d. Currency Symbol Narrow //ldml/numbers/currencies/currency[@type=\"CNY\"]/symbol[@alt=\"narrow\"]"
              *
              * See isHighLevelCurrencySeparatorOrSymbol
              */
             "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/numbers/currencyFormats[@numberSystem=\"arab\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             /*
              * Number Symbols
              */
             "//ldml/numbers/minimumGroupingDigits",
             "//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal",
             "//ldml/numbers/symbols[@numberSystem=\"latn\"]/group",
             "//ldml/numbers/symbols[@numberSystem=\"arab\"]/decimal",
             "//ldml/numbers/symbols[@numberSystem=\"arab\"]/group",
             /*
              * Number formats
              */
             "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/numbers/percentFormats[@numberSystem=\"latn\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"accounting\"]/pattern[@type=\"standard\"]",
             "//ldml/numbers/decimalFormats[@numberSystem=\"arab\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
             "//ldml/numbers/percentFormats[@numberSystem=\"arab\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"
             /*
              * "Complementary Observations"
              */
             /*
              * Changes to language aliases (supplementalMetaData) -- see isHighLevelLangAlias
              * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"]
              */
             /*
              * Changes in the containment graph -- see isHighLevelTerritoryContainment
              * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control.
              * Users use the containment graph in a variety of ways.
              * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
              */
             /*
              * Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP
              *  or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none.
              *  -- see SuspiciousChange.valueChangeIsDisruptiveWhitespaceOnly
              */
             /*
              * TODO: per design doc, "Adding a timezone"
              * TODO: per design doc, "Changes of symbols or codes that are cross-locale in some way such as the unknown
              *  currency symbol change '???' -> '¤'."
              * TODO: per design doc, "Change in character properties (not a CLDR but a Unicode change), and here especially
              *  newly adding or removing punctuation. Frequently irritates parsers."
              */
         ));

         // Matches the displayName path of any currency. Trailing text after "displayName" is allowed,
         // so variants carrying extra attributes (such as alt) match too. Used by isHighLevelCurrencyName.
         static Pattern currencyPattern = Pattern.compile("^//ldml/numbers/currencies/currency.*/displayName.*");

         /**
          * Should the given path in the given locale be taken into account for generating "churn" reports?
          *
          * @param path the path of interest
          * @param locale the locale in which the path was found, or null, or possibly
          *     the base file name without extension, like "xx" if the file name is "xx.xml",
          *     where "xx" may or may not be a locale; e.g., "supplementalData"
          * @return true if it counts, else false to ignore
          */
         private static boolean pathIsHighLevel(String path, String locale) {
             // Without both a path and a locale (or base file name) nothing can match.
             if (path == null || locale == null) {
                 return false;
             }
             // Callers are expected to filter locales beforehand; if one slips through, log it and ignore the path.
             if (!localeIsHighLevel(locale)) {
                 System.out.println("locale [" + locale + "] failed localeIsHighLevel in pathIsHighLevel; path = " + path);
                 return false;
             }
             final boolean matched = pathIsReallyHighLevel(path, locale);
             // Matches are only remembered when verbose (debug) reporting is enabled.
             if (matched && verboseHighLevelReporting) {
                 recordHighLevelMatch(path);
             }
             return matched;
         }

         private static boolean pathIsReallyHighLevel(String path, String locale) {
             // The exact-match list is consulted first; the remaining checks run in the
             // order listed and evaluation stops at the first one that succeeds.
             return highLevelPaths.contains(path)
                 || isHighLevelTerritoryName(path, locale)
                 || isHighLevelLangName(path, locale)
                 || isHighLevelCurrencyName(path, locale)
                 || isHighLevelCurrencyCode(path, locale)
                 || isHighLevelCurrencySeparatorOrSymbol(path, locale)
                 || isHighLevelLangAlias(path, locale)
                 || isHighLevelTerritoryContainment(path, locale)
                 || isHighLevelFirstDay(path, locale)
                 || isHighLevelWeekOfPreference(path, locale);
         }

         /**
          * Is the given locale, or base name, to be considered for "high level" churn report?
          *
          * @param locale the locale string, or base name like "supplementalData" as in "supplementalData.xml"
          * @return true or false
          */
         private static boolean localeIsHighLevel(String locale) {
             // A locale listed in the submission-locale set qualifies directly.
             if (SubmissionLocales.CLDR_OR_HIGH_LEVEL_LOCALES.contains(locale)) {
                 return true;
             }
             // Otherwise only the "supplementalData" base name is accepted.
             return "supplementalData".equals(locale);
         }

         /**
          * Changes to language aliases (supplemental metadata)
          * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"]
          *
          * @param path
          * @param locale must be "supplementalData" to match
          * @return true or false
          */
         private static boolean isHighLevelLangAlias(String path, String locale) {
             // Only the supplementalData base name can carry language alias paths.
             final boolean isSupplemental = "supplementalData".equals(locale);
             return isSupplemental
                 && path.startsWith("//supplementalData/metadata/alias/languageAlias");
         }

         /**
          * Changes in the containment graph
          * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control.
          * Users use the containment graph in a variety of ways.
          * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
          *
          * @param path
          * @param locale must be "supplementalData" to match
          * @return true or false
          */
         private static boolean isHighLevelTerritoryContainment(String path, String locale) {
             // Containment paths are only considered for the supplementalData base name.
             if (!"supplementalData".equals(locale)) {
                 return false;
             }
             return path.startsWith("//supplementalData/territoryContainment");
         }

         /**
          * Is the given path a high-level territory name path in the given locale?
          *
          * E.g., //ldml/localeDisplayNames/territories/territory[@type="NNN"]
          * if type "NNN" CORRESPONDS TO the locale or the locale is "en"
          *
          * English names (en.xml): match all types
          * Native: check each territory type NNN corresponding to the given locale
          *
          * Exclude "alt"
          *
          * @param path
          * @param locale
          * @return true or false
          */
         private static boolean isHighLevelTerritoryName(String path, String locale) {
             // Only territory display-name paths without an alt attribute are candidates.
             final boolean isPlainTerritoryPath =
                 path.startsWith("//ldml/localeDisplayNames/territories/territory")
                     && !path.contains("[@alt=");
             if (!isPlainTerritoryPath) {
                 return false;
             }
             // English: every territory type counts.
             if ("en".equals(locale)) {
                 return true;
             }
             // Other locales: only the territory types listed as targets for the locale count.
             final CoverageVariableInfo coverage = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
             if (coverage == null) {
                 return false;
             }
             for (String territory : coverage.targetTerritories) {
                 final String typeAttribute = "[@type=\"" + territory + "\"]";
                 if (path.contains(typeAttribute)) {
                     return true;
                 }
             }
             return false;
         }

         /**
          * Is the given path a high-level language name path in the given locale?
          *
          * E.g., //ldml/localeDisplayNames/languages/language[@type="xx"]
          * if type "xx" matches the locale or the locale is "en"
          *
          * Exclude "alt"
          *
          * @param path
          * @param locale
          * @return true or false
          */
         private static boolean isHighLevelLangName(String path, String locale) {
             // Reject anything that is not a language display-name path, and any alt variant.
             if (!path.startsWith("//ldml/localeDisplayNames/languages/language")
                     || path.contains("[@alt=")) {
                 return false;
             }
             // English names (en.xml): every type matches.
             // Native names: the type must equal the locale, e.g. type="ko" in ko.xml.
             return "en".equals(locale)
                 || path.contains("[@type=\"" + locale + "\"]");
         }

         /**
          * Is the given path a high-level currency name path in the given locale?
          *
          * E.g., //ldml/numbers/currencies/currency[@type=\"AAA\"]/displayName
          * if type "AAA" CORRESPONDS TO the locale or the locale is "en"
          *
          * English names (en.xml): match all types
          * Native: check each currency type AAA corresponding to the given locale
          *
          * Do NOT exclude "alt"; e.g.,
          * //ldml/numbers/currencies/currency[@type="ADP"]/displayName[@alt="proposed-u167-1"]
          *
          * @param path
          * @param locale
          * @return true or false
          */
         private static boolean isHighLevelCurrencyName(String path, String locale) {
             // Only currency displayName paths are candidates; alt variants are deliberately kept.
             if (!currencyPattern.matcher(path).matches()) {
                 return false;
             }
             // English: every currency type counts.
             if ("en".equals(locale)) {
                 return true;
             }
             // Other locales: only the currency types listed as targets for the locale count.
             final CoverageVariableInfo coverage = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
             if (coverage == null) {
                 return false;
             }
             for (String currency : coverage.targetCurrencies) {
                 final String typeAttribute = "[@type=\"" + currency + "\"]";
                 if (path.contains(typeAttribute)) {
                     return true;
                 }
             }
             return false;
         }

         /**
          * Is the given path a high-level currency code path in the given locale?
          *
          * E.g., //supplementalData/currencyData/region[@iso3166="AC"]/currency[@iso4217="SHP"][@from="1976-01-01"]
          *
          * @param path
          * @param locale must be "supplementalData" to match
          * @return true or false
          */
         private static boolean isHighLevelCurrencyCode(String path, String locale) {
             // Currency-code data is only looked for under the supplementalData base name.
             if (path == null || !"supplementalData".equals(locale)) {
                 return false;
             }
             // Anchor on the currencyData region element instead of searching for the bare
             // substring "iso3166", which could also appear in unrelated paths or attribute
             // values. This mirrors the prefix checks used for the other supplemental paths.
             return path.startsWith("//supplementalData/currencyData/region[@iso3166=");
         }

         /**
          * Is the given path a high-level currency thousands-separator, decimal-separator, or symbol path?
          * The locale is not consulted by this check.
          *
          * E.g., //ldml/numbers/currencies/currency[@type="ESP"]/group
          *       //ldml/numbers/currencies/currency[@type="ESP"]/decimal
          *       //ldml/numbers/currencies/currency[@type="CNY"]/symbol
          *       //ldml/numbers/currencies/currency[@type="CNY"]/symbol[@alt="narrow"]
          *
          * @param path
          * @param locale
          * @return true or false
          */
         private static boolean isHighLevelCurrencySeparatorOrSymbol(String path, String locale) {
             // Only paths under a currency element are candidates; the locale plays no role here.
             if (!path.startsWith("//ldml/numbers/currencies/currency")) {
                 return false;
             }
             // Any occurrence of one of these element names in the path is enough.
             final String[] keywords = {"group", "decimal", "symbol"};
             for (String keyword : keywords) {
                 if (path.contains(keyword)) {
                     return true;
                 }
             }
             return false;
         }

         /**
          * Is the given path a high-level weekData/firstDay in the given locale?
          *
          * E.g., //supplementalData/weekData/firstDay[@day="fri"][@territories="MV"]
          *
          * @param path
          * @param locale must be "supplementalData" to match
          * @return true or false
          */
         private static boolean isHighLevelFirstDay(String path, String locale) {
             // First-day-of-week data is only considered for the supplementalData base name.
             final boolean isSupplemental = "supplementalData".equals(locale);
             return isSupplemental
                 && path.startsWith("//supplementalData/weekData/firstDay");
         }

         /**
          * Is the given path a high-level weekOfPreference in the given locale?
          *
          * E.g., //supplementalData/weekData/weekOfPreference[@ordering="weekOfYear"][@locales="und"]
          *
          * @param path
          * @param locale must be "supplementalData" to match
          * @return true or false
          */
         private static boolean isHighLevelWeekOfPreference(String path, String locale) {
             // Week-of preference data is only considered for the supplementalData base name.
             if (!"supplementalData".equals(locale)) {
                 return false;
             }
             return path.startsWith("//supplementalData/weekData/weekOfPreference");
         }

         /**
          * For debugging, testing
          */
         // Paths recorded by recordHighLevelMatch; remains null until the first match is recorded.
         private static Set<String> highLevelPathMatched = null;
         // When true, pathIsHighLevel records each match and reportHighLevelPathUsage prints its report.
         private static boolean verboseHighLevelReporting = false;

         private static void recordHighLevelMatch(String path) {
             // Create the backing set on first use, then remember the path.
             Set<String> matched = highLevelPathMatched;
             if (matched == null) {
                 matched = new HashSet<>();
                 highLevelPathMatched = matched;
             }
             matched.add(path);
         }

         /**
          * For debugging, report on any paths in highLevelPaths that never matched
          */
         private static void reportHighLevelPathUsage() {
             // The report is produced only when verbose (debug) reporting is enabled.
             if (!verboseHighLevelReporting) {
                 return;
             }
             // A null set means no match was ever recorded.
             if (highLevelPathMatched == null) {
                 System.out.println("Zero high-level paths were matched!");
                 return;
             }
             // First pass: listed paths that were never seen.
             for (String expected : highLevelPaths) {
                 if (highLevelPathMatched.contains(expected)) {
                     continue;
                 }
                 System.out.println("Unmatched high-level path: " + expected);
             }
             // Second pass: recorded paths that are not in the fixed list, i.e. they
             // matched through one of the special-case checks.
             for (String seen : highLevelPathMatched) {
                 if (highLevelPaths.contains(seen)) {
                     continue;
                 }
                 System.out.println("Special matched high-level path: " + seen);
             }
         }
     }
 }