blob: 4b8e83f0ffb1b8ddfe8bec50e6146e8a784a61a6 [file] [log] [blame]
* Copyright (C) 2004-2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
package org.unicode.cldr.tool;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.test.CLDRTest;
import org.unicode.cldr.test.CoverageLevel2;
import org.unicode.cldr.test.DisplayAndInputProcessor;
import org.unicode.cldr.test.QuickCheck;
import org.unicode.cldr.util.Annotations;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRFile.ExemplarType;
import org.unicode.cldr.util.CLDRFile.NumberingSystem;
import org.unicode.cldr.util.CLDRFile.WinningChoice;
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRTool;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DateTimeCanonicalizer;
import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.FileProcessor;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.Level;
// import org.unicode.cldr.util.Log;
import org.unicode.cldr.util.LogicalGrouping;
import org.unicode.cldr.util.PathChecker;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.SimpleFactory;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.StringId;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;
import org.unicode.cldr.util.XPathParts.Comments;
import org.unicode.cldr.util.XPathParts.Comments.CommentType;
* Tool for applying modifications to the CLDR files. Use -h to see the options.
* <p>
* The following JVM system property can also be set when running the program: <br>
* -DSHOW_FILES=<anything> shows every file that is created or opened.
// Command-line tool class; annotated as a CLDRTool with the alias "modify".
@CLDRTool(alias = "modify",
description = "Tool for applying modifications to the CLDR files. Use -h to see the options.")
public class CLDRModify {
// When non-null, main() passes this to CLDRFile.toString(...) to print debug snapshots of each file.
static final String DEBUG_PATHS = null; // ".*currency.*";
static final boolean COMMENT_REMOVALS = false; // append removals as comments
// Frozen character sets: whitespace, and ASCII hex digits (ConfigMatch uses HEX to recognize hex path ids).
static final UnicodeSet whitespace = new UnicodeSet("[:whitespace:]").freeze();
static final UnicodeSet HEX = new UnicodeSet("[a-fA-F0-9]").freeze();
// DTD data for LDML, consulted by the 'z' (remove deprecated) filter.
private static final DtdData dtdData = DtdData.getInstance(DtdType.ldml);
// TODO make this into input option.
// Field names that tag a ConfigMatch. The ConfigMatch constructor parses 'action' as a
// ConfigAction and accepts /regex/ syntax only for locale, path and value.
enum ConfigKeys {
action, locale, path, value, new_path, new_value
// Actions that ConfigMatch parses with ConfigAction.valueOf(...).
// NOTE(review): only the per-constant descriptions are visible in this view; the constant
// names themselves are not shown — confirm against the full file.
enum ConfigAction {
* Remove a path
* Add a path/value
* Replace a path/value. Equals 'add' but tests selected paths
* Add a path/value. Equals 'add' but tests that path did NOT exist
// One matcher for a config entry. The constructor fills in exactly one of: an action,
// a regex matcher, or a literal string (which may be a hex path id when hexPath is true).
static final class ConfigMatch {
// Literal to compare against; null when this is an action or a regex.
final String exactMatch;
final Matcher regexMatch; // doesn't have to be thread safe
// Parsed action; non-null only for ConfigKeys.action.
final ConfigAction action;
// True when exactMatch consists only of hex digits and the key is path or new_path.
final boolean hexPath;
/**
 * Builds a match for one config entry.
 * <ul>
 * <li>For {@code ConfigKeys.action}, {@code match} is parsed with {@code ConfigAction.valueOf}.</li>
 * <li>A {@code match} of the form {@code /.../} (at least two characters) is compiled as a
 * regex; this is only allowed for locale, path and value. Each {@code [@} is escaped so that
 * attribute brackets are matched literally.</li>
 * <li>Anything else is a literal. For path and new_path, a literal made up only of hex digits
 * is treated as a hex path id.</li>
 * </ul>
 *
 * @param key which config field this match belongs to
 * @param match the raw text from the config entry
 * @throws IllegalArgumentException if a regex is supplied for a key that does not allow one,
 *     or if {@code match} is not a valid action name for {@code ConfigKeys.action}
 */
public ConfigMatch(ConfigKeys key, String match) {
    if (key == ConfigKeys.action) {
        exactMatch = null;
        regexMatch = null;
        action = ConfigAction.valueOf(match);
        hexPath = false;
    } else if (match.length() >= 2 && match.startsWith("/") && match.endsWith("/")) {
        // The length check keeps a lone "/" (which both starts and ends with '/') from being
        // taken as an empty regex; substring(1, 0) would throw StringIndexOutOfBoundsException.
        if (key != ConfigKeys.locale && key != ConfigKeys.path && key != ConfigKeys.value) {
            throw new IllegalArgumentException("Regex only allowed for old path/value.");
        }
        exactMatch = null;
        regexMatch = PatternCache.get(match.substring(1, match.length() - 1)
            .replace("[@", "\\[@")).matcher("");
        action = null;
        hexPath = false;
    } else {
        exactMatch = match;
        regexMatch = null;
        action = null;
        hexPath = (key == ConfigKeys.new_path || key == ConfigKeys.path)
            && HEX.containsAll(match);
    }
}
public boolean matches(String other) {
if (exactMatch == null) {
return regexMatch.reset(other).find();
} else if (hexPath) {
// convert path to id for comparison
return exactMatch.equals(StringId.getHexId(other));
} else {
return exactMatch.equals(other);
public String toString() {
return action != null ? action.toString()
: exactMatch == null ? regexMatch.toString()
: hexPath ? "*" + exactMatch + "*"
: exactMatch;
// Returns the path this match stands for: the literal itself, or, for a hex path id, the
// path looked up through StringId.getStringFromHexId.
// Throws IllegalArgumentException ("No path for hex id: ...") when the lookup yields null.
public String getPath(CLDRFile cldrFileToFilter) {
if (!hexPath) {
return exactMatch;
// ensure that we have all the possible paths cached
String path = StringId.getStringFromHexId(exactMatch);
if (path == null) {
// NOTE(review): nothing visible in this loop uses eachPath; presumably each path is meant to be
// registered with StringId (e.g. via getHexId) before the lookup is retried — confirm.
for (String eachPath : cldrFileToFilter.fullIterable()) {
path = StringId.getStringFromHexId(exactMatch);
if (path == null) {
throw new IllegalArgumentException("No path for hex id: " + exactMatch);
return path;
public static String getModified(ConfigMatch valueMatch, String value, ConfigMatch newValue) {
if (valueMatch == null) { // match anything
if (newValue != null && newValue.exactMatch != null) {
return newValue.exactMatch;
if (value != null) {
return value;
throw new IllegalArgumentException("Can't have both old and new be null.");
} else if (valueMatch.exactMatch == null) { // regex
if (newValue == null || newValue.exactMatch == null) {
throw new IllegalArgumentException("Can't have regex without replacement.");
StringBuffer buffer = new StringBuffer();
valueMatch.regexMatch.appendReplacement(buffer, newValue.exactMatch);
return buffer.toString();
} else {
return newValue.exactMatch != null ? newValue.exactMatch : value;
// Registry of the available fix filters, keyed by option letter.
static FixList fixList = new FixList();
// Positions of individual options within the options array below.
// NOTE(review): SOURCEDIR, DESTDIR, JOIN_ARGS and RESOLVE are used elsewhere in this class but
// are not declared in the lines visible here, and the array below shows fewer entries than
// the highest index — confirm against the full file.
private static final int HELP1 = 0,
HELP2 = 1,
MATCH = 4,
JOIN = 5,
FIX = 7,
VET_ADD = 9,
PATH = 11,
USER = 12,
ALL_DIRS = 13,
CHECK = 14,
KONFIG = 15;
// Command-line option definitions, parsed in main() with UOption.parseArgs.
private static final UOption[] options = {
UOption.DESTDIR().setDefault(CLDRPaths.GEN_DIRECTORY + "cldrModify/"),
UOption.create("match", 'm', UOption.REQUIRES_ARG).setDefault(".*"),
UOption.create("join", 'j', UOption.OPTIONAL_ARG),
UOption.create("minimize", 'r', UOption.NO_ARG),
UOption.create("fix", 'f', UOption.OPTIONAL_ARG),
UOption.create("join-args", 'i', UOption.OPTIONAL_ARG),
UOption.create("vet", 'v', UOption.OPTIONAL_ARG),
UOption.create("resolve", 'z', UOption.OPTIONAL_ARG),
UOption.create("path", 'p', UOption.REQUIRES_ARG),
UOption.create("user", 'u', UOption.REQUIRES_ARG),
UOption.create("all", 'a', UOption.REQUIRES_ARG),
UOption.create("check", 'c', UOption.NO_ARG),
UOption.create("konfig", 'k', UOption.OPTIONAL_ARG).setDefault("modify_config.txt"),
// Sub-option letters accepted for -i; main() validates the -i value against this set.
// NOTE(review): main() also tests the -i value for 'x', which this set does not allow — confirm intent.
private static final UnicodeSet allMergeOptions = new UnicodeSet("[rcd]");
// First part of the help message printed for -h/-?; main() prints it followed by
// fixList.showHelp() and HELP_TEXT2. It embeds the short names of the source/destination
// options and the canonical default directories.
static final String HELP_TEXT1 = "Use the following options"
+ XPathParts.NEWLINE
+ "-h or -?\t for this message"
+ XPathParts.NEWLINE
+ "-"
+ options[SOURCEDIR].shortName
+ "\t source directory. Default = -s"
+ CldrUtility.getCanonicalName(CLDRPaths.MAIN_DIRECTORY)
+ XPathParts.NEWLINE
+ "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\"
+ XPathParts.NEWLINE
+ "-"
+ options[DESTDIR].shortName
+ "\t destination directory. Default = -d"
+ CldrUtility.getCanonicalName(CLDRPaths.GEN_DIRECTORY + "main/")
+ XPathParts.NEWLINE
+ "-m<regex>\t to restrict the locales to what matches <regex>"
+ XPathParts.NEWLINE
+ "-j<merge_dir>/X'\t to merge two sets of files together (from <source_dir>/X and <merge_dir>/X', "
+ XPathParts.NEWLINE
+ "\twhere * in X' is replaced by X)."
+ XPathParts.NEWLINE
+ "\tExample:-jC:\\Unicode-CVS2\\cldr\\dropbox\\to_be_merged\\missing\\missing_*"
+ XPathParts.NEWLINE
+ "-i\t merge arguments:"
+ XPathParts.NEWLINE
+ "\tr\t replace contents (otherwise new data will be draft=\"unconfirmed\")"
+ XPathParts.NEWLINE
+ "\tc\t ignore comments in <merge_dir> files"
+ XPathParts.NEWLINE
+ "-v\t incorporate vetting information, and generate diff files."
+ XPathParts.NEWLINE
+ "-z\t generate resolved files"
+ XPathParts.NEWLINE
+ "-p\t set path for -fx"
+ XPathParts.NEWLINE
+ "-u\t set user for -fb"
+ XPathParts.NEWLINE
+ "-a\t pattern: recurse over all subdirectories that match pattern"
+ XPathParts.NEWLINE
+ "-c\t check that resulting xml files are valid. Requires that a dtd directory be copied to the output directory, in the appropriate location."
+ XPathParts.NEWLINE
+ "-k\t config_file\twith -fk perform modifications according to what is in the config file. For format details, see:"
+ XPathParts.NEWLINE
+ "\t\t"
+ XPathParts.NEWLINE
+ "-f\t to perform various fixes on the files (add following arguments to specify which ones, eg -fxi)"
+ XPathParts.NEWLINE;
// Trailing part of the help message.
static final String HELP_TEXT2 = "Note: A set of bat files are also generated in <dest_dir>/diff. They will invoke a comparison program on the results."
+ XPathParts.NEWLINE;
private static final boolean SHOW_DETAILS = false;
private static boolean SHOW_PROCESSING = false;
// Source directory as given on the command line; set in main().
static String sourceInput;
* Picks options and executes. Use -h to see options.
// Program entry point. Parses the options, then for each selected source directory loads every
// locale matching -m, optionally merges in files (-j) and applies fixes (-f), and opens
// <locale>.xml in the target directory for output. Elapsed time is printed in the finally clause.
// NOTE(review): several statements of this method are not visible in this view (for example
// the bodies of some if-branches); the comments below describe only what is shown.
public static void main(String[] args) throws Exception {
long startTime = System.currentTimeMillis();
UOption.parseArgs(args, options);
// -h / -?: print the help text, including the per-letter help of the registered fixes.
if (options[HELP1].doesOccur || options[HELP2].doesOccur) {
System.out.println(HELP_TEXT1 + fixList.showHelp() + HELP_TEXT2);
// Reject unknown sub-option letters for -f and -i.
checkSuboptions(options[FIX], fixList.getOptions());
checkSuboptions(options[JOIN_ARGS], allMergeOptions);
String recurseOnDirectories = options[ALL_DIRS].value;
boolean makeResolved = options[RESOLVE].doesOccur; // Utility.COMMON_DIRECTORY + "main/";
sourceInput = options[SOURCEDIR].value;
String destInput = options[DESTDIR].value;
// With -a, strip a trailing "main/" or "main" so that the base is the parent directory.
if (recurseOnDirectories != null) {
sourceInput = removeSuffix(sourceInput, "main/", "main");
destInput = removeSuffix(destInput, "main/", "main");
String sourceDirBase = CldrUtility.checkValidDirectory(sourceInput); // Utility.COMMON_DIRECTORY + "main/";
String targetDirBase = CldrUtility.checkValidDirectory(destInput); // Utility.GEN_DIRECTORY + "main/";
System.out.format("Source:\t%s\n", sourceDirBase);
System.out.format("Target:\t%s\n", targetDirBase);
// Sorted set of subdirectory names (each ending in "/") to process.
Set<String> dirSet = new TreeSet<>();
if (recurseOnDirectories == null) {
} else {
// Keep the subdirectories of the source base whose names contain a match for the -a pattern.
String[] subdirs = new File(sourceDirBase).list();
Matcher subdirMatch = PatternCache.get(recurseOnDirectories).matcher("");
for (String subdir : subdirs) {
if (!subdirMatch.reset(subdir).find()) continue;
dirSet.add(subdir + "/");
for (String dir : dirSet) {
String sourceDir = sourceDirBase + dir;
if (!new File(sourceDir).isDirectory()) continue;
String targetDir = targetDirBase + dir;
try {
// cldrFactoryForAvailable supplies the list of locales; cldrFactory is used to load them.
Factory cldrFactoryForAvailable = Factory.make(sourceDir, ".*");
Factory cldrFactory = cldrFactoryForAvailable;
// Fix for annotations. Need root.xml or else cannot load resolved
// locales.
if (sourceDir.endsWith("/seed/annotations/") && "Q".equals(options[FIX].value)) {
System.err.println("Correcting factory so that annotations can load, including " + CLDRPaths.ANNOTATIONS_DIRECTORY);
final File[] paths = {
new File(sourceDir),
new File(CLDRPaths.ANNOTATIONS_DIRECTORY) // common/annotations - to load root.xml
cldrFactory = SimpleFactory.make(paths, ".*");
} else {
System.err.println("!!! " + sourceDir);
// -v: hand the factory and target directory to a VettingAdder built from the option value.
if (options[VET_ADD].doesOccur) {
VettingAdder va = new VettingAdder(options[VET_ADD].value);
va.showFiles(cldrFactory, targetDir);
// -j: split the argument into a directory and a file-name pattern; a '*' in the file name
// separates the prefix from the postfix that surround the locale id.
Factory mergeFactory = null;
String join_prefix = "", join_postfix = "";
if (options[JOIN].doesOccur) {
String mergeDir = options[JOIN].value;
File temp = new File(mergeDir);
mergeDir = CldrUtility.checkValidDirectory(temp.getParent() + File.separator); // Utility.COMMON_DIRECTORY
// + "main/";
String filename = temp.getName();
join_prefix = join_postfix = "";
int pos = filename.indexOf("*");
if (pos >= 0) {
join_prefix = filename.substring(0, pos);
join_postfix = filename.substring(pos + 1);
mergeFactory = Factory.make(mergeDir, ".*");
Set<String> locales = new TreeSet<>(cldrFactoryForAvailable.getAvailable());
if (mergeFactory != null) {
// Derive the locale ids available for merging by stripping the prefix and postfix.
Set<String> temp = new TreeSet<>(mergeFactory.getAvailable());
Set<String> locales3 = new TreeSet<>();
for (String locale : temp) {
if (!locale.startsWith(join_prefix) || !locale.endsWith(join_postfix)) continue;
locales3.add(locale.substring(join_prefix.length(), locale.length() - join_postfix.length()));
System.out.println("Merging: " + locales3);
// Restrict the locales to those accepted by the -m filter.
new CldrUtility.MatcherFilter(options[MATCH].value).retainAll(locales);
long lastTime = System.currentTimeMillis();
int spin = 0;
System.out.format(locales.size() + " Locales:\t%s\n", locales.toString());
int totalRemoved = 0;
for (String test : locales) {
// Print a progress line when more than 5 seconds have passed since the last one.
long now = System.currentTimeMillis();
if (now - lastTime > 5000) {
System.out.println(" .. still processing " + test + " [" + spin + "/" + locales.size()
+ "]");
lastTime = now;
// TODO parameterize the directory and filter
// Work on a thawed clone (k) so the file obtained from the factory is not modified.
final CLDRFile originalCldrFile = cldrFactory.make(test, makeResolved);
CLDRFile k = originalCldrFile.cloneAsThawed();
if (DEBUG_PATHS != null) {
System.out.println("Debug1 (" + test + "):\t" + k.toString(DEBUG_PATHS));
if (mergeFactory != null) {
int mergeOption = CLDRFile.MERGE_ADD_ALTERNATE;
CLDRFile toMergeIn = mergeFactory.make(join_prefix + test + join_postfix, false)
if (toMergeIn != null) {
// -i sub-options: 'c' clears comments and 'x' removes posix paths from the file being merged.
if (options[JOIN_ARGS].doesOccur) {
if (options[JOIN_ARGS].value.indexOf("r") >= 0)
if (options[JOIN_ARGS].value.indexOf("d") >= 0)
if (options[JOIN_ARGS].value.indexOf("c") >= 0) toMergeIn.clearComments();
if (options[JOIN_ARGS].value.indexOf("x") >= 0) removePosix(toMergeIn);
k.putAll(toMergeIn, mergeOption);
// special fix
" The following are strings that are not found in the locale (currently), but need valid translations for localizing timezones. ");
if (DEBUG_PATHS != null) {
System.out.println("Debug2 (" + test + "):\t" + k.toString(DEBUG_PATHS));
// -f: apply the requested fixes, passing along the -k config file name.
if (options[FIX].doesOccur) {
fix(k, options[FIX].value, options[KONFIG].value, cldrFactory);
if (DEBUG_PATHS != null) {
System.out.println("Debug3 (" + test + "):\t" + k.toString(DEBUG_PATHS));
if (DEBUG_PATHS != null) {
System.out.println("Debug4 (" + test + "):\t" + k.toString(DEBUG_PATHS));
PrintWriter pw = FileUtilities.openUTF8Writer(targetDir, test + ".xml");
String testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]";
// Disabled debugging aid (if (false)).
if (false) {
System.out.println("Printing Raw File:");
testPath = "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/alias";
TreeSet s = new TreeSet();
Set orderedSet = new TreeSet(k.getComparator());
for (Iterator it3 = orderedSet.iterator(); it3.hasNext();) {
String path = (String);
if (path.equals(testPath)) {
String value = k.getStringValue(path);
String fullpath = k.getFullXPath(path);
System.out.println("\t=\t" + fullpath);
System.out.println("\t=\t" + value);
System.out.println("Done Printing Raw File:");
File oldFile = new File(sourceDir, test + ".xml");
File newFile = new File(targetDir, test + ".xml");
if (!oldFile.equals(newFile) // only skip if the source & target are different.
&& equalsSkippingCopyright(oldFile, newFile)) {
// -c: run QuickCheck on the generated file.
if (options[CHECK].doesOccur) {
QuickCheck.check(new File(targetDir, test + ".xml"));
if (totalSkeletons.size() != 0) {
System.out.println("Total Skeletons" + totalSkeletons);
if (totalRemoved > 0) {
System.out.println("# Removed:\t" + totalRemoved);
} finally {
System.out.println("Done -- Elapsed time: " + ((System.currentTimeMillis() - startTime) / 60000.0)
+ " minutes");
// Compares two files line by line. Returns false as soon as one file has a further line and
// the other does not, and true once both are exhausted together. Lines that both start with
// "<!-- Copyright ©" get special treatment when they differ.
// NOTE(review): the iterator initializers, the per-line reads and the body of the copyright
// check are not visible in this view — confirm against the full file.
public static boolean equalsSkippingCopyright(File oldFile, File newFile) {
Iterator<String> oldIterator =;
Iterator<String> newIterator =;
while (true) {
boolean oldHasNext = oldIterator.hasNext();
boolean newHasNext = newIterator.hasNext();
if (oldHasNext != newHasNext) {
return false;
if (!oldHasNext) {
return true;
String oldLine =;
String newLine =;
if (!oldLine.equals(newLine)) {
if (oldLine.startsWith("<!-- Copyright ©") && newLine.startsWith("<!-- Copyright ©")) {
return false;
private static String removeSuffix(String value, String... suffices) {
for (String suffix : suffices) {
if (value.endsWith(suffix)) {
return value.substring(0, value.length() - suffix.length());
return value;
* Use the coverage to determine what we should keep in the case of a locale just below root.
static class RetainWhenMinimizing implements CLDRFile.RetentionTest {
private CLDRFile file;
private CLDRLocale c;
private boolean isArabicSublocale;
public RetainWhenMinimizing setParentFile(CLDRFile file) {
this.file = file;
this.c = CLDRLocale.getInstance(file.getLocaleIDFromIdentity());
isArabicSublocale = "ar".equals(c.getLanguage()) && !"001".equals(c.getCountry());
return this;
public Retention getRetention(String path) {
if (path.startsWith("//ldml/identity/")) {
return Retention.RETAIN;
// special case for Arabic
if (isArabicSublocale && path.startsWith("//ldml/numbers/defaultNumberingSystem")) {
return Retention.RETAIN;
String localeId = file.getSourceLocaleID(path, null);
if ((c.isLanguageLocale() || c.equals(CLDRLocale.getInstance("pt_PT")))
&& (XMLSource.ROOT_ID.equals(localeId) || XMLSource.CODE_FALLBACK_ID.equals(localeId))) {
return Retention.RETAIN;
return Retention.RETAIN_IF_DIFFERENT;
// Splits on comma, semicolon or vertical bar, trimming each piece and dropping empty ones.
static final Splitter COMMA_SEMI = Splitter.on(Pattern.compile("[,;|]")).trimResults().omitEmptyStrings();
protected static final boolean NUMBER_SYSTEM_HACK = true;
private static void checkSuboptions(UOption givenOptions, UnicodeSet allowedOptions) {
if (givenOptions.doesOccur && !allowedOptions.containsAll(givenOptions.value)) {
throw new IllegalArgumentException("Illegal sub-options for "
+ givenOptions.shortName
+ ": "
+ new UnicodeSet().addAll(givenOptions.value).removeAll(allowedOptions)
+ CldrUtility.LINE_SEPARATOR + "Use -? for help.");
private static void removePosix(CLDRFile toMergeIn) {
Set<String> toRemove = new HashSet<>();
for (String xpath : toMergeIn) {
if (xpath.startsWith("//ldml/posix")) toRemove.add(xpath);
toMergeIn.removeAll(toRemove, false);
// Shared checker used by CLDRFilter.replace to validate new paths.
static PathChecker pathChecker = new PathChecker();
// Base class for a single fix. Subclasses implement handlePath and call remove/replace/add,
// which record the intended changes and log them.
abstract static class CLDRFilter {
// The file currently being filtered, and a lazily obtained resolved version of it (see getResolved).
protected CLDRFile cldrFileToFilter;
protected CLDRFile cldrFileToFilterResolved;
// Locale id of cldrFileToFilter, used as a prefix in log lines.
private String localeID;
protected Set<String> availableChildren;
// Sinks supplied through setFile: paths to remove, and a file collecting replacement values.
private Set<String> toBeRemoved;
private CLDRFile toBeReplaced;
protected Factory factory;
public final void setFile(CLDRFile k, Factory factory, Set<String> removal, CLDRFile replacements) {
this.cldrFileToFilter = k;
cldrFileToFilterResolved = null;
this.factory = factory;
localeID = k.getLocaleID();
this.toBeRemoved = removal;
this.toBeReplaced = replacements;
// Per-file hooks: handleStart and handleEnd show no statements here and may be overridden;
// handlePath must be implemented by each filter and receives one path at a time.
public void handleStart() {
public abstract void handlePath(String xpath);
public void handleEnd() {
public CLDRFile getResolved() {
if (cldrFileToFilterResolved == null) {
if (cldrFileToFilter.isResolved()) {
cldrFileToFilterResolved = cldrFileToFilter;
} else {
cldrFileToFilterResolved = factory.make(cldrFileToFilter.getLocaleID(), true);
return cldrFileToFilterResolved;
public void show(String reason, String detail) {
System.out.println("%" + localeID + "\t" + reason + "\tConsidering " + detail);
public void retain(String path, String reason) {
System.out.println("%" + localeID + "\t" + reason + "\tRetaining: " + cldrFileToFilter.getStringValue(path)
+ "\t at: " + path);
// Removes a path with the placeholder reason "-".
public void remove(String path) {
remove(path, "-");
// Removes a path, logging the action with its old value. Does nothing further when the path is
// already in the removal set.
// NOTE(review): no statement adding the path to toBeRemoved is visible in this view — confirm.
public void remove(String path, String reason) {
if (toBeRemoved.contains(path)) return;
String oldValueOldPath = cldrFileToFilter.getStringValue(path);
showAction(reason, "Removing", oldValueOldPath, null, null, path, path);
public void replace(String oldFullPath, String newFullPath, String newValue) {
replace(oldFullPath, newFullPath, newValue, "-");
// Builds one tab-separated log line for an action: locale, action, reason, the old value in «»,
// optionally the value already at the new path ("/=" when it equals the old value), then the
// new value after an arrow (∅ for null, ≡ when equal to the old value), the old path, and the
// new path when it differs.
// NOTE(review): the opening of the statement that consumes this expression is not visible in
// this view — confirm against the full file.
public void showAction(String reason, String action, String oldValueOldPath, String oldValueNewPath,
String newValue, String oldFullPath, String newFullPath) {
+ localeID
+ "\t"
+ action
+ "\t"
+ reason
+ "\t«"
+ oldValueOldPath
+ "»"
+ (newFullPath.equals(oldFullPath) || oldValueNewPath == null ? "" : oldValueNewPath
.equals(oldValueOldPath) ? "/=" : "/«" + oldValueNewPath + "»")
+ "\t→\t" + (newValue == null ? "∅" : newValue.equals(oldValueOldPath) ? "≡" : "«" + newValue + "»")
+ "\t" + oldFullPath
+ (newFullPath.equals(oldFullPath) ? "" : "\t→\t" + newFullPath));
* There are the following cases, where:
* <pre>
* pathSame, new value null: Removing v p
* pathSame, new value not null, no old value: Adding v' p
* pathSame, new value not null, old value present: Replacing v v' p
* pathChanges, nothing at new path: Moving v p p'
* pathChanges, same value at new path: Redundant Value v v' p p'
* pathChanges, value changes: Overriding v v' p p'
* </pre>
* @param oldFullPath
* @param newFullPath
* @param newValue
* @param reason
// Records a replacement and logs which kind of change it is. The old path is first expanded to
// its full form when the file knows one; the new path must pass pathChecker or an
// IllegalArgumentException ("Bad path: ...") is thrown.
// NOTE(review): the boundary between the same-path branch and the path-change statements that
// follow it is not visible in this view — confirm against the full file.
public void replace(String oldFullPath, String newFullPath, String newValue, String reason) {
String oldValueOldPath = cldrFileToFilter.getStringValue(oldFullPath);
String temp = cldrFileToFilter.getFullXPath(oldFullPath);
if (temp != null) {
oldFullPath = temp;
boolean pathSame = oldFullPath.equals(newFullPath);
if (!pathChecker.checkPath(newFullPath)) {
throw new IllegalArgumentException("Bad path: " + newFullPath);
// Same path: a null new value means removal; otherwise the value is added or replaced.
if (pathSame) {
if (newValue == null) {
remove(oldFullPath, reason);
} else if (oldValueOldPath == null) {
toBeReplaced.add(oldFullPath, newValue);
showAction(reason, "Adding", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
} else {
toBeReplaced.add(oldFullPath, newValue);
showAction(reason, "Replacing", oldValueOldPath, null, newValue, oldFullPath, newFullPath);
// Path changes: the log label depends on what, if anything, is already at the new path.
String oldValueNewPath = cldrFileToFilter.getStringValue(newFullPath);
toBeReplaced.add(newFullPath, newValue);
if (oldValueNewPath == null) {
showAction(reason, "Moving", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
} else if (oldValueNewPath.equals(newValue)) {
showAction(reason, "Redundant Value", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
} else {
showAction(reason, "Overriding", oldValueOldPath, oldValueNewPath, newValue, oldFullPath, newFullPath);
* Adds a new path-value pair to the CLDRFile.
* @param path the new path
* @param value the value
* @param reason Reason for adding the path and value.
public void add(String path, String value, String reason) {
String oldValueOldPath = cldrFileToFilter.getStringValue(path);
if (oldValueOldPath == null) {
toBeReplaced.add(path, value);
showAction(reason, "Adding", oldValueOldPath, null,
value, path, path);
} else {
replace(path, path, value);
// Returns the file that collects replacement path/value pairs (as supplied to setFile).
public CLDRFile getReplacementFile() {
return toBeReplaced;
// Hooks with no visible statements; filters may override them.
public void handleCleanup() {
public void handleSetup() {
// Returns the locale id of the file currently being filtered.
public String getLocaleID() {
return localeID;
// Registry of fixes keyed by an ASCII option letter; each letter has help text and, optionally,
// a CLDRFilter that implements the fix.
static class FixList {
// simple class, so we use quick list
CLDRFilter[] filters = new CLDRFilter[128]; // only ascii
String[] helps = new String[128]; // only ascii
UnicodeSet options = new UnicodeSet();
// Option letters selected for the current run; set by setFile.
String inputOptions = null;
// Registers help text for a letter that has no filter object.
void add(char letter, String help) {
add(letter, help, null);
// Visits every registered filter.
// NOTE(review): the statement executed for each non-null filter is not visible in this view;
// presumably it calls the filter's handleSetup — confirm.
public void handleSetup() {
for (int i = 0; i < filters.length; ++i) {
if (filters[i] != null) {
// Visits every registered filter.
// NOTE(review): as above, the per-filter call (presumably handleCleanup) is not visible — confirm.
public void handleCleanup() {
for (int i = 0; i < filters.length; ++i) {
if (filters[i] != null) {
// Returns the set of option letters; main() uses it to validate the -f value.
public UnicodeSet getOptions() {
return options;
// Registers a filter and its help text under an option letter; a letter can only be used once
// (IllegalArgumentException "Duplicate letter: ..."). The arrays hold 128 entries, so the
// letter must be ASCII.
// NOTE(review): nothing visible here adds the letter to the options set returned by
// getOptions() — confirm against the full file.
void add(char letter, String help, CLDRFilter filter) {
if (helps[letter] != null) throw new IllegalArgumentException("Duplicate letter: " + letter);
filters[letter] = filter;
helps[letter] = help;
void setFile(CLDRFile file, String inputOptions, Factory factory, Set<String> removal, CLDRFile replacements) {
this.inputOptions = inputOptions;
for (int i = 0; i < inputOptions.length(); ++i) {
char c = inputOptions.charAt(i);
if (filters[c] != null) {
try {
filters[c].setFile(file, factory, removal, replacements);
} catch (RuntimeException e) {
System.err.println("Failure in " + filters[c].localeID + "\t START");
throw e;
// Walks the selected option letters and, for each one with a registered filter, runs a guarded
// step; a RuntimeException is reported on System.err with the filter's locale id and rethrown.
// NOTE(review): the statement inside the try is not visible in this view; presumably it calls
// the filter's handleStart — confirm.
void handleStart() {
for (int i = 0; i < inputOptions.length(); ++i) {
char c = inputOptions.charAt(i);
if (filters[c] != null) {
try {
} catch (RuntimeException e) {
System.err.println("Failure in " + filters[c].localeID + "\t START");
throw e;
// Walks the selected option letters for one path; a RuntimeException is reported on System.err
// with the filter's locale id and the path, then rethrown.
// NOTE(review): the statement inside the try is not visible in this view; presumably it calls
// the filter's handlePath(xpath) — confirm.
void handlePath(String xpath) {
for (int i = 0; i < inputOptions.length(); ++i) {
char c = inputOptions.charAt(i);
if (filters[c] != null) {
try {
} catch (RuntimeException e) {
System.err.println("Failure in " + filters[c].localeID + "\t " + xpath);
throw e;
// Walks the selected option letters at the end of a file; a RuntimeException is reported on
// System.err and rethrown.
// NOTE(review): the statement inside the try is not visible in this view (presumably the
// filter's handleEnd), and the failure message says START although this is the end hook — confirm.
void handleEnd() {
for (int i = 0; i < inputOptions.length(); ++i) {
char c = inputOptions.charAt(i);
if (filters[c] != null) {
try {
} catch (RuntimeException e) {
System.err.println("Failure in " + filters[c].localeID + "\t START");
throw e;
String showHelp() {
String result = "";
for (int i = 0; i < filters.length; ++i) {
if (helps[i] != null) {
result += "\t" + (char) i + "\t " + helps[i] + XPathParts.NEWLINE;
return result;
// Accumulated across files; main() prints it when it is not empty.
static Set<String> totalSkeletons = new HashSet<>();
// Maps long unit names to short abbreviations; filled in the static initializer below.
static Map<String, String> rootUnitMap = new HashMap<>();
// Static initializer: fills rootUnitMap and registers the fix filters that follow.
static {
rootUnitMap.put("second", "s");
rootUnitMap.put("minute", "min");
rootUnitMap.put("hour", "h");
rootUnitMap.put("day", "d");
rootUnitMap.put("week", "w");
rootUnitMap.put("month", "m");
rootUnitMap.put("year", "y");
// Filter 'z': removes paths that contain a deprecated element or a deprecated attribute value.
fixList.add('z', "Remove deprecated elements", new CLDRFilter() {
// Looks up deprecation in the DtdData for the given DTD type.
public boolean isDeprecated(DtdType type, String element, String attribute, String value) {
return DtdData.getInstance(type).isDeprecated(element, attribute, value);
// True if any element of the path, or any of its attribute/value pairs, is deprecated.
public boolean isDeprecated(DtdType type, String path) {
XPathParts parts = XPathParts.getFrozenInstance(path);
for (int i = 0; i < parts.size(); ++i) {
String element = parts.getElement(i);
if (isDeprecated(type, element, "*", "*")) {
return true;
for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) {
String attribute = entry.getKey();
String value = entry.getValue();
if (isDeprecated(type, element, attribute, value)) {
return true;
return false;
// Checks the full path against the class-level LDML dtdData (not the helpers above) and
// removes it with a reason naming the element or attribute case.
public void handlePath(String xpath) {
String fullPath = cldrFileToFilter.getFullXPath(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullPath);
for (int i = 0; i < parts.size(); ++i) {
String element = parts.getElement(i);
if (dtdData.isDeprecated(element, "*", "*")) {
remove(fullPath, "Deprecated element");
for (Entry<String, String> entry : parts.getAttributes(i).entrySet()) {
String attribute = entry.getKey();
String value = entry.getValue();
if (dtdData.isDeprecated(element, attribute, value)) {
remove(fullPath, "Element with deprecated attribute(s)");
// Filter 'e': for paths mentioning InterIndic whose transform element has direction="both",
// rewrites the direction to "forward" and re-files the value under the changed path.
fixList.add('e', "fix Interindic", new CLDRFilter() {
    public void handlePath(String xpath) {
        if (xpath.indexOf("=\"InterIndic\"") < 0) return;
        String v = cldrFileToFilter.getStringValue(xpath);
        String fullXPath = cldrFileToFilter.getFullXPath(xpath);
        // Work on a thawed copy: getFrozenInstance returns a frozen object, and the other
        // filters in this file clone before changing attributes.
        XPathParts fullparts = XPathParts.getFrozenInstance(fullXPath).cloneAsThawed();
        Map<String, String> attributes = fullparts.findAttributes("transform");
        if (attributes == null) {
            return; // no transform element in this path
        }
        String oldValue = attributes.get("direction");
        if ("both".equals(oldValue)) {
            fullparts.setAttribute("transform", "direction", "forward");
            replace(xpath, fullparts.toString(), v);
        }
    }
});
// Filter 'B': for display-name paths (languages, scripts, territories, metazones, exemplar
// cities, currency names), removes values that merely repeat the code or the English value.
fixList.add('B', "fix bogus values", new CLDRFilter() {
// Path patterns this filter applies to; the capture group is the type attribute.
RegexLookup<Integer> paths = RegexLookup.<Integer> of()
.add("//ldml/localeDisplayNames/languages/language[@type='([^']*)']", 0)
.add("//ldml/localeDisplayNames/scripts/script[@type='([^']*)']", 0)
.add("//ldml/localeDisplayNames/territories/territory[@type='([^']*)']", 0)
.add("//ldml/dates/timeZoneNames/metazone[@type='([^']*)']", 0)
.add("//ldml/dates/timeZoneNames/zone[@type='([^']*)']/exemplarCity", 0)
.add("//ldml/numbers/currencies/currency[@type='([^']*)']/displayName", 0);
// Receives the regex groups of the last successful lookup.
Output<String[]> arguments = new Output<>();
CLDRFile english = CLDRConfig.getInstance().getEnglish();
// Set per file in handleStart: true when the main exemplar set contains any of a-z.
boolean skip;
public void handleStart() {
CLDRFile resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
UnicodeSet exemplars = resolved.getExemplarSet(ExemplarType.main, WinningChoice.WINNING);
skip = exemplars.containsSome('a', 'z');
// TODO add simpler way to skip file entirely
// NOTE(review): the bodies of the two guards at the top of handlePath are not visible in this
// view; presumably they return early — confirm.
public void handlePath(String xpath) {
if (skip) {
Integer lookupValue = paths.get(xpath, null, arguments);
if (lookupValue == null) {
String type = arguments.value[1];
String value = cldrFileToFilter.getStringValue(xpath);
if (value.equals(type)) {
remove(xpath, "Matches code");
String evalue = english.getStringValue(xpath);
if (value.equals(evalue)) {
remove(xpath, "Matches English");
// Filter 's': moves a value from a pattern with alt="accounting" to the currencyFormat with
// type="accounting" (the alt attribute is removed from the pattern element).
// NOTE(review): the body of the alt="accounting" guard is not visible in this view; presumably
// other paths are skipped — confirm.
fixList.add('s', "fix alt accounting", new CLDRFilter() {
public void handlePath(String xpath) {
XPathParts parts = XPathParts.getFrozenInstance(xpath);
if (!parts.containsAttributeValue("alt", "accounting")) {
String oldFullXPath = cldrFileToFilter.getFullXPath(xpath);
String value = cldrFileToFilter.getStringValue(xpath);
XPathParts fullparts = XPathParts.getFrozenInstance(oldFullXPath).cloneAsThawed(); // not frozen, for removeAttribute
fullparts.removeAttribute("pattern", "alt");
fullparts.setAttribute("currencyFormat", "type", "accounting");
String newFullXPath = fullparts.toString();
replace(oldFullXPath, newFullXPath, value, "Move alt=accounting value to new path");
// Filter 'n': derives a unit displayName from the long unitPattern with count="other" by
// stripping a leading or trailing "{0}" and trimming, then adds it as provisional for both the
// long and the short unit length.
// NOTE(review): the body of the first guard and the end of the else branch are not visible in
// this view; presumably non-matching paths and patterns without "{0}" at either end are
// skipped — confirm.
fixList.add('n', "add unit displayName", new CLDRFilter() {
public void handlePath(String xpath) {
if (xpath.indexOf("/units/unitLength[@type=\"long\"]") < 0 || xpath.indexOf("/unitPattern[@count=\"other\"]") < 0 ||
xpath.indexOf("[@draft=\"unconfirmed\"]") >= 0) {
String value = cldrFileToFilter.getStringValue(xpath);
String newValue = null;
if (value.startsWith("{0}")) {
newValue = value.substring(3).trim();
} else if (value.endsWith("{0}")) {
newValue = value.substring(0, value.length() - 3).trim();
} else {
System.out.println("unitPattern-other does not start or end with \"{0}\": \"" + value + "\"");
String oldFullXPath = cldrFileToFilter.getFullXPath(xpath);
// The new path replaces everything from "unitPattern" onwards with a provisional displayName.
String newFullXPath = oldFullXPath.substring(0, oldFullXPath.indexOf("unitPattern")).concat("displayName[@draft=\"provisional\"]");
add(newFullXPath, newValue, "create unit displayName-long from unitPattern-long-other");
String newFullXPathShort = newFullXPath.replace("[@type=\"long\"]", "[@type=\"short\"]");
add(newFullXPathShort, newValue, "create unit displayName-short from unitPattern-long-other");
// Filter 'x': tests each full path against the regex given with -p (compiled lazily on the
// first path and matched against the whole path).
// NOTE(review): the action taken for non-matching paths is not visible in this view; given the
// help text "retain paths", presumably they are removed — confirm.
fixList.add('x', "retain paths", new CLDRFilter() {
Matcher m = null;
public void handlePath(String xpath) {
if (m == null) {
m = PatternCache.get(options[PATH].value).matcher("");
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
if (!m.reset(fullXPath).matches()) {
// Filter 'l': for language paths, re-files the resolved value under a path whose language type
// attribute is set to "sw_CD".
// NOTE(review): the bodies of the "/language" and "swc" guards are not visible in this view;
// presumably paths that are not language paths, or whose code is not "swc", are skipped — confirm.
fixList.add('l', "change language code", new CLDRFilter() {
private CLDRFile resolved;
// Builds the resolved file for the current locale once per file.
public void handleStart() {
resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
public void handlePath(String xpath) {
if (!xpath.contains("/language")) {
XPathParts parts = XPathParts.getFrozenInstance(xpath);
String languageCode = parts.findAttributeValue("language", "type");
String v = resolved.getStringValue(xpath);
if (!languageCode.equals("swc")) {
parts = parts.cloneAsThawed();
parts.setAttribute("language", "type", "sw_CD");
replace(xpath, parts.toString(), v);
// Filter 'g': for the CZ path with alt="variant", swaps its value with the value at the same
// path without the alt attribute.
// NOTE(review): the body of the guard is not visible in this view; presumably paths that are
// not the CZ variant are skipped — confirm.
fixList.add('g', "Swap alt/non-alt values for Czechia", new CLDRFilter() {
public void handleStart() {
public void handlePath(String xpath) {
XPathParts parts = XPathParts.getFrozenInstance(xpath);
if (!parts.containsAttributeValue("alt", "variant") || !parts.containsAttributeValue("type", "CZ")) {
String variantValue = cldrFileToFilter.getStringValue(xpath);
// The non-variant path is the same path with the [@alt="variant"] attribute removed.
String nonVariantXpath = xpath.replaceAll("\\[\\@alt=\"variant\"\\]", "");
String nonVariantValue = cldrFileToFilter.getStringValue(nonVariantXpath);
replace(xpath, xpath, nonVariantValue);
replace(nonVariantXpath, nonVariantXpath, variantValue);
// Filter 'u': moves a durationUnitPattern value to
// //ldml/units/durationUnit[@type="<type>"]/durationUnitPattern, where <type> is the
// durationUnit type taken from the old full path.
// NOTE(review): the bodies of the two guards are not visible in this view; presumably paths
// without "/units" or "/durationUnitPattern" are skipped — confirm.
fixList.add('u', "fix duration unit patterns", new CLDRFilter() {
public void handlePath(String xpath) {
if (!xpath.contains("/units")) {
if (!xpath.contains("/durationUnitPattern")) {
String value = cldrFileToFilter.getStringValue(xpath);
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullXPath);
String unittype = parts.findAttributeValue("durationUnit", "type");
String newFullXpath = "//ldml/units/durationUnit[@type=\"" + unittype + "\"]/durationUnitPattern";
replace(fullXPath, newFullXpath, value, "converting to new duration unit structure");
// Filter 'a': handles paths whose count attribute is a number (such as 0 or 1). The number is
// mapped to the locale's plural category; if a value already exists at the category path the
// numeric path is removed, otherwise the value is moved to the category path.
// NOTE(review): the bodies of the "count" and digits guards and the end of the oldValue branch
// are not visible in this view — confirm the exact control flow against the full file.
fixList.add('a', "Fix 0/1", new CLDRFilter() {
final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();
// Plural rules for the current locale; loaded per file in handleStart.
PluralInfo info;
public void handleStart() {
info = SupplementalDataInfo.getInstance().getPlurals(super.localeID);
public void handlePath(String xpath) {
if (xpath.indexOf("count") < 0) {
String fullpath = cldrFileToFilter.getFullXPath(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullpath).cloneAsThawed(); // not frozen, for setAttribute
// Index -1 presumably addresses the last element of the path — confirm.
String countValue = parts.getAttributeValue(-1, "count");
if (!DIGITS.containsAll(countValue)) {
int intValue = Integer.parseInt(countValue);
Count count = info.getCount(intValue);
parts.setAttribute(-1, "count", count.toString());
String newPath = parts.toString();
// oldValue is whatever already sits at the plural-category path; value is the numeric path's value.
String oldValue = cldrFileToFilter.getStringValue(newPath);
String value = cldrFileToFilter.getStringValue(xpath);
if (oldValue != null) {
String fixed = oldValue.replace("{0}", countValue);
if (value.equals(oldValue)
|| value.equals(fixed)) {
remove(fullpath, "Superfluous given: "
+ count + "→«" + oldValue + "»");
} else {
remove(fullpath, "Can’t replace: "
+ count + "→«" + oldValue + "»");
replace(fullpath, newPath, value, "Moving 0/1");
// Filter 'b' ("Prep for bulk import"): re-adds each value under a path marked
// draft="unconfirmed" and alt="proposed-u<userID>-implicit1.8", where userID is the value of
// the USER option.
// NOTE(review): closing braces / early returns are not visible in this listing; the guard on
// options[USER].doesOccur presumably exits when the option was not given -- confirm.
fixList.add('b', "Prep for bulk import", new CLDRFilter() {
public void handlePath(String xpath) {
if (!options[USER].doesOccur) {
String userID = options[USER].value;
String fullpath = cldrFileToFilter.getFullXPath(xpath);
String value = cldrFileToFilter.getStringValue(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullpath).cloneAsThawed(); // not frozen, for addAttribute
parts.addAttribute("draft", "unconfirmed");
parts.addAttribute("alt", "proposed-u" + userID + "-implicit1.8");
String newPath = parts.toString();
replace(fullpath, newPath, value);
// Filter 'c': handles the transition from the old currency code (VEF) to the new one (VES).
// The displayName of the old code is copied to the new code's path, and the old code's
// displayName gets a suffix built from leadingParenString, the formatted fromDate and toDate
// joined by separator, and trailingParenString.
// NOTE(review): closing braces / early returns are not visible in this listing; the guard on
// the currency displayName path presumably exits for every other path, and the locale
// if/else chain is presumably closed before the NumberFormat line -- confirm.
fixList.add('c', "Fix transiton from an old currency code to a new one", new CLDRFilter() {
public void handlePath(String xpath) {
String oldCurrencyCode = "VEF";
String newCurrencyCode = "VES";
int fromDate = 2008;
int toDate = 2018;
String leadingParenString = " (";
String trailingParenString = ")";
String separator = "\u2013";
String languageTag = "root";
if (xpath.indexOf("/currency[@type=\"" + oldCurrencyCode + "\"]/displayName") < 0) {
String value = cldrFileToFilter.getStringValue(xpath);
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
// Replaces every occurrence of the old code in the full path, not only the type attribute.
String newFullXPath = fullXPath.replace(oldCurrencyCode, newCurrencyCode);
// Written directly to the file being filtered rather than through replace(...).
cldrFileToFilter.add(newFullXPath, value);
// Exceptions for locales that use an alternate numbering system or a different format for the dates at
// the end.
// Add additional ones as necessary
String localeID = cldrFileToFilter.getLocaleID();
if (localeID.equals("ne")) {
languageTag = "root-u-nu-deva";
} else if (localeID.equals("bn")) {
languageTag = "root-u-nu-beng";
} else if (localeID.equals("ar")) {
leadingParenString = " - ";
trailingParenString = "";
} else if (localeID.equals("fa")) {
languageTag = "root-u-nu-arabext";
separator = Utility.unescape(" \\u062A\\u0627 ");
// Number format for the (possibly numbering-system-specific) language tag chosen above.
NumberFormat nf = NumberFormat.getInstance(ULocale.forLanguageTag(languageTag));
String tagString = leadingParenString + nf.format(fromDate) + separator + nf.format(toDate)
+ trailingParenString;
replace(fullXPath, fullXPath, value + tagString);
// Filter 'p' ("input-processor"): runs each value through
// DisplayAndInputProcessor.processInput() and re-adds the processed value at the same full path.
// NOTE(review): closing braces / early returns are not visible in this listing; the
// value.equals(newValue) guard presumably exits when processing changed nothing -- confirm.
fixList.add('p', "input-processor", new CLDRFilter() {
// Created per file in handleStart() and released in handleEnd().
private DisplayAndInputProcessor inputProcessor;
public void handleStart() {
inputProcessor = new DisplayAndInputProcessor(cldrFileToFilter, true);
public void handleEnd() {
inputProcessor = null; // clean up, just in case
public void handlePath(String xpath) {
String value = cldrFileToFilter.getStringValue(xpath);
String newValue = inputProcessor.processInput(xpath, value, null);
if (value.equals(newValue)) {
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
replace(fullXPath, fullXPath, newValue);
// Filter 't' ("Fix missing count values groups"): for a path with count="other", adds the
// same value under each count listed in missingCounts when that path has no winning value.
// NOTE(review): closing braces / early returns are not visible in this listing; the guard on
// @count="other" presumably exits for every other path -- confirm.
fixList.add('t', "Fix missing count values groups", new CLDRFilter() {
public void handlePath(String xpath) {
if (xpath.indexOf("@count=\"other\"") < 0) {
String value = cldrFileToFilter.getStringValue(xpath);
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
String[] missingCounts = { "one" };
for (String count : missingCounts) {
// NOTE(review): replaces every occurrence of "other" in the path, not only the count attribute.
String newFullXPath = fullXPath.replace("other", count);
if (cldrFileToFilter.getWinningValue(newFullXPath) == null) {
add(newFullXPath, value, "Adding missing plural form");
// Filter 'f': normalizes values with Normalizer.compose(value, false) and re-adds any value
// that changed at its own full path. Paths under segmentation, transforms,
// exemplarCharacters, pc, sc, tc, qc and ic are left untouched.
// NOTE(review): the closing braces of this anonymous class are not visible in this listing.
fixList.add('f', "NFC (all but transforms, exemplarCharacters, pc, sc, tc, qc, ic)", new CLDRFilter() {
public void handlePath(String xpath) {
// Skip the excluded path families.
if (xpath.indexOf("/segmentation") >= 0
|| xpath.indexOf("/transforms") >= 0
|| xpath.indexOf("/exemplarCharacters") >= 0
|| xpath.indexOf("/pc") >= 0
|| xpath.indexOf("/sc") >= 0
|| xpath.indexOf("/tc") >= 0
|| xpath.indexOf("/qc") >= 0
|| xpath.indexOf("/ic") >= 0) return;
String value = cldrFileToFilter.getStringValue(xpath);
String nfcValue = Normalizer.compose(value, false);
// Nothing to do when the value is already in composed form.
if (value.equals(nfcValue)) return;
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
replace(fullXPath, fullXPath, nfcValue);
// Filter 'v' ("remove illegal codes"): removes paths whose language, script, territory or
// currency type attribute is not in StandardCodes.getGoodAvailableCodes() for that code type,
// and paths whose zone type contains "/GMT".
// NOTE(review): closing braces are not visible in this listing, so where the for loop ends
// relative to the zone check must be confirmed against the full file.
fixList.add('v', "remove illegal codes", new CLDRFilter() {
StandardCodes sc = StandardCodes.make();
// Element names whose type attribute is validated against StandardCodes.
String[] codeTypes = { "language", "script", "territory", "currency" };
public void handlePath(String xpath) {
// Only currency, timeZoneNames and localeDisplayNames paths are examined.
if (xpath.indexOf("/currency") < 0
&& xpath.indexOf("/timeZoneNames") < 0
&& xpath.indexOf("/localeDisplayNames") < 0) return;
XPathParts parts = XPathParts.getFrozenInstance(xpath);
String code;
for (int i = 0; i < codeTypes.length; ++i) {
code = parts.findAttributeValue(codeTypes[i], "type");
if (code != null) {
if (!sc.getGoodAvailableCodes(codeTypes[i]).contains(code)) remove(xpath);
code = parts.findAttributeValue("zone", "type");
if (code != null) {
if (code.indexOf("/GMT") >= 0) remove(xpath);
// Filter 'w' ("fix alt='...proposed' when there is no alternative"): for a path containing
// "proposed", computes the full path with the proposed marker removed. If a value already
// exists at that base path, an identical proposed entry is removed; otherwise the proposed
// value is moved to the base path.
// NOTE(review): closing braces are not visible in this listing, so the nesting of the
// branches below must be confirmed against the full file.
fixList.add('w', "fix alt='...proposed' when there is no alternative", new CLDRFilter() {
// NOTE(review): checked before replace(...) below, but no add(...) to this set is visible in
// this listing.
private Set<String> newFullXPathSoFar = new HashSet<>();
public void handlePath(String xpath) {
if (xpath.indexOf("proposed") < 0) return;
String fullXPath = cldrFileToFilter.getFullXPath(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullXPath).cloneAsThawed(); // not frozen, for removeProposed
String newFullXPath = parts.removeProposed().toString();
// now see if there is an uninherited value
String value = cldrFileToFilter.getStringValue(xpath);
String baseValue = cldrFileToFilter.getStringValue(newFullXPath);
if (baseValue != null) {
// if the value AND the fullxpath are the same as what we have, then delete
if (value.equals(baseValue)) {
String baseFullXPath = cldrFileToFilter.getFullXPath(newFullXPath);
if (baseFullXPath.equals(newFullXPath)) {
remove(xpath, "alt=base");
return; // there is, so skip
// there isn't, so modif if we haven't done so already
if (!newFullXPathSoFar.contains(newFullXPath)) {
replace(fullXPath, newFullXPath, value);
// Filter 'S' ("add datetimeSkeleton to dateFormat,timeFormat"): for standard dateFormat and
// timeFormat pattern paths, derives a skeleton from the pattern with
// DateTimePatternGenerator.getSkeleton() and adds it at a sibling datetimeSkeleton path.
// NOTE(review): closing braces / early returns are not visible in this listing; the first
// guard presumably exits for non-matching paths, and how the empty-skeleton case is handled
// after show(...) cannot be determined from here -- confirm against the full file.
fixList.add('S', "add datetimeSkeleton to dateFormat,timeFormat", new CLDRFilter() {
DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance();
public void handlePath(String xpath) {
// desired xpaths are like
// //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"]
// //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@draft="..."]
// //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@numbers="..."]
// //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@numbers="..."][@draft="..."]
// //ldml/dates/calendars/calendar[@type="..."]/dateFormats/dateFormatLength[@type="..."]/dateFormat[@type="standard"]/pattern[@type="standard"][@alt="variant"]
// //ldml/dates/calendars/calendar[@type="..."]/timeFormats/timeFormatLength[@type="..."]/timeFormat[@type="standard"]/pattern[@type="standard"]
// //ldml/dates/calendars/calendar[@type="..."]/timeFormats/timeFormatLength[@type="..."]/timeFormat[@type="standard"]/pattern[@type="standard"][@draft="..."]
if (xpath.indexOf("/dateFormat[@type=\"standard\"]/pattern") < 0 && xpath.indexOf("/timeFormat[@type=\"standard\"]/pattern") < 0) {
String patternValue = cldrFileToFilter.getStringValue(xpath);
// Default: the skeleton is the pattern value itself; it is recomputed below when the
// pattern is not the "↑↑↑" marker.
String skeletonValue = patternValue;
if (!patternValue.equals("↑↑↑")) {
skeletonValue = dateTimePatternGenerator.getSkeleton(patternValue);
if (skeletonValue == null || skeletonValue.length() < 1) {
show("empty skeleton for datetime pattern \"" + patternValue + "\"", "path " + xpath);
String patternFullXPath = cldrFileToFilter.getFullXPath(xpath);
// Replace pattern[@type="standard"] with datetimeSkeleton, preserve other attributes (including numbers per TC discussion).
// Note that for the alt="variant" patterns there are corresponding alt="variant" availableFormats that must be used.
String skeletonFullXPath = patternFullXPath.replace("/pattern[@type=\"standard\"]", "/datetimeSkeleton"); // .replaceAll("\\[@numbers=\"[^\"]+\"\\]", "")
add(skeletonFullXPath, skeletonValue, "create datetimeSkeleton from dateFormat/pattern or timeFormat/pattern");
/*
 * Fix id to be identical to skeleton
 * Eliminate any single-field ids
 * Add "L" (stand-alone month), "?" (other stand-alones)
 * Remove any fields with both a date and a time
 * Test that datetime format is valid format (will have to fix by hand)
 * Map k, K to H, h
 * In Survey Tool: don't show id; compute when item added or changed
 * test validity
 */
// Filter 'd' ("fix dates"): two visible pieces of work in handlePath --
//  (1) for "full" timeFormatLength paths, replaces each 'z...' pattern field with a 'v...'
//      field of the same length and re-adds the rebuilt pattern;
//  (2) for availableFormats paths, rewrites the dateFormatItem id to the base skeleton
//      returned by DateTimePatternGenerator.getBaseSkeleton().
// NOTE(review): closing braces / early returns are not visible in this listing, so the exact
// nesting below (and the exits after the guards) must be confirmed against the full file.
fixList.add('d', "fix dates", new CLDRFilter() {
DateTimePatternGenerator dateTimePatternGenerator = DateTimePatternGenerator.getEmptyInstance();
DateTimePatternGenerator.FormatParser formatParser = new DateTimePatternGenerator.FormatParser();
// NOTE(review): not referenced anywhere in the visible part of this filter.
Map<String, Set<String>> seenSoFar = new HashMap<>();
public void handleStart() {
public void handlePath(String xpath) {
if (xpath.contains("timeFormatLength") && xpath.contains("full")) {
String fullpath = cldrFileToFilter.getFullXPath(xpath);
String value = cldrFileToFilter.getStringValue(xpath);
boolean gotChange = false;
// Parse the pattern into literal strings and variable fields.
List<Object> list = formatParser.set(value).getItems();
for (int i = 0; i < list.size(); ++i) {
Object item = list.get(i);
if (item instanceof DateTimePatternGenerator.VariableField) {
String itemString = item.toString();
if (itemString.charAt(0) == 'z') {
// Swap the field for 'v' repeated to the same length.
list.set(i, new VariableField(Utility.repeat("v", itemString.length())));
gotChange = true;
if (gotChange) {
String newValue = toStringWorkaround();
// NOTE(review): this compares String references, not contents; newValue comes from a fresh
// StringBuffer, so the references presumably always differ -- confirm whether
// !value.equals(newValue) was intended.
if (value != newValue) {
replace(xpath, fullpath, newValue);
if (xpath.indexOf("/availableFormats") < 0) {
String value = cldrFileToFilter.getStringValue(xpath);
if (value == null) {
return; // not in current file
String fullpath = cldrFileToFilter.getFullXPath(xpath);
XPathParts fullparts = XPathParts.getFrozenInstance(fullpath);
Map<String, String> attributes = fullparts.findAttributes("dateFormatItem");
String id = attributes.get("id");
String oldID = id;
try {
id = dateTimePatternGenerator.getBaseSkeleton(id);
if (id.equals(oldID)) {
System.out.println(oldID + " => " + id);
} catch (RuntimeException e) {
// An id that cannot be converted is marked with a placeholder value.
id = "[error]";
attributes.put("id", id);
replace(xpath, fullparts.toString(), value);
// Rebuilds a pattern string from formatParser's current items; String items are passed
// through quoteLiteral().
// NOTE(review): the body of the else branch and the loop's closing braces are not visible in
// this listing.
private String toStringWorkaround() {
StringBuffer result = new StringBuffer();
List<Object> items = formatParser.getItems();
for (int i = 0; i < items.size(); ++i) {
Object item = items.get(i);
if (item instanceof String) {
result.append(formatParser.quoteLiteral((String) items.get(i)));
} else {
return result.toString();
// Filter 'y' ("fix years to be y (with exceptions)"): passes date/time pattern values through
// DateTimeCanonicalizer.getCanonicalDatePattern() and re-adds the result.
// NOTE(review): closing braces / early returns are not visible in this listing; the guards on
// the pattern type and on value.equals(oldValue) presumably exit when the path is not a
// relevant pattern type or nothing changed -- confirm against the full file.
fixList.add('y', "fix years to be y (with exceptions)", new CLDRFilter() {
DateTimeCanonicalizer dtc = new DateTimeCanonicalizer(true);
// NOTE(review): not referenced anywhere in the visible part of this filter.
Map<String, Set<String>> seenSoFar = new HashMap<>();
public void handleStart() {
public void handlePath(String xpath) {
DateTimePatternType datetimePatternType = DateTimePatternType.fromPath(xpath);
// check to see if we need to change the value
if (!DateTimePatternType.STOCK_AVAILABLE_INTERVAL_PATTERNS.contains(datetimePatternType)) {
String oldValue = cldrFileToFilter.getStringValue(xpath);
String value = dtc.getCanonicalDatePattern(xpath, oldValue, datetimePatternType);
String fullPath = cldrFileToFilter.getFullXPath(xpath);
if (value.equals(oldValue)) {
// made it through the gauntlet, so replace
replace(xpath, fullPath, value);
// This should only be applied to specific locales, and the results checked manually afterward.
// It will only create ranges using the same digits as in root, not script-specific digits.
// Any pre-existing year ranges should use the range marker from the intervalFormats "y" item.
// This make several assumptions and is somewhat *FRAGILE*.
//
// Summary: for Japanese-calendar era paths, takes the " (...)" year-range suffix from the
// root era name, swaps its hyphen / en dash for this locale's range marker, and appends the
// result to the locale's era name.
// NOTE(review): closing braces are not visible in this listing, and the statement that
// assigns yearIntervalFormat is cut off after `cldrFileToFilter`.
fixList.add('j', "add year ranges from root to Japanese calendar eras", new CLDRFilter() {
// Root file, made unresolved (second argument false) in handleStart().
private CLDRFile rootFile;
public void handleStart() {
rootFile = factory.make("root", false);
public void handlePath(String xpath) {
// Skip paths we don't care about
if (xpath.indexOf("/calendar[@type=\"japanese\"]/eras/era") < 0) return;
// Get root name for the era, check it
String rootEraValue = rootFile.getStringValue(xpath);
int rootEraIndex = rootEraValue.indexOf(" (");
if (rootEraIndex < 0) return; // this era does not have a year range in root, no need to add one in this
// locale
// Get range marker from intervalFormat range for y
String yearIntervalFormat = cldrFileToFilter
if (yearIntervalFormat == null) return; // oops, no intervalFormat data for y
String rangeMarker = yearIntervalFormat.replaceAll("[.y\u5E74\uB144]", ""); // *FRAGILE* strip out
// everything except the
// range-indicating part
// Get current locale name for this era, check it
String eraValue = cldrFileToFilter.getStringValue(xpath);
if (eraValue.indexOf('(') >= 0 && eraValue.indexOf(rangeMarker) >= 0) return; // this eraValue already
// has a year range that
// uses the appropriate
// rangeMarker
// Now update the root year range it with the rangeMarker for this locale, and append it to this
// locale's name
String rootYearRange = rootEraValue.substring(rootEraIndex);
String appendYearRange = rootYearRange.replaceAll("[\u002D\u2013]", rangeMarker);
String newEraValue = eraValue.concat(appendYearRange);
String fullpath = cldrFileToFilter.getFullXPath(xpath);
replace(xpath, fullpath, newEraValue);
// Filter 'r' ("fix references and standards"): renumbers reference ids. Each distinct old
// reference string in a locale is mapped to a new id of the form "R" + number, with numbers
// allocated from currentRef (starting at 500), and the path is re-added with the new id.
// NOTE(review): closing braces and the comment delimiters around the @param lines are not
// visible in this listing.
fixList.add('r', "fix references and standards", new CLDRFilter() {
// Next number to hand out; shared across all locales handled by this filter instance.
int currentRef = 500;
// Per-locale cache of old reference -> new reference.
Map<String, TreeMap<String, String>> locale_oldref_newref = new TreeMap<>();
// The mapping for the locale currently being processed (selected in handleStart()).
TreeMap<String, String> oldref_newref;
public void handleStart() {
String locale = cldrFileToFilter.getLocaleID();
oldref_newref = locale_oldref_newref.get(locale);
if (oldref_newref == null) {
oldref_newref = new TreeMap<>();
locale_oldref_newref.put(locale, oldref_newref);
public void handlePath(String xpath) {
// must be minimized for this to work.
String fullpath = cldrFileToFilter.getFullXPath(xpath);
if (!fullpath.contains("reference")) return;
String value = cldrFileToFilter.getStringValue(xpath);
XPathParts fullparts = XPathParts.getFrozenInstance(fullpath).cloneAsThawed(); // can't be frozen
// A <reference> element carries its id in "type"; other elements carry it in "references".
if ("reference".equals(fullparts.getElement(-1))) {
fixType(value, "type", fullpath, fullparts);
} else if (fullparts.getAttributeValue(-1, "references") != null) {
fixType(value, "references", fullpath, fullparts);
} else {
System.out.println("CLDRModify: Skipping: " + xpath);
* @param value
* @param type
* @param oldFullPath
* @param fullparts the XPathParts -- must not be frozen, for addAttribute
// Replaces the reference id stored in attribute `type` of the last element with its new id
// and re-adds the entry; throws IllegalArgumentException when the old id contains whitespace.
private void fixType(String value, String type, String oldFullPath, XPathParts fullparts) {
String ref = fullparts.getAttributeValue(-1, type);
if (whitespace.containsSome(ref)) {
throw new IllegalArgumentException("Whitespace in references");
String newRef = getNewRef(ref);
fullparts.addAttribute(type, newRef);
replace(oldFullPath, fullparts.toString(), value);
// Returns the new id for `ref`, allocating and caching one on first use.
private String getNewRef(String ref) {
String newRef = oldref_newref.get(ref);
if (newRef == null) {
newRef = String.valueOf(currentRef++);
// "R" plus the number; presumably zero-padded to three digits via Utility.repeat -- confirm.
newRef = "R" + Utility.repeat("0", (3 - newRef.length())) + newRef;
oldref_newref.put(ref, newRef);
return newRef;
// Filter 'q' ("fix annotation punctuation"): for annotation paths, normalizes the cp
// attribute, moves a tts attribute into a separate type="tts" entry, re-joins the keyword
// value with " | ", and prefixes the PREBLOCK comment with the hex code of the character.
// NOTE(review): closing braces / early returns are not visible in this listing; the guard on
// "/annotation" presumably exits for other paths, and the nesting of the cp handling must be
// confirmed against the full file.
fixList.add('q', "fix annotation punctuation", new CLDRFilter() {
public void handlePath(String xpath) {
if (!xpath.contains("/annotation")) {
String fullpath = cldrFileToFilter.getFullXPath(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullpath);
// Attributes are read from the element at index 2 of the path.
String cp = parts.getAttributeValue(2, "cp");
String tts = parts.getAttributeValue(2, "tts");
String type = parts.getAttributeValue(2, "type");
if ("tts".equals(type)) {
return; // ok, skip
parts = parts.cloneAsThawed();
String hex = "1F600";
// A cp written as a UnicodeSet pattern is reduced to its single member when it has exactly one.
if (cp.startsWith("[")) {
UnicodeSet us = new UnicodeSet(cp);
if (us.size() == 1) {
cp = us.iterator().next();
hex = Utility.hex(cp);
} else {
hex = us.toString();
parts.putAttributeValue(2, "cp", cp);
parts.removeAttribute(2, "tts");
if (tts != null) {
// Emit the tts text as its own type="tts" entry, with its items re-joined by ", ".
String newTts = CldrUtility.join(COMMA_SEMI.splitToList(tts), ", ");
XPathParts parts2 = parts.cloneAsThawed();
parts2.putAttributeValue(2, "type", "tts");
add(parts2.toString(), newTts, "separate tts");
String value = cldrFileToFilter.getStringValue(xpath);
String newValue = CldrUtility.join(COMMA_SEMI.splitToList(value), " | ");
final String newFullPath = parts.toString();
// Move the PREBLOCK comment to the new path, prefixed with the hex string.
Comments comments = cldrFileToFilter.getXpath_comments();
String comment = comments.removeComment(CommentType.PREBLOCK, xpath);
comment = hex + (comment == null ? "" : " " + comment);
comments.addComment(CommentType.PREBLOCK, newFullPath, comment);
if (!fullpath.equals(newFullPath) || !value.equals(newValue)) {
replace(fullpath, newFullPath, newValue);
// Filter 'Q' ("add annotation names to keywords"): for a type="tts" annotation path, builds
// the keyword list of the matching non-tts path from the collator-sorted set `sorted` and
// re-adds it when it differs from the resolved keyword value.
// NOTE(review): closing braces / early returns are not visible in this listing, and no
// statement that fills `sorted` or uses `items` after L1105 is visible -- confirm against the
// full file before relying on this description.
fixList.add('Q', "add annotation names to keywords", new CLDRFilter() {
Set<String> available = Annotations.getAllAvailable();
// Ordered by the root collator.
TreeSet<String> sorted = new TreeSet<>(Collator.getInstance(ULocale.ROOT));
CLDRFile resolved;
public void handleStart() {
String localeID = cldrFileToFilter.getLocaleID();
if (!available.contains(localeID)) {
throw new IllegalArgumentException("no annotations available, probably wrong directory");
resolved = factory.make(localeID, true);
public void handlePath(String xpath) {
if (!xpath.contains("/annotation")) {
// <annotation cp="💯">100 | honderd | persent | telling | vol</annotation>
// <annotation cp="💯" type="tts">honderd punte</annotation>
// we will copy honderd punte into the list of keywords.
String fullpath = cldrFileToFilter.getFullXPath(xpath);
XPathParts parts = XPathParts.getFrozenInstance(fullpath);
String type = parts.getAttributeValue(2, "type");
if (type == null) {
return; // no TTS, so keywords, skip
// The keyword path is the same path without the type attribute.
XPathParts keywordParts = parts.cloneAsThawed().removeAttribute(2, "type");
String keywordPath = keywordParts.toString();
String rawKeywordValue = cldrFileToFilter.getStringValue(keywordPath);
// skip if keywords AND name are inherited
if (rawKeywordValue == null || rawKeywordValue.equals(CldrUtility.INHERITANCE_MARKER)) {
String rawName = cldrFileToFilter.getStringValue(xpath);
if (rawName == null || rawName.equals(CldrUtility.INHERITANCE_MARKER)) {
// skip if the name is not above root
String nameSourceLocale = resolved.getSourceLocaleID(xpath, null);
if ("root".equals(nameSourceLocale) || XMLSource.CODE_FALLBACK_ID.equals(nameSourceLocale)) {
String name = resolved.getStringValue(xpath);
String keywordValue = resolved.getStringValue(keywordPath);
String sourceLocaleId = resolved.getSourceLocaleID(keywordPath, null);
List<String> items;
// Keywords are split into items only when they come from a locale other than root or
// code-fallback.
if (!sourceLocaleId.equals(XMLSource.ROOT_ID) && !sourceLocaleId.equals(XMLSource.CODE_FALLBACK_ID)) {
items = Annotations.splitter.splitToList(keywordValue);
String newKeywordValue = Joiner.on(" | ").join(sorted);
if (!newKeywordValue.equals(keywordValue)) {
replace(keywordPath, keywordPath, newKeywordValue);
// Filter 'N' ("add number symbols to exemplars"): collects the numeric exemplar characters of
// the numbering systems seen under //ldml/numbers/symbols into numberStuff, and in
// handleEnd() writes that set to the numbers exemplar path when it differs from the current
// set.
// NOTE(review): closing braces / early returns are not visible in this listing, and no
// statement that adds to `seen` or `hackAllowOnly` is visible -- confirm against the full file.
fixList.add('N', "add number symbols to exemplars", new CLDRFilter() {
CLDRFile resolved;
// Accumulates the characters gathered by handlePath() for use in handleEnd().
UnicodeSet numberStuff = new UnicodeSet();
Set<String> seen = new HashSet<>();
Set<String> hackAllowOnly = new HashSet<>();
// Set to true for the root locale in handleStart().
boolean skip = false;
public void handleStart() {
String localeID = cldrFileToFilter.getLocaleID();
resolved = factory.make(localeID, true);
skip = localeID.equals("root");
// TODO add return value to handleStart to skip calling handlePath
for (NumberingSystem system : NumberingSystem.values()) {
// A null system.path is treated as the "latn" numbering system.
String numberingSystem = system.path == null ? "latn" : cldrFileToFilter.getStringValue(system.path);
if (numberingSystem != null) {
public void handlePath(String xpath) {
// the following doesn't work without NUMBER_SYSTEM_HACK, because there are spurious numbersystems in the data.
// so using a hack for now in handleEnd
if (skip || !xpath.startsWith("//ldml/numbers/symbols")) {
// //ldml/numbers/symbols[@numberSystem="latn"]/exponential
XPathParts parts = XPathParts.getFrozenInstance(xpath);
String system = parts.getAttributeValue(2, "numberSystem");
if (system == null) {
System.err.println("Bogus numberSystem:\t" + cldrFileToFilter.getLocaleID() + " \t" + xpath);
} else if (seen.contains(system) || !hackAllowOnly.contains(system)) {
UnicodeSet exemplars = resolved.getExemplarsNumeric(system);
System.out.println("# " + system + " ==> " + exemplars.toPattern(false));
for (String s : exemplars) {
numberStuff.addAll(s); // add individual characters
public void handleEnd() {
if (!numberStuff.isEmpty()) {
UnicodeSet current = cldrFileToFilter.getExemplarSet(ExemplarType.numbers, WinningChoice.WINNING);
if (!numberStuff.equals(current)) {
DisplayAndInputProcessor daip = new DisplayAndInputProcessor(cldrFileToFilter);
if (current != null && !current.isEmpty()) {
String path = CLDRFile.getExemplarPath(ExemplarType.numbers);
// The exemplar set is serialized with the processor's pretty printer.
String value = daip.getPrettyPrinter().format(numberStuff);
replace(path, path, value);
// Config-driven filter (the call that opens this statement is not visible in this listing).
// Visible structure:
//  - handleStart(): selects the config entries whose locale matcher matches this locale,
//    validates each entry's arguments per action, and performs add/addNew up front;
//  - fillCache(): reads the file named by options[KONFIG], one entry per line, where a line
//    is a ';'-separated list of key=value pairs keyed by ConfigKeys;
//  - handlePath(): applies delete and replace entries to each matching path/value.
// NOTE(review): closing braces, break/continue/return statements and several continuation
// lines are not visible in this listing, so the nesting and fall-through behaviour below
// must be confirmed against the full file.
"fix according to -k config file. Details on",
new CLDRFilter() {
// All config entries, keyed by locale matcher; filled lazily by fillCache().
private Map<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> locale2keyValues;
// Entries iterated by handleStart() and handlePath().
private LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = new LinkedHashSet<>();
public void handleStart() {
if (!options[FIX].doesOccur || !options[FIX].value.equals("k")) {
if (locale2keyValues == null) {
// set up for the specific locale we are dealing with.
// a small optimization
String localeId = getLocaleID();
for (Entry<ConfigMatch, LinkedHashSet<Map<ConfigKeys, ConfigMatch>>> localeMatcher : locale2keyValues
.entrySet()) {
if (localeMatcher.getKey().matches(localeId)) {
System.out.println("# Checking entries & changing:\t" + keyValues.size());
for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
ConfigMatch action = entry.get(ConfigKeys.action);
ConfigMatch pathMatch = entry.get(ConfigKeys.path);
ConfigMatch valueMatch = entry.get(ConfigKeys.value);
ConfigMatch newPath = entry.get(ConfigKeys.new_path);
ConfigMatch newValue = entry.get(ConfigKeys.new_value);
switch (action.action) {
// we add all the values up front
case addNew:
case add:
// add/addNew take only new_path and new_value; path and value must be absent.
if (pathMatch != null || valueMatch != null || newPath == null || newValue == null) {
throw new IllegalArgumentException(
"Bad arguments, must have non-null for one of:" +
"path, value, new_path, new_value "
+ ":\n\t"
+ entry);
String newPathString = newPath.getPath(getResolved());
// `add` always writes; `addNew` writes only when the file has no value at the path.
if (action.action == ConfigAction.add
|| cldrFileToFilter.getStringValue(newPathString) == null) {
replace(newPathString, newPathString, newValue.exactMatch, "config");
// we just check
case replace:
if ((pathMatch == null && valueMatch == null) || (newPath == null && newValue == null)) {
throw new IllegalArgumentException(
"Bad arguments, must have " +
"(path!=null OR value=null) AND (new_path!=null OR new_value!=null):\n\t"
+ entry);
// For delete, we just check; we'll remove later
case delete:
if (newPath != null || newValue != null) {
throw new IllegalArgumentException("Bad arguments, must have " +
"newPath=null, newValue=null"
+ entry);
default: // fall through
throw new IllegalArgumentException("Internal Error");
// Loads the config file into locale2keyValues.
private void fillCache() {
locale2keyValues = new LinkedHashMap<>();
String configFileName = options[KONFIG].value;
FileProcessor myReader = new FileProcessor() {
doHash = false;
protected boolean handleLine(int lineCount, String line) {
line = line.trim();
// Fields are separated by ';' with optional surrounding whitespace.
String[] lineParts = line.split("\\s*;\\s*");
Map<ConfigKeys, ConfigMatch> keyValue = new EnumMap<>(
for (String linePart : lineParts) {
int pos = linePart.indexOf('=');
if (pos < 0) {
throw new IllegalArgumentException(lineCount + ":\t No = in command: «" + linePart + "» in " + line);
ConfigKeys key = ConfigKeys.valueOf(linePart.substring(0, pos).trim());
if (keyValue.containsKey(key)) {
throw new IllegalArgumentException("Must not have multiple keys: " + key);
String match = linePart.substring(pos + 1).trim();
keyValue.put(key, new ConfigMatch(key, match));
// Every entry must specify both a locale and an action.
final ConfigMatch locale = keyValue.get(ConfigKeys.locale);
if (locale == null || keyValue.get(ConfigKeys.action) == null) {
throw new IllegalArgumentException();
// validate new path
LinkedHashSet<Map<ConfigKeys, ConfigMatch>> keyValues = locale2keyValues
if (keyValues == null) {
keyValues = new LinkedHashSet<>());
return true;
myReader.process(CLDRModify.class, configFileName);
public void handlePath(String xpath) {
// slow method; could optimize
for (Map<ConfigKeys, ConfigMatch> entry : keyValues) {
ConfigMatch pathMatch = entry.get(ConfigKeys.path);
if (pathMatch != null && !pathMatch.matches(xpath)) {
ConfigMatch valueMatch = entry.get(ConfigKeys.value);
String value = cldrFileToFilter.getStringValue(xpath);
if (valueMatch != null && !valueMatch.matches(value)) {
ConfigMatch action = entry.get(ConfigKeys.action);
switch (action.action) {
case delete:
remove(xpath, "config");
case replace:
ConfigMatch newPath = entry.get(ConfigKeys.new_path);
ConfigMatch newValue = entry.get(ConfigKeys.new_value);
String fullpath = cldrFileToFilter.getFullXPath(xpath);
// Carry an existing [@draft=...] attribute over to the modified path.
String draft = "";
int loc = fullpath.indexOf("[@draft=");
if (loc >= 0) {
int loc2 = fullpath.indexOf(']', loc + 7);
draft = fullpath.substring(loc, loc2 + 1);
String modPath = ConfigMatch.getModified(pathMatch, xpath, newPath) + draft;
String modValue = ConfigMatch.getModified(valueMatch, value, newValue);
replace(xpath, modPath, modValue, "config");
// Options registered without a CLDRFilter; fix(...) checks inputOptions for 'i' itself
// (fixIdenticalChildren).
// NOTE(review): fix(...) tests the letter 'v', not 'o', before calling
// CLDRTest.checkAttributeValidity -- confirm which letter is intended.
fixList.add('i', "fix Identical Children");
fixList.add('o', "check attribute validity");
/*
 * Goal is: if the value in vxml is ^^^, then add ^^^ to trunk IFF either
 * (a) there is no value in trunk, or
 * (b) the value in trunk = bailey.
 */
// Filter '^' ("add inheritance-marked items from vxml to trunk"): all work happens in
// handleStart(). It loads the locale's file from the vxml directories and, for each path
// whose vxml value is the inheritance marker, adds that marker to the file being filtered,
// subject to the checks on the trunk value and the bailey value below.
// NOTE(review): closing braces and continue/return statements are not visible in this
// listing; the negated guards in the loop presumably skip the current path -- confirm
// against the full file.
fixList.add('^', "add inheritance-marked items from vxml to trunk", new CLDRFilter() {
// Factory over the vxml directories; created once, the first time handleStart() runs.
Factory VxmlFactory;
final ArrayList<File> fileList = new ArrayList<>();
public void handleStart() {
if (fileList.isEmpty()) {
for (String top : Arrays.asList("common/", "seed/")) {
//for (String leaf : Arrays.asList("main/", "annotations/")) {
// The leaf directory follows the kind of source being processed.
String leaf = sourceInput.contains("annotations") ? "annotations/" : "main/";
String key = top + leaf;
fileList.add(new File(CLDRPaths.AUX_DIRECTORY + "voting/" + CLDRFile.GEN_VERSION + "/vxml/" + key));
VxmlFactory = SimpleFactory.make(fileList.toArray(new File[fileList.size()]), ".*");
String localeID = cldrFileToFilter.getLocaleID();
CLDRFile vxmlCommonMainFile;
try {
vxmlCommonMainFile = VxmlFactory.make(localeID, false);
} catch (Exception e) {
System.out.println("#ERROR: VXML file not found for " + localeID + " in " + fileList);
// Use the file itself when it is already resolved; otherwise make a resolved one.
CLDRFile resolved = cldrFileToFilter;
if (!cldrFileToFilter.isResolved()) {
resolved = factory.make(cldrFileToFilter.getLocaleID(), true);
for (String xpath : vxmlCommonMainFile) {
String vxmlValue = vxmlCommonMainFile.getStringValue(xpath);
if (vxmlValue == null) {
if (!CldrUtility.INHERITANCE_MARKER.equals(vxmlValue)) {
String trunkValue = resolved.getStringValue(xpath);
if (trunkValue != null) {
String baileyValue = resolved.getBaileyValue(xpath, null, null);
if (!trunkValue.equals(baileyValue)) {
// at this point, the vxmlValue is ^^^ and the trunk value is either null or == baileyValue
String fullPath = resolved.getFullXPath(xpath); // get the draft status, etc.
if (fullPath == null) { // debugging
// Fall back to the full path recorded in the vxml file.
fullPath = vxmlCommonMainFile.getFullXPath(xpath);
if (fullPath == null) {
throw new ICUException("getFullXPath not working for " + localeID + ", " + xpath);
add(fullPath, vxmlValue, "Add or replace by " + CldrUtility.INHERITANCE_MARKER);
public void handlePath(String xpath) {
// Everything done in handleStart
// Filter 'L' ("fix logical groups by adding all the bailey values"): for a path that belongs
// to a logical group (LogicalGrouping.getPaths), re-adds group members that are not optional
// and not present in this file, using the resolved value and a draft status equal to the
// lowest status found in the group (never better than `contributed`).
// NOTE(review): closing braces and continue/return statements are not visible in this
// listing, and no statement that adds to `seen` or `needed` is visible; the guards below
// presumably skip or exit -- confirm against the full file.
fixList.add('L', "fix logical groups by adding all the bailey values", new CLDRFilter() {
Set<String> seen = new HashSet<>();
CLDRFile resolved;
// True for locales that have a region or variants; set in handleStart().
boolean skip;
CoverageLevel2 coverageLeveler;
public void handleStart() {
resolved = getResolved();
skip = false;
coverageLeveler = null;
String localeID = cldrFileToFilter.getLocaleID();
LanguageTagParser ltp = new LanguageTagParser().set(localeID);
if (!ltp.getRegion().isEmpty() || !ltp.getVariants().isEmpty()) {
skip = true;
} else {
coverageLeveler = CoverageLevel2.getInstance(localeID);
public void handlePath(String xpath) {
// `skip` is tested first, so coverageLeveler (null when skip is true) is not dereferenced.
if (skip
|| seen.contains(xpath)
|| coverageLeveler.getLevel(xpath) == Level.COMPREHENSIVE) {
Set<String> paths = LogicalGrouping.getPaths(cldrFileToFilter, xpath);
if (paths == null || paths.size() < 2) {
Set<String> needed = new LinkedHashSet<>();
for (String path2 : paths) {
if (path2.equals(xpath)) {
if (cldrFileToFilter.isHere(path2)) {
if (LogicalGrouping.isOptional(cldrFileToFilter, path2)) {
// ok, we have a path missing a value
if (needed.isEmpty()) {
// we need at least one value
// flesh out by adding a bailey value
// TODO resolve the draft status in a better way
// For now, get the lowest draft status, and we'll reset everything to that.
DraftStatus worstStatus = DraftStatus.contributed; // don't ever add an approved.
for (String path2 : paths) {
XPathParts parts = XPathParts.getFrozenInstance(path2);
String rawStatus = parts.getAttributeValue(-1, "draft");
if (rawStatus == null) {
DraftStatus df = DraftStatus.forString(rawStatus);
if (df.compareTo(worstStatus) < 0) {
worstStatus = df;
for (String path2 : paths) {
String fullPath = resolved.getFullXPath(path2);
String value = resolved.getStringValue(path2);
if (LogicalGrouping.isOptional(cldrFileToFilter, path2)
&& !cldrFileToFilter.isHere(path2)) {
XPathParts fullparts = XPathParts.getFrozenInstance(fullPath).cloneAsThawed(); // not frozen, for setAttribute
fullparts.setAttribute(-1, "draft", worstStatus.toString());
replace(fullPath, fullparts.toString(), value, "Fleshing out bailey to " + worstStatus);
public static String getLast2Dirs(File sourceDir1) {
String[] pathElements = sourceDir1.toString().split("/");
return pathElements[pathElements.length-2] + "/" + pathElements[pathElements.length-1] + "/";
// references=""
// Holds the value and full xpath recorded for one path; fixIdenticalChildren fills both
// fields from a child locale's file and compares them across children.
// NOTE(review): the closing brace of this class is not visible in this listing.
private static class ValuePair {
// From CLDRFile.getStringValue(xpath).
String value;
// From CLDRFile.getFullXPath(xpath).
String fullxpath;
/**
 * Find the set of xpaths that
 * (a) have all the same values (if present) in the children,
 * (b) are absent in the parent, and
 * (c) are different from what is in the fully resolved parent,
 * and add them.
 */
// Collects, across the child locales of k, the paths whose value and full xpath agree in
// every child that has them, and adds those entries to `replacements`.
//   cldrFactory  - used to list the children of k's locale and to make their files
//   k            - the parent file; nothing is done for "root" or when there are no children
//   replacements - receives the (fullxpath, value) pairs found
// NOTE(review): closing braces are not visible in this listing, and the body of the
// mismatch branch (the `else if` near the end of the inner loop) is missing, so how
// conflicting paths are dropped or added to skipPaths cannot be seen here.
static void fixIdenticalChildren(Factory cldrFactory, CLDRFile k, CLDRFile replacements) {
String key = k.getLocaleID();
if (key.equals("root")) return;
Set<String> availableChildren = cldrFactory.getAvailableWithParent(key, true);
if (availableChildren.size() == 0) return;
Set<String> skipPaths = new HashSet<>();
// Candidate paths, each with the value/full path first seen for it.
Map<String, ValuePair> haveSameValues = new TreeMap<>();
// NOTE(review): only referenced by the commented-out check near the end of this method.
CLDRFile resolvedFile = cldrFactory.make(key, true);
// get only those paths that are not in "root"
// first, collect all the paths
for (String locale : availableChildren) {
if (locale.indexOf("POSIX") >= 0) continue;
CLDRFile item = cldrFactory.make(locale, false);
for (String xpath : item) {
if (skipPaths.contains(xpath)) continue;
// skip certain elements
if (xpath.indexOf("/identity") >= 0) continue;
if (xpath.startsWith("//ldml/numbers/currencies/currency")) continue;
if (xpath.startsWith("//ldml/dates/timeZoneNames/metazone[")) continue;
if (xpath.indexOf("[@alt") >= 0) continue;
if (xpath.indexOf("/alias") >= 0) continue;
// must be string vale
ValuePair v1 = new ValuePair();
v1.value = item.getStringValue(xpath);
v1.fullxpath = item.getFullXPath(xpath);
ValuePair vAlready = haveSameValues.get(xpath);
if (vAlready == null) {
haveSameValues.put(xpath, v1);
} else if (!v1.value.equals(vAlready.value) || !v1.fullxpath.equals(vAlready.fullxpath)) {
// at this point, haveSameValues is all kosher, so add items
for (String xpath : haveSameValues.keySet()) {
ValuePair v = haveSameValues.get(xpath);
// if (v.value.equals(resolvedFile.getStringValue(xpath))
// && v.fullxpath.equals(resolvedFile.getFullXPath(xpath))) continue;
replacements.add(v.fullxpath, v.value);
static void fixAltProposed() {
throw new IllegalArgumentException();
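/**
 * Perform various fixes
 * TODO add options to pick which one.
 *
 * @param k the file to fix; collected removals and replacements are applied to it
 * @param inputOptions the option letters selecting which fixes to run
 * @param config
 * @param cldrFactory the factory passed on to the fix list and to fixIdenticalChildren
 */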
// Runs the selected fixes on k: sets up fixList with k and the two collectors (`removal`,
// `replacements`), iterates k's paths, optionally runs the attribute-validity and
// identical-children fixes, then prints the collected changes and applies them to k.
// NOTE(review): closing braces and the body of the path loop are not visible in this
// listing; `config` is not used in the visible code.
private static void fix(CLDRFile k, String inputOptions, String config, Factory cldrFactory) {
// TODO before modifying, make sure that it is fully resolved.
// then minimize against the NEW parents
// Paths to delete, ordered by k's comparator.
Set<String> removal = new TreeSet<>(k.getComparator());
// Scratch file that accumulates the paths/values to write back into k.
CLDRFile replacements = SimpleFactory.makeFile("temp");
fixList.setFile(k, inputOptions, cldrFactory, removal, replacements);
for (String xpath : k) {
// remove bad attributes
if (inputOptions.indexOf('v') >= 0) {
CLDRTest.checkAttributeValidity(k, null, removal);
// raise identical elements
if (inputOptions.indexOf('i') >= 0) {
fixIdenticalChildren(cldrFactory, k, replacements);
// now do the actions we collected
if (removal.size() != 0 || !replacements.isEmpty()) {
if (!removal.isEmpty()) {
for (String path : removal) {
System.out.println(path + " =\t " + k.getStringValue(path));
if (!replacements.isEmpty()) {
// U+00A0 is shown as <NBSP> in the printed output only.
System.out.println(replacements.toString().replaceAll("\u00A0", "<NBSP>"));
if (removal.size() != 0) {
k.removeAll(removal, COMMENT_REMOVALS);
k.putAll(replacements, CLDRFile.MERGE_REPLACE_MINE);
* Internal
// Prints how a collator-ordered TreeSet and a naturally ordered TreeSet compare under
// equals() in both directions, followed by an isEmpty() check on the collator-ordered set.
// NOTE(review): the collator is named caseInsensitive, but no strength setting is visible
// here, and no removeAll(...) call precedes the "removeAll" message in this listing --
// confirm against the full file. The end of this method is also not visible.
public static void testJavaSemantics() {
Collator caseInsensitive = Collator.getInstance(ULocale.ROOT);
Set<String> setWithCaseInsensitive = new TreeSet<>(caseInsensitive);
setWithCaseInsensitive.addAll(Arrays.asList(new String[] { "a", "b", "c" }));
Set<String> plainSet = new TreeSet<>();
plainSet.addAll(Arrays.asList(new String[] { "a", "b", "B" }));
System.out.println("S1 equals S2?\t" + setWithCaseInsensitive.equals(plainSet));
System.out.println("S2 equals S1?\t" + plainSet.equals(setWithCaseInsensitive));
System.out.println("S1 removeAll S2 is empty?\t" + setWithCaseInsensitive.isEmpty());