| package org.unicode.cldr.tool; |
| |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.PrintWriter; |
| import java.io.UncheckedIOException; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.HashSet; |
| import java.util.LinkedHashSet; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.TreeSet; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import org.unicode.cldr.tool.Option.Options; |
| import org.unicode.cldr.tool.Option.Params; |
| import org.unicode.cldr.util.*; |
| |
| import com.google.common.collect.HashMultimap; |
| import com.google.common.collect.ImmutableSet; |
| import com.google.common.collect.Multimap; |
| import com.google.common.collect.Sets; |
| import com.google.common.collect.TreeMultimap; |
| import com.google.common.io.Files; |
| import com.ibm.icu.util.Output; |
| |
| public class GenerateProductionData { |
| static boolean DEBUG = false; |
| static boolean VERBOSE = false; |
| static Matcher FILE_MATCH = null; |
| |
| static String SOURCE_COMMON_DIR = null; |
| static String DEST_COMMON_DIR = null; |
| |
| static boolean ADD_LOGICAL_GROUPS = false; |
| static boolean ADD_DATETIME = false; |
| static boolean ADD_SIDEWAYS = false; |
| static boolean ADD_ROOT = false; |
| static boolean INCLUDE_COMPREHENSIVE = false; |
| static boolean CONSTRAINED_RESTORATION = false; |
| |
| static final Set<String> NON_XML = ImmutableSet.of("dtd", "properties", "testData", "uca"); |
| static final Set<String> COPY_ANYWAY = ImmutableSet.of("casing", "collation"); // don't want to "clean up", makes format difficult to use |
| static final SupplementalDataInfo SDI = CLDRConfig.getInstance().getSupplementalDataInfo(); |
| |
| static final Multimap<String, Pair<String, String>> localeToSubdivisionsToMigrate = TreeMultimap.create(); |
| |
| enum MyOptions { |
| sourceDirectory(new Params() |
| .setHelp("source common directory") |
| .setDefault(CLDRPaths.COMMON_DIRECTORY) |
| .setMatch(".*")), |
| destinationDirectory(new Params() |
| .setHelp("destination common directory") |
| .setDefault(CLDRPaths.STAGING_DIRECTORY + "production/common") |
| .setMatch(".*")), |
| logicalGroups(new Params() |
| .setHelp("add path/values for logical groups") |
| .setDefault("true") |
| .setMatch("true|false")), |
| time(new Params() |
| .setHelp("add path/values for stock date/time/datetime") |
| .setDefault("true") |
| .setMatch("true|false")), |
| Sideways(new Params() |
| .setHelp("add path/values for sideways inheritance") |
| .setDefault("true") |
| .setMatch("true|false")), |
| root(new Params() |
| .setHelp("add path/values for root and code-fallback") |
| .setDefault("true") |
| .setMatch("true|false")), |
| constrainedRestoration(new Params() |
| .setHelp("only add inherited paths that were in original file") |
| .setDefault("true") |
| .setMatch("true|false")), |
| includeComprehensive(new Params() |
| .setHelp("exclude comprehensive paths — otherwise just to modern level") |
| .setDefault("true") |
| .setMatch("true|false")), |
| verbose(new Params() |
| .setHelp("verbose debugging messages")), |
| Debug(new Params() |
| .setHelp("debug")), |
| fileMatch(new Params() |
| .setHelp("regex to match patterns") |
| .setMatch(".*")), |
| ; |
| |
| // BOILERPLATE TO COPY |
| final Option option; |
| |
| private MyOptions(Params params) { |
| option = new Option(this, params); |
| } |
| |
| private static Options myOptions = new Options(); |
| static { |
| for (MyOptions option : MyOptions.values()) { |
| myOptions.add(option, option.option); |
| } |
| } |
| |
| private static Set<String> parse(String[] args, boolean showArguments) { |
| return myOptions.parse(MyOptions.values()[0], args, true); |
| } |
| } |
| |
| public static void main(String[] args) { |
| // TODO rbnf and segments don't have modern coverage; fix there. |
| |
| MyOptions.parse(args, true); |
| SOURCE_COMMON_DIR = MyOptions.sourceDirectory.option.getValue(); |
| DEST_COMMON_DIR = MyOptions.destinationDirectory.option.getValue(); |
| |
| // debugging |
| VERBOSE = MyOptions.verbose.option.doesOccur(); |
| DEBUG = MyOptions.Debug.option.doesOccur(); |
| String fileMatch = MyOptions.fileMatch.option.getValue(); |
| if (fileMatch != null) { |
| FILE_MATCH = Pattern.compile(fileMatch).matcher(""); |
| } |
| |
| // controls for minimization |
| ADD_LOGICAL_GROUPS = "true".equalsIgnoreCase(MyOptions.logicalGroups.option.getValue()); |
| ADD_DATETIME = "true".equalsIgnoreCase(MyOptions.time.option.getValue()); |
| ADD_SIDEWAYS = "true".equalsIgnoreCase(MyOptions.Sideways.option.getValue()); |
| ADD_ROOT = "true".equalsIgnoreCase(MyOptions.root.option.getValue()); |
| |
| // constraints |
| INCLUDE_COMPREHENSIVE = "true".equalsIgnoreCase(MyOptions.includeComprehensive.option.getValue()); |
| CONSTRAINED_RESTORATION = "true".equalsIgnoreCase(MyOptions.constrainedRestoration.option.getValue()); |
| |
| // get directories |
| |
| Arrays.asList(DtdType.values()) |
| .parallelStream() |
| .unordered() |
| .forEach(type -> { |
| boolean isLdmlDtdType = type == DtdType.ldml; |
| |
| // bit of a hack, using the ldmlICU — otherwise unused! — to get the nonXML files. |
| Set<String> directories = (type == DtdType.ldmlICU) ? NON_XML : type.directories; |
| |
| for (String dir : directories) { |
| File sourceDir = new File(SOURCE_COMMON_DIR, dir); |
| File destinationDir = new File(DEST_COMMON_DIR, dir); |
| Stats stats = new Stats(); |
| copyFilesAndReturnIsEmpty(sourceDir, destinationDir, null, isLdmlDtdType, stats); |
| } |
| }); |
| // should be called from the main thread. Synchronizing to document. |
| if (!localeToSubdivisionsToMigrate.isEmpty()) { |
| System.err.println("WARNING: Subdivision files not written, " + localeToSubdivisionsToMigrate.size() + " entries\n" + |
| "For locales: " + localeToSubdivisionsToMigrate.keySet()); |
| for (Entry<String, Pair<String, String>> entry : localeToSubdivisionsToMigrate.entries()) { |
| System.err.println(entry.getKey() + " \t" + entry.getValue()); |
| } |
| } |
| } |
| |
| private static class Stats { |
| long files; |
| long removed; |
| long retained; |
| long remaining; |
| Stats clear() { |
| files = removed = retained = remaining = 0; |
| return this; |
| } |
| @Override |
| public String toString() { |
| return |
| "files=" + files |
| + (removed + retained + remaining == 0 ? "" |
| : "; removed=" + removed |
| + "; retained=" + retained |
| + "; remaining=" + remaining); |
| } |
| public void showNonZero(String label) { |
| if (removed + retained + remaining != 0) { |
| System.out.println(label + toString()); |
| } |
| } |
| } |
| |
| /** |
| * Copy files in directories, recursively. |
| * @param sourceFile |
| * @param destinationFile |
| * @param factory |
| * @param isLdmlDtdType |
| * @param stats |
| * @param hasChildren |
| * @return true if the file is an ldml file with empty content. |
| */ |
| private static boolean copyFilesAndReturnIsEmpty(File sourceFile, File destinationFile, |
| Factory factory, boolean isLdmlDtdType, final Stats stats) { |
| if (sourceFile.isDirectory()) { |
| |
| System.out.println(sourceFile + " => " + destinationFile); |
| if (!destinationFile.mkdirs()) { |
| // if created, remove old contents |
| Arrays.stream(destinationFile.listFiles()).forEach(File::delete); |
| } |
| |
| Set<String> sorted = new TreeSet<>(); |
| sorted.addAll(Arrays.asList(sourceFile.list())); |
| |
| if (COPY_ANYWAY.contains(sourceFile.getName())) { // special cases |
| isLdmlDtdType = false; |
| } |
| // reset factory for directory |
| factory = null; |
| if (isLdmlDtdType) { |
| // if the factory is empty, then we just copy files |
| factory = Factory.make(sourceFile.toString(), ".*"); |
| } |
| boolean isMainDir = factory != null && sourceFile.getName().contentEquals("main"); |
| boolean isRbnfDir = factory != null && sourceFile.getName().contentEquals("rbnf"); |
| boolean isAnnotationsDir = factory != null && sourceFile.getName().startsWith("annotations"); |
| |
| Set<String> emptyLocales = new HashSet<>(); |
| final Stats stats2 = new Stats(); |
| final Factory theFactory = factory; |
| final boolean isLdmlDtdType2 = isLdmlDtdType; |
| sorted |
| .parallelStream() |
| .forEach(file -> { |
| File sourceFile2 = new File(sourceFile, file); |
| File destinationFile2 = new File(destinationFile, file); |
| if (VERBOSE) System.out.println("\t" + file); |
| |
| // special step to just copy certain files like main/root.xml file |
| Factory currFactory = theFactory; |
| if (isMainDir) { |
| if (file.equals("root.xml")) { |
| currFactory = null; |
| } |
| } else if (isRbnfDir) { |
| currFactory = null; |
| } |
| |
| // when the currFactory is null, we just copy files as-is |
| boolean isEmpty = copyFilesAndReturnIsEmpty(sourceFile2, destinationFile2, currFactory, isLdmlDtdType2, stats2); |
| if (isEmpty) { // only happens for ldml |
| emptyLocales.add(file.substring(0,file.length()-4)); // remove .xml for localeId |
| } |
| }); |
| stats2.showNonZero("\tTOTAL:\t"); |
| // if there are empty ldml files, AND we aren't in /main/, |
| // then remove any without children |
| if (!emptyLocales.isEmpty() && !isMainDir) { |
| Set<String> childless = getChildless(emptyLocales, factory.getAvailable(), isAnnotationsDir); |
| if (!childless.isEmpty()) { |
| if (VERBOSE) System.out.println("\t" + destinationFile + "\tRemoving empty locales:" + childless); |
| childless.stream().forEach(locale -> new File(destinationFile, locale + ".xml").delete()); |
| } |
| } |
| return false; |
| } else if (factory != null) { |
| String file = sourceFile.getName(); |
| if (!file.endsWith(".xml")) { |
| return false; |
| } |
| String localeId = file.substring(0, file.length()-4); |
| if (FILE_MATCH != null) { |
| if (!FILE_MATCH.reset(localeId).matches()) { |
| return false; |
| } |
| } |
| boolean isRoot = localeId.equals("root"); |
| String directoryName = sourceFile.getParentFile().getName(); |
| boolean isSubdivisionDirectory = "subdivisions".equals(directoryName); |
| |
| CLDRFile cldrFileUnresolved = factory.make(localeId, false); |
| CLDRFile cldrFileResolved = factory.make(localeId, true); |
| boolean gotOne = false; |
| Set<String> toRemove = new TreeSet<>(); // TreeSet just makes debugging easier |
| Set<String> toRetain = new TreeSet<>(); |
| Output<String> pathWhereFound = new Output<>(); |
| Output<String> localeWhereFound = new Output<>(); |
| |
| boolean isArabicSpecial = localeId.equals("ar") || localeId.startsWith("ar_"); |
| |
| String debugPath = null; // "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"power-kilowatt\"]/displayName"; |
| String debugLocale = "af"; |
| |
| for (String xpath : cldrFileUnresolved) { |
| if (xpath.startsWith("//ldml/identity")) { |
| continue; |
| } |
| if (debugPath != null && localeId.equals(debugLocale) && xpath.equals(debugPath)) { |
| int debug = 0; |
| } |
| |
| String value = cldrFileUnresolved.getStringValue(xpath); |
| if (value == null || CldrUtility.INHERITANCE_MARKER.equals(value)) { |
| toRemove.add(xpath); |
| continue; |
| } |
| |
| // special-case the root values that are only for Survey Tool use |
| |
| if (isRoot) { |
| if (AnnotationUtil.pathIsAnnotation(xpath)) { |
| toRemove.add(xpath); |
| continue; |
| } |
| } |
| |
| // special case for Arabic defaultNumberingSystem |
| if (isArabicSpecial && xpath.contains("/defaultNumberingSystem")) { |
| toRetain.add(xpath); |
| } |
| |
| // remove items that are the same as their bailey values. This also catches Inheritance Marker |
| |
| String bailey = cldrFileResolved.getBaileyValue(xpath, pathWhereFound, localeWhereFound); |
| if (value.equals(bailey) |
| && (!ADD_SIDEWAYS |
| || pathEqualsOrIsAltVariantOf(xpath, pathWhereFound.value)) |
| && (!ADD_ROOT |
| || (!Objects.equals(XMLSource.ROOT_ID, localeWhereFound.value) |
| && !Objects.equals(XMLSource.CODE_FALLBACK_ID, localeWhereFound.value)))) { |
| toRemove.add(xpath); |
| continue; |
| } |
| |
| // Move subdivisions elsewhere |
| if (!isSubdivisionDirectory && xpath.startsWith("//ldml/localeDisplayNames/subdivisions/subdivision")) { |
| synchronized(localeToSubdivisionsToMigrate) { |
| localeToSubdivisionsToMigrate.put(localeId, Pair.of(xpath, value)); |
| } |
| toRemove.add(xpath); |
| continue; |
| } |
| // remove level=comprehensive (under setting) |
| |
| if (!INCLUDE_COMPREHENSIVE) { |
| Level coverage = SDI.getCoverageLevel(xpath, localeId); |
| if (coverage == Level.COMPREHENSIVE) { |
| toRemove.add(xpath); |
| continue; |
| } |
| } |
| |
| // if we got all the way to here, we have a non-empty result |
| |
| // check to see if we might need to flesh out logical groups |
| // TODO Should be done in the converter tool!! |
| if (ADD_LOGICAL_GROUPS && !LogicalGrouping.isOptional(cldrFileResolved, xpath)) { |
| Set<String> paths = LogicalGrouping.getPaths(cldrFileResolved, xpath); |
| if (paths != null && paths.size() > 1) { |
| for (String possiblePath : paths) { |
| // Unclear from API whether we need to do this filtering |
| if (!LogicalGrouping.isOptional(cldrFileResolved, possiblePath)) { |
| toRetain.add(possiblePath); |
| } |
| } |
| } |
| } |
| |
| // check to see if we might need to flesh out datetime. |
| // TODO Should be done in the converter tool!! |
| if (ADD_DATETIME && isDateTimePath(xpath)) { |
| toRetain.addAll(dateTimePaths(xpath)); |
| } |
| |
| // past the gauntlet |
| gotOne = true; |
| } |
| |
| // we even add empty files, but can delete them back on the directory level. |
| try (PrintWriter pw = new PrintWriter(destinationFile)) { |
| CLDRFile outCldrFile = cldrFileUnresolved.cloneAsThawed(); |
| if (isSubdivisionDirectory) { |
| synchronized (localeToSubdivisionsToMigrate) { |
| Collection<Pair<String, String>> path_values = localeToSubdivisionsToMigrate.get(localeId); |
| if (path_values != null) { |
| for (Pair<String, String>path_value : path_values) { |
| outCldrFile.add(path_value.getFirst(), path_value.getSecond()); |
| } |
| localeToSubdivisionsToMigrate.removeAll(localeId); |
| } |
| } |
| } |
| |
| // Remove paths, but pull out the ones to retain |
| // example: |
| // toRemove == {a b c} // c may have ^^^ value |
| // toRetain == {b c d} // d may have ^^^ value |
| |
| if (DEBUG) { |
| showIfNonZero(localeId, "removing", toRemove); |
| showIfNonZero(localeId, "retaining", toRetain); |
| |
| } |
| if (CONSTRAINED_RESTORATION) { |
| toRetain.retainAll(toRemove); // only add paths that were there already |
| // toRetain == {b c} |
| if (DEBUG) { |
| showIfNonZero(localeId, "constrained retaining", toRetain); |
| } |
| } |
| |
| boolean changed0 = toRemove.removeAll(toRetain); |
| // toRemove == {a} |
| if (DEBUG && changed0) { |
| showIfNonZero(localeId, "final removing", toRemove); |
| } |
| |
| boolean changed = toRetain.removeAll(toRemove); |
| // toRetain = {b c d} or if constrained, {b c} |
| if (DEBUG && changed) { |
| showIfNonZero(localeId, "final retaining", toRetain); |
| } |
| |
| outCldrFile.removeAll(toRemove, false); |
| if (DEBUG) { |
| for (String xpath : toRemove) { |
| System.out.println(localeId + ": removing: «" |
| + cldrFileUnresolved.getStringValue(xpath) |
| + "», " + xpath); |
| } |
| } |
| |
| // now set any null values to bailey values if not present |
| for (String xpath : toRetain) { |
| if (debugPath != null && localeId.equals(debugLocale) && xpath.equals(debugPath)) { |
| int debug = 0; |
| } |
| String value = cldrFileResolved.getStringValue(xpath); |
| if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) { |
| throw new IllegalArgumentException(localeId + ": " + value + " in value for " + xpath); |
| } else { |
| if (DEBUG) { |
| String oldValue = cldrFileUnresolved.getStringValue(xpath); |
| System.out.println("Restoring: «" + oldValue + "» ⇒ «" + value |
| + "»\t" + xpath); |
| } |
| outCldrFile.add(xpath, value); |
| } |
| } |
| |
| // double-check results |
| int count = 0; |
| for (String xpath : outCldrFile) { |
| if (debugPath != null && localeId.equals(debugLocale) && xpath.equals(debugPath)) { |
| int debug = 0; |
| } |
| String value = outCldrFile.getStringValue(xpath); |
| if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) { |
| throw new IllegalArgumentException(localeId + ": " + value + " in value for " + xpath); |
| } |
| } |
| |
| outCldrFile.write(pw); |
| ++stats.files; |
| stats.removed += toRemove.size(); |
| stats.retained += toRetain.size(); |
| stats.remaining += count; |
| } catch (FileNotFoundException e) { |
| throw new UncheckedIOException("Can't copy " + sourceFile + " to " + destinationFile + " — ", e); |
| } |
| return !gotOne; |
| } else { |
| if (FILE_MATCH != null) { |
| String file = sourceFile.getName(); |
| int dotPos = file.lastIndexOf('.'); |
| String baseName = dotPos >= 0 ? file.substring(0, file.length()-dotPos) : file; |
| if (!FILE_MATCH.reset(baseName).matches()) { |
| return false; |
| } |
| } |
| // for now, just copy |
| ++stats.files; |
| copyFiles(sourceFile, destinationFile); |
| return false; |
| } |
| } |
| |
| private static void showIfNonZero(String localeId, String title, Set<String> toRemove) { |
| if (toRemove.size() != 0) { |
| System.out.println(localeId + ": " |
| + title |
| + ": " + toRemove.size()); |
| } |
| } |
| |
| private static boolean pathEqualsOrIsAltVariantOf(String desiredPath, String foundPath) { |
| if (desiredPath.equals(foundPath)) { |
| return true; |
| } |
| if (desiredPath.contains("type=\"en_GB\"") && desiredPath.contains("alt=")) { |
| int debug = 0; |
| } |
| if (foundPath == null || foundPath.equals(GlossonymConstructor.PSEUDO_PATH)) { |
| // We can do this, because the bailey value has already been checked. |
| // Since it isn't null, a null or PSEUDO_PATH indicates a constructed alt value. |
| return true; |
| } |
| XPathParts desiredPathParts = XPathParts.getFrozenInstance(desiredPath); |
| XPathParts foundPathParts = XPathParts.getFrozenInstance(foundPath); |
| if (desiredPathParts.size() != foundPathParts.size()) { |
| return false; |
| } |
| for (int e = 0; e < desiredPathParts.size(); ++e) { |
| String element1 = desiredPathParts.getElement(e); |
| String element2 = foundPathParts.getElement(e); |
| if (!element1.equals(element2)) { |
| return false; |
| } |
| Map<String, String> attr1 = desiredPathParts.getAttributes(e); |
| Map<String, String> attr2 = foundPathParts.getAttributes(e); |
| if (attr1.equals(attr2)) { |
| continue; |
| } |
| Set<String> keys1 = attr1.keySet(); |
| Set<String> keys2 = attr2.keySet(); |
| for (String attr : Sets.union(keys1, keys2)) { |
| if (attr.equals("alt")) { |
| continue; |
| } |
| if (!Objects.equals(attr1.get(attr), attr2.get(attr))) { |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| private static boolean isDateTimePath(String xpath) { |
| return xpath.startsWith("//ldml/dates/calendars/calendar") |
| && xpath.contains("FormatLength[@type="); |
| } |
| |
| /** generate full dateTimePaths from any element |
| //ldml/dates/calendars/calendar[@type="gregorian"]/dateFormats/dateFormatLength[@type=".*"]/dateFormat[@type="standard"]/pattern[@type="standard"] |
| //ldml/dates/calendars/calendar[@type="gregorian"]/timeFormats/timeFormatLength[@type=".*"]/timeFormat[@type="standard"]/pattern[@type="standard"] |
| //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/dateTimeFormatLength[@type=".*"]/dateTimeFormat[@type="standard"]/pattern[@type="standard"] |
| */ |
| private static Set<String> dateTimePaths(String xpath) { |
| LinkedHashSet<String> result = new LinkedHashSet<>(); |
| String prefix = xpath.substring(0,xpath.indexOf(']') + 2); // get after ]/ |
| for (String type : Arrays.asList("date", "time", "dateTime")) { |
| String pattern = prefix + "$XFormats/$XFormatLength[@type=\"$Y\"]/$XFormat[@type=\"standard\"]/pattern[@type=\"standard\"]".replace("$X", type); |
| for (String width : Arrays.asList("full", "long", "medium", "short")) { |
| result.add(pattern.replace("$Y", width)); |
| } |
| } |
| return result; |
| } |
| |
| private static Set<String> getChildless(Set<String> emptyLocales, Set<String> available, boolean isAnnotationsDir) { |
| // first build the parent2child map |
| Multimap<String,String> parent2child = HashMultimap.create(); |
| for (String locale : available) { |
| String parent = LocaleIDParser.getParent(locale); |
| if (parent != null) { |
| parent2child.put(parent, locale); |
| } |
| if (isAnnotationsDir) { |
| String simpleParent = LocaleIDParser.getParent(locale, true); |
| if (simpleParent != null && (parent == null || simpleParent != parent)) { |
| parent2child.put(simpleParent, locale); |
| } |
| } |
| } |
| |
| // now cycle through the empties |
| Set<String> result = new HashSet<>(); |
| for (String empty : emptyLocales) { |
| if (allChildrenAreEmpty(empty, emptyLocales, parent2child)) { |
| result.add(empty); |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Recursively checks that all children are empty (including that there are no children) |
| * @param name |
| * @param emptyLocales |
| * @param parent2child |
| * @return |
| */ |
| private static boolean allChildrenAreEmpty( |
| String locale, |
| Set<String> emptyLocales, |
| Multimap<String, String> parent2child) { |
| |
| Collection<String> children = parent2child.get(locale); |
| for (String child : children) { |
| if (!emptyLocales.contains(child)) { |
| return false; |
| } |
| if (!allChildrenAreEmpty(child, emptyLocales, parent2child)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| private static void copyFiles(File sourceFile, File destinationFile) { |
| try { |
| Files.copy(sourceFile, destinationFile); |
| } catch (IOException e) { |
| System.err.println("Can't copy " + sourceFile + " to " + destinationFile + " — " + e); |
| } |
| } |
| } |