// blob: a428985976090cbdd6a7b6880056dd798cb3b0f9 [file] [log] [blame]
package org.unicode.cldr.tool;
import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.DraftStatus;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CoverageInfo;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.Organization;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexFileParser;
import org.unicode.cldr.util.RegexFileParser.RegexLineParser;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.StandardCodes;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.XMLSource;
import org.unicode.cldr.util.XPathParts;
import com.ibm.icu.util.Output;
/**
* Factory for filtering CLDRFiles by organization and replacing certain values.
* Organization coverage data is in org/unicode/cldr/util/data/Locales.txt.
*
* @author jchye
*/
public class FilterFactory extends Factory {
/**
 * The kinds of modification rule that a modifier file line may declare.
 * The constant names are matched against the first field of each line.
 */
private enum ModificationType {
    /** The rule maps one XPath (or XPath regex) onto another XPath. */
    xpath,
    /** The rule rewrites values using a regex and a replacement string. */
    value
}
/** The wrapped factory whose files are filtered. */
private final Factory rawFactory;
/** Organization whose coverage level drives filtering; null disables coverage filtering and duplicate removal. */
private final String organization;
/** Assigned in the constructor; not read anywhere else in this class. */
private final SupplementalDataInfo supplementalData;
/** True if the modifiers loaded from the modifier file should be applied. */
private final boolean modifyValues;
/** Modifiers applied, in order, to every unresolved file produced by this factory. */
private final List<Modifier> modifiers = new ArrayList<Modifier>();
/**
 * Builds a filtering wrapper around an existing factory. The supplemental
 * directory of the wrapped factory is reused for this factory.
 *
 * @param rawFactory
 *            the factory whose files will be filtered
 * @param organization
 *            the organization the filtering is done for
 * @param modifyValues
 *            true if certain values in the data should be modified or replaced
 */
private FilterFactory(Factory rawFactory, String organization, boolean modifyValues) {
    this.rawFactory = rawFactory;
    this.organization = organization;
    this.modifyValues = modifyValues;
    this.supplementalData = SupplementalDataInfo.getInstance();
    this.setSupplementalDirectory(rawFactory.getSupplementalDirectory());
}
/**
 * Creates a FilterFactory and loads its modifiers from "dataModifiers.txt".
 *
 * @param rawFactory
 *            the factory to be filtered
 * @param organization
 *            the organization that the filtering is catered towards
 * @param usesAltValue
 *            passed to the constructor as its modifyValues flag
 * @return the newly created factory
 */
public static FilterFactory load(Factory rawFactory, String organization, boolean usesAltValue) {
    final FilterFactory result = new FilterFactory(rawFactory, organization, usesAltValue);
    result.loadModifiers("dataModifiers.txt");
    return result;
}
/**
 * @return the source directories reported by the wrapped factory.
 */
@Override
public File[] getSourceDirectories() {
    final File[] directories = rawFactory.getSourceDirectories();
    return directories;
}
/**
 * @param localeID
 *            the locale being queried
 * @return the source directories the wrapped factory reports for the locale.
 */
@Override
public List<File> getSourceDirectoriesForLocale(String localeID) {
    final List<File> directories = rawFactory.getSourceDirectoriesForLocale(localeID);
    return directories;
}
/**
 * Produces the file for a locale. Unresolved requests go through the
 * filtering pipeline; resolved requests are built from a resolving source.
 */
@Override
protected CLDRFile handleMake(String localeID, boolean resolved, DraftStatus minimalDraftStatus) {
    if (!resolved) {
        return filterCldrFile(localeID, minimalDraftStatus);
    }
    return new CLDRFile(makeResolvingSource(localeID, minimalDraftStatus));
}
/**
 * Fetches the unresolved file from the wrapped factory, takes a thawed clone
 * of it, and runs the three filtering steps on the clone in order: value
 * modification, coverage filtering, redundant-path removal.
 *
 * @return a filtered CLDRFile.
 */
private CLDRFile filterCldrFile(String localeID, DraftStatus minimalDraftStatus) {
    CLDRFile unresolved = rawFactory.make(localeID, false, minimalDraftStatus);
    CLDRFile working = unresolved.cloneAsThawed();
    filterAltValues(working);
    filterCoverage(working);
    removeRedundantPaths(working);
    return working;
}
/**
 * Applies every loaded modifier that has entries for the file's locale.
 * Does nothing unless value modification was requested.
 *
 * @param rawFile
 *            the file to modify in place
 */
private void filterAltValues(CLDRFile rawFile) {
    if (modifyValues) {
        for (Modifier candidate : modifiers) {
            Modifier applicable = candidate.filterLocale(rawFile.getLocaleID());
            if (applicable.isEmpty()) {
                continue;
            }
            applicable.modifyFile(rawFile);
        }
    }
}
/**
 * Filters a CLDRFile according to the specified organization's coverage level:
 * every path whose coverage value exceeds the organization's level for the
 * file's locale is removed. Does nothing if no organization was given.
 *
 * @param rawFile
 *            the file to filter in place
 */
private void filterCoverage(CLDRFile rawFile) {
    if (organization == null) return;
    String localeID = rawFile.getLocaleID();
    int minLevel = StandardCodes.make()
        .getLocaleCoverageLevel(organization, localeID)
        .getLevel();
    CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
    // Collect first and remove afterwards so that the file is never
    // structurally modified while it is being iterated.
    List<String> pathsToRemove = new ArrayList<String>();
    for (String xpath : rawFile) {
        // NOTE(review): a previous comment here said "Locale metadata shouldn't
        // be stripped", but no path is exempted by this loop - confirm whether
        // identity paths need an explicit skip.
        int level = covInfo.getCoverageValue(xpath, localeID);
        if (level > minLevel) {
            pathsToRemove.add(xpath);
        }
    }
    for (String xpath : pathsToRemove) {
        rawFile.remove(xpath);
    }
}
/**
 * Removes paths with duplicate values that can be found elsewhere: either the
 * value equals that of the same path with count="other" in this file, or it
 * equals the value the resolved parent locale already supplies for the path.
 * Identity paths are never removed. Does nothing for root or when no
 * organization was given.
 *
 * @param rawFile
 *            the file to prune in place
 */
private void removeRedundantPaths(CLDRFile rawFile) {
    if (organization == null || rawFile.getLocaleID().equals("root")) return;

    String parent = LocaleIDParser.getParent(rawFile.getLocaleID());
    CLDRFile resolvedParent = rawFactory.make(parent, true);
    // Paths are collected here and removed after the loop so that the file is
    // not modified while it is being iterated.
    List<String> duplicatePaths = new ArrayList<String>();
    XPathParts parts = new XPathParts();
    for (String xpath : rawFile) {
        if (xpath.startsWith("//ldml/identity")) continue;
        String value = rawFile.getStringValue(xpath);
        // Remove count="x" if the value is equivalent to count="other".
        if (xpath.contains("[@count=")) {
            parts.set(xpath);
            // The substring test above matches a count attribute on any element,
            // whereas the attribute is read from the last element only, so it
            // may be absent here.
            String count = parts.getAttributeValue(-1, "count");
            if (count != null && !count.equals("other")) {
                parts.setAttribute(-1, "count", "other");
                String otherPath = parts.toString();
                if (value.equals(rawFile.getStringValue(otherPath))) {
                    duplicatePaths.add(xpath);
                    continue;
                }
            }
        }
        // Remove xpaths with values also found in the parent, unless the
        // parent's value only comes from the code-fallback source.
        String sourceLocale = resolvedParent.getSourceLocaleID(xpath, null);
        if (!XMLSource.CODE_FALLBACK_ID.equals(sourceLocale)) {
            String parentValue = resolvedParent.getStringValue(xpath);
            if (value.equals(parentValue)) {
                duplicatePaths.add(xpath);
            }
        }
    }
    for (String xpath : duplicatePaths) {
        rawFile.remove(xpath);
    }
}
/**
 * @return the minimal draft status reported by the wrapped factory.
 */
@Override
public DraftStatus getMinimalDraftStatus() {
    final DraftStatus status = rawFactory.getMinimalDraftStatus();
    return status;
}
/**
 * @return the locales available from the wrapped factory.
 */
@Override
protected Set<String> handleGetAvailable() {
    final Set<String> available = rawFactory.getAvailable();
    return available;
}
/**
* Wrapper class for holding information about a value modification entry.
*/
private class ModifierEntry {
String oldValue;
String newValue;
Map<String, String> options;
public ModifierEntry(String oldValue, String newValue, Map<String, String> options) {
this.oldValue = oldValue;
this.newValue = newValue;
this.options = options;
}
/**
* @param locale
* the locale to be matched
* @return true if the locale matches the locale filter in this entry.
*/
public boolean localeMatches(String locale) {
String pattern = options.get("locale");
return pattern == null ? true : locale.matches(pattern);
}
}
/**
 * Base class for one kind of data modification applied to a CLDRFile.
 * Holds the entries that drive the modification.
 */
private abstract class Modifier {
    protected List<ModifierEntry> entries = new ArrayList<ModifierEntry>();

    /**
     * Applies this modifier's entries to the given file.
     */
    public abstract void modifyFile(CLDRFile file);

    /**
     * @return a modifier of the same kind restricted to the given locale.
     */
    public abstract Modifier filterLocale(String locale);

    /**
     * @return the list of modifiers meant for the specified locale.
     */
    protected List<ModifierEntry> getModifiersForLocale(String locale) {
        List<ModifierEntry> matching = new ArrayList<ModifierEntry>();
        Iterator<ModifierEntry> cursor = entries.iterator();
        while (cursor.hasNext()) {
            ModifierEntry candidate = cursor.next();
            if (!candidate.localeMatches(locale)) {
                continue;
            }
            matching.add(candidate);
        }
        return matching;
    }

    /**
     * Registers another entry with this modifier.
     *
     * @param entry
     *            the entry to add
     */
    public void addModifierEntry(ModifierEntry entry) {
        entries.add(entry);
    }

    /**
     * @return true if this modifier has no entries.
     */
    public boolean isEmpty() {
        return entries.isEmpty();
    }
}
/**
 * Moves the value stored at one literal XPath onto another XPath.
 */
private class PathModifier extends Modifier {
    /**
     * For each entry whose old path has a value in the file, stores that
     * value under the new path and then removes the old path.
     */
    @Override
    public void modifyFile(CLDRFile file) {
        for (ModifierEntry rule : entries) {
            final String sourcePath = rule.oldValue;
            final String movedValue = file.getStringValue(sourcePath);
            if (movedValue == null) {
                continue;
            }
            final String targetPath = rule.newValue;
            file.add(targetPath, movedValue);
            file.remove(sourcePath);
        }
    }

    /**
     * @return a new PathModifier holding only the entries for the locale.
     */
    @Override
    public Modifier filterLocale(String locale) {
        PathModifier restricted = new PathModifier();
        restricted.entries = getModifiersForLocale(locale);
        return restricted;
    }
}
/**
 * Replaces certain values with other values. Each entry's old value is a
 * regex and its new value is the replacement string; an optional "xpath"
 * option restricts which paths are touched.
 */
private class ValueModifier extends Modifier {
    @Override
    public void modifyFile(CLDRFile file) {
        // Replace values.
        for (ModifierEntry entry : entries) {
            String filteringPath = entry.options.get("xpath");
            if (filteringPath != null && isValidXPath(filteringPath)) {
                // For non-regex XPaths, look them up directly.
                String value = file.getStringValue(filteringPath);
                if (value != null) {
                    value = value.replaceAll(entry.oldValue, entry.newValue);
                    file.add(filteringPath, value);
                }
            } else {
                // No xpath option: scan every path. Regex xpath option: scan
                // only the paths accepted by the matcher.
                Iterator<String> iterator = filteringPath == null
                    ? file.iterator()
                    : file.iterator(PatternCache.get(filteringPath).matcher(""));
                // String.replaceAll would recompile the regex for every path,
                // so compile it once per entry before the scan.
                Pattern valuePattern = Pattern.compile(entry.oldValue);
                while (iterator.hasNext()) {
                    String xpath = iterator.next();
                    String originalValue = file.getStringValue(xpath);
                    String value = valuePattern.matcher(originalValue).replaceAll(entry.newValue);
                    // Only write back values that actually changed.
                    if (!value.equals(originalValue)) {
                        file.add(xpath, value);
                    }
                }
            }
        }
    }

    /**
     * @return a new ValueModifier holding only the entries for the locale.
     */
    @Override
    public Modifier filterLocale(String locale) {
        ValueModifier newModifier = new ValueModifier();
        newModifier.entries = getModifiersForLocale(locale);
        return newModifier;
    }
}
/**
 * Maps the value of XPaths onto other XPaths using regexes.
 */
private class PathRegexModifier extends Modifier {
    /** Lookup from old-path regex to new-path template, kept in step with the entries. */
    private final RegexLookup<String> xpathLookup = new RegexLookup<String>();

    /**
     * Adds the entry and registers its old-path regex in the lookup.
     */
    @Override
    public void addModifierEntry(ModifierEntry entry) {
        super.addModifierEntry(entry);
        xpathLookup.add(entry.oldValue, entry.newValue);
    }

    /**
     * Moves the value of every path matched by the lookup onto the path
     * produced from the matching template.
     */
    @Override
    public void modifyFile(CLDRFile file) {
        if (xpathLookup.size() == 0) return;

        // First pass: work out the renames without touching the file, so the
        // file is not modified while it is being iterated.
        // Each element is { oldPath, newPath }.
        List<String[]> renames = new ArrayList<String[]>();
        Output<String[]> arguments = new Output<String[]>();
        for (String xpath : file) {
            String newValue = xpathLookup.get(xpath, null, arguments, null, null);
            if (newValue != null) {
                String newPath = RegexLookup.replace(newValue, arguments.value);
                renames.add(new String[] { xpath, newPath });
            }
        }

        // Second pass: apply the renames in the order they were found.
        for (String[] rename : renames) {
            String oldPath = rename[0];
            String value = file.getStringValue(oldPath);
            file.add(rename[1], value);
            file.remove(oldPath);
        }
    }

    /**
     * @return a new PathRegexModifier holding only the entries for the locale,
     *         with its lookup populated from those entries.
     */
    @Override
    public Modifier filterLocale(String locale) {
        PathRegexModifier newModifier = new PathRegexModifier();
        for (ModifierEntry entry : getModifiersForLocale(locale)) {
            // Adds to both the entry list and the regex lookup.
            newModifier.addModifierEntry(entry);
        }
        return newModifier;
    }
}
/**
 * Loads modifiers from a specified file. Does nothing unless value
 * modification was requested.
 *
 * Each line handed to the parser is split on semicolons into
 * "type ; oldValue ; newValue" followed by any number of "key=value" options.
 * The type is "xpath" or "value".
 *
 * @param filename
 *            the name of the modifier file, resolved by RegexFileParser
 *            against FilterFactory.class
 * @throws IllegalArgumentException
 *             if a line has fewer than three fields, names an unknown type,
 *             or has an option without '='
 */
private void loadModifiers(String filename) {
    if (!modifyValues) return;
    final Modifier pathModifier = new PathModifier();
    final Modifier pathRegexModifier = new PathRegexModifier();
    final Modifier valueModifier = new ValueModifier();
    RegexFileParser fileParser = new RegexFileParser();
    fileParser.setLineParser(new RegexLineParser() {
        @Override
        public void parse(String line) {
            String[] contents = line.split("\\s*+;\\s*+");
            // Fail with the offending line instead of a bare index error.
            if (contents.length < 3) {
                throw new IllegalArgumentException(
                    "Invalid modifier line, expected at least 3 fields: " + line);
            }
            ModificationType filterType = ModificationType.valueOf(contents[0]);
            String oldValue = contents[1];
            String newValue = contents[2];
            // Process remaining options.
            Map<String, String> options = new HashMap<String, String>();
            for (int i = 3; i < contents.length; i++) {
                String rawLine = contents[i];
                int pos = rawLine.indexOf('=');
                if (pos < 0) {
                    throw new IllegalArgumentException("Invalid option: " + rawLine);
                }
                String optionType = rawLine.substring(0, pos).trim();
                options.put(optionType, rawLine.substring(pos + 1).trim());
            }
            switch (filterType) {
            case xpath:
                // Literal XPaths are moved directly; anything else is
                // treated as a regex over XPaths.
                if (isValidXPath(oldValue)) {
                    pathModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
                } else {
                    pathRegexModifier.addModifierEntry(new ModifierEntry(fixXPathRegex(oldValue),
                        newValue, options));
                }
                break;
            case value:
                // A non-literal xpath option is normalized into a regex.
                String xpath = options.get("xpath");
                if (xpath != null && !isValidXPath(xpath)) {
                    options.put("xpath", fixXPathRegex(xpath));
                }
                valueModifier.addModifierEntry(new ModifierEntry(oldValue, newValue, options));
                break;
            }
        }
    });
    fileParser.parse(FilterFactory.class, filename);
    modifiers.add(pathModifier);
    modifiers.add(pathRegexModifier);
    modifiers.add(valueModifier);
}
/**
 * Matches a literal XPath: "//" followed by slash-separated element names,
 * each optionally followed by [@attribute="value"] predicates whose values
 * contain none of the characters " ( ) % \. Shared by all instances.
 */
private static final Pattern XPATH_PATTERN = PatternCache.get("/(/\\w++(\\[@\\w++=\"[^\"()%\\\\]+\"])*)++");
/**
 * Tests whether the whole string matches XPATH_PATTERN.
 *
 * @param path
 *            the string to test
 * @return true if path is a valid XPath and not a regex.
 */
private boolean isValidXPath(String path) {
    final Matcher literalPath = XPATH_PATTERN.matcher(path);
    return literalPath.matches();
}
/**
 * Converts an xpath into a proper regex pattern by escaping the opening
 * bracket of every "[@" and anchoring the result at the start.
 *
 * @param path
 *            the xpath expression to convert
 * @return the anchored, bracket-escaped regex
 */
private String fixXPathRegex(String path) {
    final String escaped = path.replace("[@", "\\[@");
    return "^" + escaped;
}
/**
 * Command-line options for {@link #main}: the organization to filter for
 * (default: Organization.cldr) and a regex selecting the locales to process
 * (default: all).
 */
private static final Options options = new Options(
    "Filters CLDR XML files according to organizational coverage levels and an " +
        "input file of replacement values/xpaths.")
    .add("org", 'o', ".*", Organization.cldr.name(), "The organization that the filtering is for. If set, also removes duplicate paths.")
    .add("locales", 'l', ".*", ".*", "A regular expression indicating the locales to be filtered");
/**
 * Command-line entry point: filters every locale matched by the "locales"
 * option for the organization given by the "org" option, writing one
 * "<locale>.xml" file per locale into the "filter" directory under
 * CLDRPaths.GEN_DIRECTORY.
 *
 * @param args
 *            the command-line arguments
 * @throws Exception
 *             if parsing, filtering or writing fails
 */
public static void main(String[] args) throws Exception {
    options.parse(args, true);
    final String localeRegex = options.get("locales").getValue();
    final Factory source = Factory.make(CLDRPaths.MAIN_DIRECTORY, localeRegex);
    final String organizationName = options.get("org").getValue();
    final FilterFactory filtered = FilterFactory.load(source, organizationName, true);
    final String targetDirectory = CLDRPaths.GEN_DIRECTORY + "/filter";
    for (String localeID : source.getAvailable()) {
        final String fileName = localeID + ".xml";
        // The writer is closed automatically, even if filtering or writing fails.
        try (PrintWriter writer = FileUtilities.openUTF8Writer(targetDirectory, fileName)) {
            CLDRFile filteredFile = filtered.make(localeID, false);
            filteredFile.write(writer);
        }
    }
}
}