blob: 0b86dbee231d54abcf9faa8de6e15b19f111aaba [file] [log] [blame]
package org.unicode.cldr.tool;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CoverageLevel2;
import org.unicode.cldr.tool.Option.Options;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CLDRFile.Status;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.Level;
import org.unicode.cldr.util.PathHeader;
import org.unicode.cldr.util.PathHeader.BaseUrl;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.StandardCodes;
import com.google.common.collect.ImmutableSet;
import com.ibm.icu.dev.util.CollectionUtilities;
import com.ibm.icu.util.Output;
public class SearchCLDR {
// private static final int
// HELP1 = 0,
// HELP2 = 1,
// SOURCEDIR = 2,
// MATCH_FILE = 3,
// MATCH_PATH = 4,
// MATCH_VALUE = 5,
// SHOW_PATH = 6,
// SHOW_PARENT_VALUE = 7,
// SHOW_ENGLISH_VALUE = 8
// ;
// private static final UOption[] options = {
// UOption.HELP_H(),
// UOption.HELP_QUESTION_MARK(),
// UOption.SOURCEDIR().setDefault(CldrUtility.MAIN_DIRECTORY),
// UOption.create("localematch", 'l', UOption.REQUIRES_ARG).setDefault(".*"),
// UOption.create("pathmatch", 'p', UOption.REQUIRES_ARG).setDefault(".*"),
// UOption.create("valuematch", 'v', UOption.REQUIRES_ARG).setDefault(".*"),
// UOption.create("showPath", 'z', UOption.NO_ARG),
// UOption.create("showParentValue", 'q', UOption.NO_ARG),
// UOption.create("showEnglishValue", 'e', UOption.NO_ARG),
// };
// static final String HELP_TEXT1 = "Use the following options" + XPathParts.NEWLINE
// + "-h or -?\t for this message" + XPathParts.NEWLINE
// + "-"+options[SOURCEDIR].shortName + "\t source directory. Default = -s" +
// CldrUtility.getCanonicalName(CldrUtility.MAIN_DIRECTORY) + XPathParts.NEWLINE
// + "\tExample:-sC:\\Unicode-CVS2\\cldr\\common\\gen\\source\\" + XPathParts.NEWLINE
// + "-l<regex>\t to restrict the locales to what matches <regex>" + XPathParts.NEWLINE
// + "-p<regex>\t to restrict the paths to what matches <regex>" + XPathParts.NEWLINE
// + "-v<regex>\t to restrict the values to what matches <regex>" + XPathParts.NEWLINE
// + "\t Remember to put .* on the front and back of any regex if you want to find any occurence."
// + "-s\t show path"
// + "-s\t show parent value"
// + "-s\t show English value"
// ;
/**
 * Command-line options understood by this tool.
 *
 * <p>Each {@code add} call registers one option by name together with its help text. Options
 * registered with a {@code ".*"} second argument are read with {@code getValue()} in
 * {@link #main}; the ones registered with {@code null} there are only tested with
 * {@code doesOccur()}. Presumably the second argument is the regex an option argument must
 * match and the third is the default value -- confirm against {@code Option.Options}.
 */
static final Options myOptions = new Options()
    .add("source", ".*", CLDRPaths.MAIN_DIRECTORY, "source directory")
    .add("file", ".*", ".*", "regex to filter files/locales.")
    .add("path", ".*", null, "regex to filter paths. ! in front selects items that don't match. example: -p relative.*@type=\\\"-?3\\\"")
    .add("value", ".*", null, "regex to filter values. ! in front selects items that don't match")
    .add("level", ".*", null, "regex to filter levels. ! in front selects items that don't match")
    .add("count", null, null, "only count items")
    .add("organization", ".*", null, "show level for organization")
    .add("z-showPath", null, null, "show paths")
    .add("resolved", null, null, "use resolved locales")
    .add("q-showParent", null, null, "show parent value")
    .add("english", null, null, "show english value")
    .add("Verbose", null, null, "verbose output")
    .add("PathHeader", null, null, "show path header and string ID");

/** Value of the "file" option; handed to the CLDR file factory as its locale filter. */
private static String fileMatcher;

/** Matcher built from the "path" option, or null when no path filter was given. */
private static Matcher pathMatcher;

/** True when the "count" option is present: per-level counts are printed instead of rows. */
private static boolean countOnly;

/** True when the "z-showPath" option is present: rows include the full path. */
private static boolean showPath;

/** Set only when the "PathHeader" option is present; otherwise stays null and showLine skips it. */
private static PathHeader.Factory PATH_HEADER_FACTORY;

/** Value of the "organization" option, or null when it was not supplied. */
private static String organization;
/**
 * Command-line entry point: walks every locale file selected by the "file" option and prints
 * the items that pass the path, level and value filters (or, with "count", per-level totals).
 *
 * <p>For each locale the paths with a non-null string value are sorted by their
 * {@link PathHeader} and then filtered in this order: path regex (applied to the full path),
 * coverage level set, value regex. Every item that passes the path filter is tallied by
 * coverage level before the level and value filters are applied.
 *
 * @param args command-line arguments, parsed by {@code myOptions}
 */
public static void main(String[] args) {
    myOptions.parse(args, true);
    final long startTime = System.currentTimeMillis();
    final String sourceDirectory = myOptions.get("source").getValue();

    // The same holder is reused for every filter, so each flag is copied out right after
    // the call that sets it.
    final Output<Boolean> negation = new Output<>();
    fileMatcher = myOptions.get("file").getValue();
    pathMatcher = getMatcher(myOptions.get("path").getValue(), negation);
    final boolean pathExclude = negation.value;
    // The level set already has the "!" negation folded in; no separate flag is needed.
    final Set<Level> levelMatcher = getEnumMatcher(myOptions.get("level").getValue(), negation);
    final Matcher valueMatcher = getMatcher(myOptions.get("value").getValue(), negation);
    final boolean valueExclude = negation.value;

    countOnly = myOptions.get("count").doesOccur();
    final boolean resolved = myOptions.get("resolved").doesOccur();
    showPath = myOptions.get("z-showPath").doesOccur();
    organization = myOptions.get("organization").getValue();
    if (myOptions.get("PathHeader").doesOccur()) {
        PATH_HEADER_FACTORY = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish());
    }
    final boolean showParent = myOptions.get("q-showParent").doesOccur();
    final boolean showEnglish = myOptions.get("english").doesOccur();

    final Factory cldrFactory = Factory.make(sourceDirectory, fileMatcher);
    final Set<String> locales = new TreeSet<String>(cldrFactory.getAvailable());
    final CLDRFile english = cldrFactory.make("en", true);
    final PathHeader.Factory pathHeaderFactory = PathHeader.getFactory(english);

    System.out.println("Searching...");
    System.out.println();
    System.out.flush();

    if (countOnly) {
        // Column header for count mode: "file" followed by one column per coverage level.
        final StringBuilder header = new StringBuilder("file");
        for (Level column : Level.values()) {
            header.append("\t").append(column);
        }
        System.out.println(header);
    }

    for (String locale : locales) {
        // NOTE(review): this value is never read afterwards; the "Org-Level" column below is
        // filled with the path's coverage level instead. Kept so the lookup still runs.
        Level organizationLevel = null;
        if (organization != null) {
            organizationLevel = StandardCodes.make().getLocaleCoverageLevel(organization, locale);
        }

        final CLDRFile file = cldrFactory.make(locale, resolved);
        final Counter<Level> levelCounter = new Counter<>();
        final CoverageLevel2 coverage = CoverageLevel2.getInstance(locale);
        final Status status = new Status();
        boolean headerShown = false;

        // Collect every path that has a value, ordered by path header.
        final Set<PathHeader> sorted = new TreeSet<>();
        for (String candidate : file.fullIterable()) {
            if (file.getStringValue(candidate) != null) {
                sorted.add(pathHeaderFactory.fromPath(candidate));
            }
        }

        for (PathHeader pathHeader : sorted) {
            final String path = pathHeader.getOriginalPath();
            final String fullPath = file.getFullXPath(path);
            final String value = file.getStringValue(path);

            // Path filter: skip when "found" equals the negation flag.
            if (pathMatcher != null && pathMatcher.reset(fullPath).find() == pathExclude) {
                continue;
            }

            // Tally happens before the level and value filters, so counts reflect the path filter only.
            final Level pathLevel = coverage.getLevel(path);
            levelCounter.add(pathLevel, 1);

            final boolean levelRejected = levelMatcher != null && !levelMatcher.contains(pathLevel);
            if (levelRejected
                || (valueMatcher != null && valueMatcher.reset(value).find() == valueExclude)
                || countOnly) {
                continue;
            }

            if (!headerShown) {
                // Column titles go through the same formatter as data rows.
                showLine(showPath, showParent, showEnglish, resolved, locale, "Path", "Full-Path", "Value",
                    "PathHeader", "Parent-Value", "English-Value", "Source-Locale\tSource-Path", "Org-Level");
                headerShown = true;
            }

            final String cleanShort = pathHeader.toString().replace('\t', '|');

            String resolvedSource = null;
            if (resolved) {
                // getSourceLocaleID fills in status; "≣" marks a value found at the same path.
                final String sourceLocale = file.getSourceLocaleID(path, status);
                resolvedSource = sourceLocale
                    + (path.equals(status.pathWhereFound) ? "\t≣" : "\t" + status);
            }

            // NOTE(review): the "parent" value is read from the English file, not from the
            // locale being searched -- confirm this is intended.
            final String parentValue = showParent ? english.getBaileyValue(path, null, null) : null;
            final String englishValue = english == null ? null : english.getStringValue(path);

            showLine(showPath, showParent, showEnglish, resolved, locale,
                path, fullPath, value,
                cleanShort,
                parentValue,
                englishValue,
                resolvedSource,
                Objects.toString(pathLevel));
        }

        if (countOnly) {
            // One row per locale: the locale followed by the tally for each coverage level.
            final StringBuilder row = new StringBuilder().append(locale);
            for (Level column : Level.values()) {
                row.append("\t").append(levelCounter.get(column));
            }
            System.out.println(row);
        }
        System.out.flush();
    }

    final double elapsedMinutes = (System.currentTimeMillis() - startTime) / 60000.0;
    System.out.println("Done -- Elapsed time: " + elapsedMinutes + " minutes");
}
/**
 * Prints one tab-separated result row to standard output.
 *
 * <p>Column order: locale, value, English value (if {@code showEnglish}), parent value (if
 * {@code showParent}), short path, full path (if {@code showPath}), resolved source (if
 * {@code resolved}), organization level (if non-null). Values are wrapped in ⟪…⟫. When the
 * parent value equals the value, "≣" is printed in place of the parent value. If the
 * path-header factory has been set up, two extra indented lines follow with the path header
 * and its production URL for the locale.
 *
 * @param showPath whether to include {@code fullPath}
 * @param showParent whether to include the parent-value column
 * @param showEnglish whether to include {@code englishValue}
 * @param resolved whether to include {@code resolvedSource}
 * @param locale locale ID printed in the first column and used for the URL
 * @param path path used for the optional path-header lookup
 * @param fullPath full path, printed only when {@code showPath} is set
 * @param value the item's value
 * @param shortPath display form of the path
 * @param parentValue value compared against {@code value}; may be null
 * @param englishValue English value; may be null
 * @param resolvedSource source-locale information; may be null
 * @param organizationLevel last column, omitted when null
 */
private static void showLine(boolean showPath, boolean showParent, boolean showEnglish,
    boolean resolved, String locale, String path, String fullPath, String value,
    String shortPath, String parentValue, String englishValue, String resolvedSource, String organizationLevel) {
    final StringBuilder line = new StringBuilder();
    line.append(locale).append("\t⟪").append(value).append("⟫");
    if (showEnglish) {
        line.append("\t⟪").append(englishValue).append("⟫");
    }
    if (showParent) {
        // Null-safe comparison via the standard library rather than the ICU dev utility.
        if (Objects.equals(value, parentValue)) {
            line.append("\t≣");
        } else {
            line.append("\t⟪").append(parentValue).append("⟫");
        }
    }
    line.append("\t").append(shortPath);
    if (showPath) {
        line.append("\t").append(fullPath);
    }
    if (resolved) {
        line.append("\t").append(resolvedSource);
    }
    if (organizationLevel != null) {
        line.append("\t").append(organizationLevel);
    }
    if (PATH_HEADER_FACTORY != null) {
        final PathHeader pathHeader = PATH_HEADER_FACTORY.fromPath(path);
        if (pathHeader != null) {
            line.append("\n\t").append(pathHeader)
                .append("\n\t").append(pathHeader.getUrl(BaseUrl.PRODUCTION, locale));
        }
    }
    System.out.println(line);
}
/**
 * Builds a reusable regex matcher from a filter option.
 *
 * <p>A leading "!" marks the filter as negated: it is stripped from the pattern and reported
 * through {@code exclude}. The returned matcher is created against an empty input; callers
 * are expected to {@code reset} it with the text to test.
 *
 * @param property the option value, possibly prefixed with "!"; may be null
 * @param exclude receives true when the filter is negated, false otherwise (also when
 *     {@code property} is null)
 * @return a matcher for the pattern, or null when {@code property} is null
 */
private static Matcher getMatcher(String property, Output<Boolean> exclude) {
    final boolean negated = property != null && property.startsWith("!");
    exclude.value = negated;
    if (property == null) {
        return null;
    }
    final String regex = negated ? property.substring(1) : property;
    return PatternCache.get(regex).matcher("");
}
/**
 * Expands a level filter into the set of {@link Level} values it selects.
 *
 * <p>The pattern is matched case-insensitively against each level's {@code toString()} and
 * must match the whole name. A leading "!" is stripped and inverts the selection, so the
 * returned set already accounts for negation.
 *
 * @param property the option value, possibly prefixed with "!"; may be null
 * @param exclude receives true when the filter is negated, false otherwise (also when
 *     {@code property} is null)
 * @return an immutable set of the selected levels, or null when {@code property} is null
 */
private static Set<Level> getEnumMatcher(String property, Output<Boolean> exclude) {
    final boolean negated = property != null && property.startsWith("!");
    exclude.value = negated;
    if (property == null) {
        return null;
    }
    final EnumSet<Level> selected = EnumSet.noneOf(Level.class);
    final String regex = negated ? property.substring(1) : property;
    final Pattern levelPattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
    for (Level candidate : Level.values()) {
        final boolean nameMatches = levelPattern.matcher(candidate.toString()).matches();
        // Keep matching levels normally, non-matching levels when negated.
        if (nameMatches != negated) {
            selected.add(candidate);
        }
    }
    return ImmutableSet.copyOf(selected);
}
}