blob: af158d5b3cc2d1bf7222d982e3b6b752ba4a13cb [file] [log] [blame]
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.cldr.test.DisplayAndInputProcessor.NumericType;
import org.unicode.cldr.tool.FilterFactory;
import org.unicode.cldr.util.Builder;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LanguageTagParser;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.RegexLookup;
import org.unicode.cldr.util.RegexLookup.Finder;
import org.unicode.cldr.util.SupplementalDataInfo;
//import org.unicode.cldr.util.SupplementalDataInfo.MeasurementType;
/**
 * A mapper that converts locale data from CLDR to the ICU data structure.
 *
 * @author jchye
 */
public class LocaleMapper extends Mapper {
/**
 * Map for converting enums to their integer values.
 */
// Enum-name -> integer-string table; handed to the IcuData constructor in fillFromCldr().
// NOTE(review): the builder chain below has no terminating call/semicolon in this copy --
// it looks truncated; confirm against the upstream file.
private static final Map<String, String> enumMap = Builder.with(new HashMap<String, String>())
.put("titlecase-firstword", "1")
.put("no-change", "0")
// Matches a [@draft="..."] attribute so it can be stripped from full xpaths before lookup.
private static final Pattern DRAFT_PATTERN = PatternCache.get("\\[@draft=\"\\w+\"]");
// Matched against every xpath of the unresolved file in fillFromCldr().
// NOTE(review): the pattern argument is missing in this copy.
private static final Pattern TERRITORY_XPATH = PatternCache.get(
// Matched against every rb path in fillFromCldr() (DateTimePatterns hack).
// NOTE(review): the pattern argument is missing in this copy.
private static final Pattern RB_DATETIMEPATTERN = PatternCache.get(
// Supplies explicit parent locales, likely subtags and per-region calendars.
private SupplementalDataInfo supplementalDataInfo;
// We may use different factories for resolved or unresolved CLDRFiles depending
// on whether filtering is required.
private Factory unresolvedFactory;
private Factory resolvedFactory;
// Optional factory for additional ("special") CLDR data; may be null (see hasSpecialFile()).
private Factory specialFactory;
// Built from "ldml2icu_locale.txt" in the constructor; provides the xpath -> rb path converter.
private RegexManager manager;
// xpath prefix to trace while matching; null disables tracing (see isDebugXPath()).
private String debugXPath;
// Lazily initialised by getDeprecatedTerritories().
private Set<String> deprecatedTerritories;
/**
 * Special hack comparator, so that RB strings come out in the right order.
 * This is only important for the order of items in arrays.
 */
// Anonymous comparator passed to CldrArray.sortValues() via fillIcuData().
private static Comparator<String> comparator = new Comparator<String>() {
// NOTE(review): the four pattern initialisers below are cut off in this copy (no regex
// argument / terminator); restore them from the upstream file before compiling.
// compare() reads group(1) of each of them, and group(2) of CONTEXT_TRANSFORM.
private final Pattern CURRENCY_FORMAT = PatternCache.get(
private final Pattern DATE_OR_TIME_FORMAT = PatternCache.get(
private final Pattern MONTH_PATTERN = PatternCache
private final Pattern CONTEXT_TRANSFORM = PatternCache.get(
// Desired order of currency sub-elements; compare() ranks group(1) of CURRENCY_FORMAT by
// its index in this array (values not listed get index -1 and therefore sort first).
private final String[] CURRENCY_ORDER = { "symbol", "displayName",
"pattern[@type=\"standard\"]", "decimal", "group" };
* Reverse the ordering of the following:
* //ldml/numbers/currencies/currency[@type="([^"]*)"]/displayName ; curr ; /Currencies/$1
* //ldml/numbers/currencies/currency[@type="([^"]*)"]/symbol ; curr ; /Currencies/$1
* and the following (time/date)
* //ldml/dates/calendars/calendar[@type="([^"]*)"]/(dateFormats|dateTimeFormats|timeFormats)/(?:[^/\[]*)[@type=
* "([^"]*)"]/(?:[^/\[]*)[@type="([^"]*)"]/.* ; locales ; /calendar/$1/DateTimePatterns
public int compare(String arg0, String arg1) {
Matcher[] matchers = new Matcher[2];
if (RegexManager.matches(CURRENCY_FORMAT, arg0, arg1, matchers)) {
// Use ldml ordering except that symbol should be first.
int index0 = getIndexOf(CURRENCY_ORDER, matchers[0].group(1));
int index1 = getIndexOf(CURRENCY_ORDER, matchers[1].group(1));
return index0 - index1;
} else if (RegexManager.matches(DATE_OR_TIME_FORMAT, arg0, arg1, matchers)) {
int compareValue = matchers[0].group(1).compareTo(matchers[1].group(1));
if (compareValue != 0) return -compareValue;
} else if (RegexManager.matches(CONTEXT_TRANSFORM, arg0, arg1, matchers)) {
// Sort uiListOrMenu before stand-alone.
if (matchers[0].group(1).equals(matchers[1].group(1))) {
return -matchers[0].group(2).compareTo(matchers[1].group(2));
} else if (RegexManager.matches(MONTH_PATTERN, arg0, arg1, matchers)) {
// Sort leap year types after normal month types.
String matchGroup0 = matchers[0].group(1);
String matchGroup1 = matchers[1].group(1);
if (matchGroup0 != matchGroup1) {
return matchGroup0 == null && matchGroup1 != null ? -1 : 1;
return CLDRFile.getComparator(DtdType.ldml).compare(arg0, arg1);
* Looks for a string in an array
* @param order
* the array to be searched
* @param key
* the string to be searched for
* @return the index of the string if found, -1 if not found
private static int getIndexOf(String[] order, String key) {
for (int i = 0; i < order.length; i++) {
if (order[i].equals(key)) return i;
return -1;
* LocaleMapper constructor.
* @param factory
* the factory containing the CLDR data to be converted
* @param specialFactory
* a factory containing any additional CLDR data
* @param supplementalDataInfo
* SupplementalDataInfo object
* @param useAltValues
* true if alt path filtering should be performed
* @param organization
* the organization to filter the data by
* (null if coverage filtering is not needed)
public LocaleMapper(Factory factory, Factory specialFactory,
SupplementalDataInfo supplementalDataInfo, boolean useAltValues,
String organization) {
manager = new RegexManager("ldml2icu_locale.txt");
unresolvedFactory = resolvedFactory = factory;
// If filtering is required, filter all unresolved CLDRFiles for use in
// fillFromCldr(). We don't filter the resolved CLDRFiles by organization
// coverage level because
// some rbPaths (e.g. /calendar/x/DateTimePatterns) have a fixed number
// of values that must always be present regardless of filtering.
if (useAltValues || organization != null) {
unresolvedFactory = FilterFactory.load(factory, organization, useAltValues);
resolvedFactory = FilterFactory.load(factory, null, useAltValues);
this.specialFactory = specialFactory;
this.supplementalDataInfo = supplementalDataInfo;
* @return the set of locales available for processing by this mapper
public Set<String> getAvailable() {
return unresolvedFactory.getAvailable();
* @param filename
* @return true if a special XML file with the specified filename is available.
private boolean hasSpecialFile(String filename) {
return specialFactory != null && specialFactory.getAvailable().contains(filename);
/**
 * @return the set of deprecated territories to be ignored. Remove when no longer
 *         present in CLDR data.
 */
// Lazily builds and caches the deprecated-territory set in the deprecatedTerritories
// field, then returns the cached instance on every call.
private Set<String> getDeprecatedTerritories() {
if (deprecatedTerritories == null) {
// NOTE(review): the Builder expression is cut off in this copy (no arguments, no
// terminator, no closing brace); restore it from the upstream file.
deprecatedTerritories = Builder.with(
return deprecatedTerritories;
/**
 * Fills an IcuData object using the CLDR data for the specified locale.
 *
 * @param locale
 *            the locale whose CLDR data is to be converted
 * @return a single-element array containing the filled IcuData object
 */
// NOTE(review): this method is visibly damaged in this copy -- closing braces are absent
// and several statements are truncated or missing (`String country =;`,
// `String calendar =;`, the dangling argument line after mediumFormatPath, the empty
// narrow-era `if`). The comments below describe only what the surviving lines show;
// restore the missing statements from the upstream file before relying on it.
public IcuData[] fillFromCldr(String locale) {
Set<String> deprecatedTerritories = getDeprecatedTerritories();
// The resolved file drives both the path converter and the value pass further down.
CLDRFile resolvedCldr = resolvedFactory.make(locale, true);
RegexLookup<RegexResult> pathConverter = manager.getPathConverter(resolvedCldr);
// First pass through the unresolved CLDRFile to get all icu paths.
CLDRFile cldr = unresolvedFactory.make(locale, false);
// rb path -> values collected for that path.
Map<String, CldrArray> pathValueMap = new HashMap<String, CldrArray>();
// rb paths the value pass is allowed to add (see addMatchesForPath()).
Set<String> validRbPaths = new HashSet<String>();
for (String xpath : cldr) {
// Territory hacks to be removed once CLDR data is fixed.
Matcher matcher = TERRITORY_XPATH.matcher(xpath);
if (matcher.matches()) {
// NOTE(review): right-hand side missing in this copy.
String country =;
// NOTE(review): body of this check is missing in this copy.
if (deprecatedTerritories.contains(country)) {
// Add rb paths.
Output<Finder> matcherFound = new Output<Finder>();
Output<String[]> firstInfo = new Output<>();
RegexResult regexResult = matchXPath(pathConverter, cldr, xpath, matcherFound, firstInfo);
// xpaths with no conversion rule contribute nothing.
if (regexResult == null) continue;
// String[] arguments = matcherFound.value.getInfo();
String[] arguments = firstInfo.value;
for (PathValueInfo info : regexResult) {
String rbPath = info.processRbPath(arguments);
// The immediate parent of every path should also exist.
validRbPaths.add(rbPath.substring(0, rbPath.lastIndexOf('/')));
// Get all values from the resolved CLDRFile.
for (String xpath : resolvedCldr) {
// Since the unresolved CLDRFile may have been modified, use it
// to add values instead of the resolved CLDRFile if possible.
CLDRFile fileToUse = cldr.getStringValue(xpath) == null ? resolvedCldr : cldr;
addMatchesForPath(xpath, fileToUse, validRbPaths, pathConverter, pathValueMap);
// Add fallback paths if necessary.
manager.addFallbackValues(resolvedCldr, pathValueMap);
// Add special values to file.
boolean hasSpecial = hasSpecialFile(locale);
if (hasSpecial) {
CLDRFile specialCldrFile = specialFactory.make(locale, false);
for (String xpath : specialCldrFile) {
// Paths for which resolvedCldr.isHere() is true are skipped.
if (resolvedCldr.isHere(xpath)) continue;
// null validRbPaths: special values are not restricted to the first-pass rb paths.
addMatchesForPath(xpath, specialCldrFile, null, pathConverter, pathValueMap);
for (String rbPath : pathValueMap.keySet()) {
// HACK: DateTimePatterns needs a duplicate of the medium
// dateTimeFormat (formerly indicated using dateTimeFormats/default).
// This hack can be removed when ICU no longer requires it.
Matcher matcher = RB_DATETIMEPATTERN.matcher(rbPath);
if (matcher.matches()) {
// NOTE(review): right-hand side missing in this copy.
String calendar =;
CldrArray valueList = RegexManager.getCldrArray(rbPath, pathValueMap);
// Create a dummy xpath to sort the value in front of the other date time formats.
String basePath = "//ldml/dates/calendars/calendar[@type=\"" + calendar + "\"]/dateTimeFormats";
String mediumFormatPath = basePath
+ "/dateTimeFormatLength[@type=\"medium\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]";
// NOTE(review): the call this argument belongs to is missing in this copy.
getStringValue(resolvedCldr, mediumFormatPath),
// HACK: Fill missing narrow era values with their abbreviated versions.
CldrArray narrowEras = pathValueMap.get("/calendar/japanese/eras/narrow");
CldrArray abbreviatedEras = pathValueMap.get("/calendar/japanese/eras/abbreviated");
// NOTE(review): body of this check is missing in this copy.
if (narrowEras != null && abbreviatedEras != null) {
IcuData icuData = new IcuData("common/main/" + locale + ".xml", locale, true, enumMap);
if (hasSpecial) {
icuData.setFileComment("ICU <specials> source: <path>/common/main/" + locale + ".xml");
// Sort each rb path's values with the class comparator and copy them into icuData.
fillIcuData(pathValueMap, comparator, icuData);
// More hacks
hackAddExtras(resolvedCldr, locale, icuData);
return new IcuData[] { icuData };
private void fillIcuData(Map<String, CldrArray> pathValueMap,
Comparator<String> comparator, IcuData icuData) {
// Convert values to final data structure.
for (String rbPath : pathValueMap.keySet()) {
icuData.addAll(rbPath, pathValueMap.get(rbPath).sortValues(comparator));
public static String getFullXPath(String xpath, CLDRFile cldrFile) {
String fullPath = cldrFile.getFullXPath(xpath);
return fullPath == null ? xpath : DRAFT_PATTERN.matcher(fullPath).replaceAll("");
* @param cldr
* @param path
* @param matcherFound
* @param firstInfo
* @return the result of converting an xpath into an ICU-style path
private RegexResult matchXPath(RegexLookup<RegexResult> lookup,
CLDRFile cldr, String path,
Output<Finder> matcherFound, Output<String[]> firstInfo) {
String fullPath = cldr.getFullXPath(path);
fullPath = fullPath == null ? path : DRAFT_PATTERN.matcher(fullPath).replaceAll("");
List<String> debugResults = isDebugXPath(fullPath) ? new ArrayList<String>() : null;
Output<String[]> info = new Output<>();
RegexResult result = lookup.get(fullPath, null, info, matcherFound, debugResults);
if (debugResults != null) {
if (result == null) {
RegexManager.printLookupResults(fullPath, debugResults);
} else {
System.out.println(fullPath + " successfully matched");
if (firstInfo != null && info.value != null) {
firstInfo.value = info.value;
return result;
* Attempts to match an xpath and adds the results of a successful match to
* the specified map
* @param xpath
* the xpath to be matched
* @param cldrFile
* the CLDR file to get locale data from
* @param validRbPaths
* the set of valid rbPaths that the result must belong
* to, null if such a requirement does not exist
* @param pathValueMap
* the map that the results will be added to
private void addMatchesForPath(String xpath, CLDRFile cldrFile,
Set<String> validRbPaths, RegexLookup<RegexResult> pathConverter,
Map<String, CldrArray> pathValueMap) {
Output<Finder> matcher = new Output<Finder>();
Output<String[]> firstInfo = new Output<>();
RegexResult regexResult = matchXPath(pathConverter,
cldrFile, xpath, matcher, firstInfo);
if (regexResult == null) return;
// String[] arguments = matcher.value.getInfo();
String[] arguments = firstInfo.value;
String cldrValue = getStringValue(cldrFile, xpath);
for (PathValueInfo info : regexResult) {
String rbPath = info.processRbPath(arguments);
// Don't add additional paths at this stage.
if (validRbPaths != null && !validRbPaths.contains(rbPath)) continue;
CldrArray valueList = RegexManager.getCldrArray(rbPath, pathValueMap);
List<String> values = info.processValues(arguments, cldrValue);
String baseXPath = info.processXPath(arguments, xpath);
String groupKey = info.processGroupKey(arguments);
valueList.put(baseXPath, values, groupKey);
* @param cldrFile
* @param xpath
* @return the value of the specified xpath (fallback or otherwise)
private String getStringValue(CLDRFile cldrFile, String xpath) {
String value = cldrFile.getStringValue(xpath);
// HACK: DAIP doesn't currently make spaces in currency formats non-breaking.
// Remove this when fixed.
if (NumericType.getNumericType(xpath) == NumericType.CURRENCY) {
value = value.replace(' ', '\u00A0');
return value;
* Adds all mappings that couldn't be represented in the ldml2icu.txt file.
* @param cldrResolved
* @param locale
private void hackAddExtras(CLDRFile cldrResolved, String locale, IcuData icuData) {
// Specify parent of non-language locales.
String parent = supplementalDataInfo.getExplicitParentLocale(locale);
if (parent != null) {
icuData.add("/%%Parent", parent);
// <version number="$Revision: 5806 $"/>
String version = cldrResolved.getFullXPath("//ldml/identity/version");
icuData.add("/Version", MapperUtils.formatVersion(version));
// PaperSize:intvector{ 279, 216, } - now in supplemental
// MeasurementSystem:int{1} - now in supplemental
// Default calendar.
String localeID = cldrResolved.getLocaleID();
String calendar = getCalendarIfDifferent(localeID);
if (calendar != null) {
icuData.add("/calendar/default", calendar);
* Returns the default calendar to be used for a locale. If the default
* calendar for the parent locale is the same, null is returned.
private String getCalendarIfDifferent(String localeID) {
String calendar = getCalendar(localeID);
if (calendar == null) return null;
String parent = LocaleIDParser.getParent(localeID);
String parentCalendar = null;
while (parentCalendar == null && parent != null) {
parentCalendar = getCalendar(parent);
parent = LocaleIDParser.getParent(parent);
return calendar.equals(parentCalendar) ? null : calendar;
* Returns the default calendar to be used for a locale, if any.
private String getCalendar(String localeID) {
LanguageTagParser parser = new LanguageTagParser().set(localeID);
String region = localeID.equals("root") ? "001" : parser.getRegion();
if (region.equals("")) {
localeID = supplementalDataInfo.getLikelySubtags().get(parser.getLanguage());
if (localeID == null) {
throw new RuntimeException("Likely subtag not found for " + parser.getLanguage());
region = parser.getRegion();
if (region == null) region = "001";
List<String> calendars = supplementalDataInfo.getCalendars(region);
return calendars == null ? null : calendars.get(0);
//private String getMeasurementToDisplay(String localeID, MeasurementType measurementType) {...} // deleted
* @param localeID
* @param measurementType
* the type of measurement required
* @return the measurement of the specified locale
// private String getMeasurement(String localeID, MeasurementType measurementType) {
// String region = localeID.equals("root") ? "001" : new LanguageTagParser().set(localeID).getRegion();
// Map<MeasurementType, Map<String, String>> regionMeasurementData = supplementalDataInfo
// .getTerritoryMeasurementData();
// Map<String, String> typeMap = regionMeasurementData.get(measurementType);
// return typeMap.get(region);
// } //not used
* Sets xpath to monitor for debugging purposes.
* @param debugXPath
public void setDebugXPath(String debugXPath) {
this.debugXPath = debugXPath;
* @param xpath
* @return true if the xpath is to be debugged
boolean isDebugXPath(String xpath) {
return debugXPath == null ? false : xpath.startsWith(debugXPath);
// Creates a Makefile with the prefix "GENRB" and returns it.
// NOTE(review): `aliases` is never read in the lines visible here and the closing brace
// is absent -- the body looks truncated in this copy; confirm against the upstream file.
public Makefile generateMakefile(Collection<String> aliases) {
Makefile makefile = new Makefile("GENRB");
return makefile;