// tools/java/org/unicode/cldr/icu/SupplementalMapper.java - platform/external/cldr - Git at Google

 package org.unicode.cldr.icu;

 import java.io.File;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

 import org.unicode.cldr.icu.RegexManager.CldrArray;
 import org.unicode.cldr.icu.RegexManager.Function;
 import org.unicode.cldr.icu.RegexManager.PathValueInfo;
 import org.unicode.cldr.icu.RegexManager.RegexResult;
 import org.unicode.cldr.util.Builder;
 import org.unicode.cldr.util.CLDRFile;
 import org.unicode.cldr.util.DtdType;
 import org.unicode.cldr.util.Pair;
 import org.unicode.cldr.util.PatternCache;
 import org.unicode.cldr.util.RegexLookup;
 import org.unicode.cldr.util.RegexLookup.Finder;
 import org.unicode.cldr.util.XMLFileReader;
 import org.unicode.cldr.util.XPathParts;

 import com.ibm.icu.text.NumberFormat;
 import com.ibm.icu.text.SimpleDateFormat;
 import com.ibm.icu.util.Calendar;
 import com.ibm.icu.util.GregorianCalendar;
 import com.ibm.icu.util.Output;
 import com.ibm.icu.util.TimeZone;

 /**
  * A mapper that converts supplemental LDML data from CLDR to the ICU data
  * structure.
  */
 public class SupplementalMapper {
     /**
      * Matches a fallback reference of the form "/rb/path" or "/rb/path[N]":
      * group 1 is the path, group 2 the optional numeric index.
      */
     private static final Pattern ARRAY_INDEX = PatternCache.get("(/[^\\[]++)(?:\\[(\\d++)\\])?$");
     /**
      * Day-of-week names mapped to the numbers 1 (sun) to 7 (sat); handed to
      * every IcuData built by this mapper. "tue" is mapped alongside the
      * historical "tues" key so that the three-letter form used by the other
      * days is recognized for Tuesday as well.
      */
     private static final Map<String, String> enumMap = Builder.with(new HashMap<String, String>())
         .put("sun", "1").put("mon", "2").put("tue", "3").put("tues", "3").put("wed", "4")
         .put("thu", "5").put("fri", "6").put("sat", "7").get();
     /** Formats the FIFO counter with at least four integer digits. */
     private static final NumberFormat numberFormat = NumberFormat.getInstance();
     static {
         numberFormat.setMinimumIntegerDigits(4);
     }

     /** Index of the path/value pair being processed by loadValues(); reset for each input file. */
     private int fifoCounter;
     /** Directory that the input XML files are read from. */
     private String inputDir;
     /** Regex-driven converter from CLDR xpaths to rb paths; installed by create(). */
     private RegexManager regexMapper;
     /** Optional xpath prefix to trace; null means no tracing. */
     private String debugXPath;

     /**
      * Distinguishes the start of a date range from its end.
      */
     private enum DateFieldType {
         from, to;

         /**
          * Maps a label to a field type, ignoring case: "from" and "start"
          * give {@link #from}, "to" and "end" give {@link #to}.
          *
          * @param value
          *            the label to convert
          * @return the matching field type
          * @throws IllegalArgumentException
          *             if the label is none of the four accepted words
          */
         public static DateFieldType toEnum(String value) {
             value = value.toLowerCase();
             switch (value) {
             case "from":
             case "start":
                 return from;
             case "to":
             case "end":
                 return to;
             default:
                 throw new IllegalArgumentException(value + " is not a valid date field type");
             }
         }
     };

     /**
      * Orders LDML supplementalData xpaths. Metazone timezone paths and
      * weekData paths are ordered here; every other pair is delegated to the
      * supplementalData comparator obtained from CLDRFile.
      */
     private static Comparator<String> supplementalComparator = new Comparator<String>() {
         private final Pattern FROM_ATTRIBUTE = PatternCache.get("\\[@from=\"([^\"]++)\"]");
         private final Pattern WEEKDATA = PatternCache.get(
             "//supplementalData/weekData/(minDays|firstDay|weekendStart|weekendEnd).*");

         @Override
         public int compare(String arg0, String arg1) {
             final String metazonePrefix = "//supplementalData/metaZones/metazoneInfo/timezone";
             if (arg0.startsWith(metazonePrefix) && arg1.startsWith(metazonePrefix)) {
                 return compareMetazonePaths(arg0, arg1, metazonePrefix.length());
             }
             Matcher[] weekMatchers = new Matcher[2];
             if (RegexManager.matches(WEEKDATA, arg0, arg1, weekMatchers)) {
                 return compareWeekElements(weekMatchers[0].group(1), weekMatchers[1].group(1));
             }
             return CLDRFile.getComparator(DtdType.supplementalData).compare(arg0, arg1);
         }

         /**
          * Paths without a from attribute sort before paths that have one;
          * otherwise plain string order is used, because
          * CLDRFile.ldmlComparator doesn't always order the from dates
          * correctly.
          */
         private int compareMetazonePaths(String path0, String path1, int searchStart) {
             boolean hasFrom0 = FROM_ATTRIBUTE.matcher(path0).find(searchStart);
             boolean hasFrom1 = FROM_ATTRIBUTE.matcher(path1).find(searchStart);
             if (hasFrom0 == hasFrom1) {
                 return path0.compareTo(path1);
             }
             return hasFrom0 ? 1 : -1;
         }

         /**
          * Sorts weekData elements ourselves because ldmlComparator sorts
          * firstDay after minDays: alphabetical order, except that
          * weekendEnd always comes last.
          */
         private int compareWeekElements(String elem0, String elem1) {
             int alphabetical = elem0.compareTo(elem1);
             if (alphabetical != 0) {
                 if (elem0.equals("weekendEnd")) {
                     return 1;
                 }
                 if (elem1.equals("weekendEnd")) {
                     return -1;
                 }
             }
             return alphabetical;
         }
     };

     /**
      * Creates a mapper that reads its input files from the given directory.
      * The constructor is private; it is invoked by create(String), which
      * also installs the regex manager.
      *
      * @param inputDir
      *            the directory that the input files are in
      */
     private SupplementalMapper(String inputDir) {
         this.inputDir = inputDir;
     }

     /**
      * Creates a mapper for the given input directory and registers the
      * "date", "algorithm" and "exp" functions on its regex manager, which is
      * built from ldml2icu_supplemental.txt.
      *
      * @param inputDir
      *            the directory that the input files are in
      * @return the configured mapper
      */
     public static SupplementalMapper create(String inputDir) {
         SupplementalMapper mapper = new SupplementalMapper(inputDir);
         // Handlers for functions in regex file.
         RegexManager manager = new RegexManager("ldml2icu_supplemental.txt");
         manager.addFunction("date", new Function(2) {
             /**
              * args[0] = value
              * args[1] = type (i.e. from/to)
              */
             @Override
             protected String run(String... args) {
                 DateFieldType dft = DateFieldType.toEnum(args[1].trim());
                 return getSeconds(args[0], dft);
             }
         });
         manager.addFunction("algorithm", new Function(1) {
             @Override
             protected String run(String... args) {
                 // Insert % into numberingSystems descriptions, directly after
                 // the last '/' (or at the very front when there is no '/').
                 String value = args[0];
                 int percentPos = value.lastIndexOf('/') + 1;
                 return value.substring(0, percentPos) + '%' + value.substring(percentPos);
             }
         });
         // Converts a number into a special integer that represents the number in
         // normalized scientific notation for ICU's RB parser.
         // Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
         // offset by 50 and yyyyyy is the coefficient to 5 decimal places, e.g.
         // 14660000000000 -> 1.466E13 -> 63146600
         // 0.0001 -> 1E-4 -> 46100000
         // -123.456 -> -1.23456E2 -> -52123456
         // args[0] = number to be converted
         // args[1] = an (optional) additional exponent offset,
         // e.g. -2 for converting percentages into fractions.
         manager.addFunction("exp", new Function(2) {
             @Override
             protected String run(String... args) {
                 double value = Double.parseDouble(args[0]);
                 // NaN has no scientific representation and Infinity would never
                 // leave the normalization loop below, so reject both up front.
                 if (Double.isNaN(value) || Double.isInfinite(value)) {
                     throw new IllegalArgumentException("Not a finite number: " + args[0]);
                 }
                 if (value == 0) {
                     return "0";
                 }
                 int exponent = 50;
                 if (args.length == 2) {
                     exponent += Integer.parseInt(args[1]);
                 }
                 String sign = value >= 0 ? "" : "-";
                 value = Math.abs(value);
                 while (value >= 10) {
                     value /= 10;
                     exponent++;
                 }
                 while (value < 1) {
                     value *= 10;
                     exponent--;
                 }
                 long coefficient = Math.round(value * 100000);
                 // Rounding can carry into a seventh digit (9.9999996 becomes
                 // 1000000); renormalize so the coefficient keeps six digits.
                 if (coefficient >= 1000000) {
                     coefficient /= 10;
                     exponent++;
                 }
                 if (exponent < 0 || exponent > 99) {
                     throw new IllegalArgumentException("Exponent out of bounds: " + exponent);
                 }
                 return sign + exponent + coefficient;
             }
         });
         mapper.regexMapper = manager;
         return mapper;
     }

     /**
      * Loads an IcuData object of the specified type.
      * <p>
      * For "supplementalData" several input categories are merged; for
      * "metadata" the supplementalMetadata file is read; any other name is
      * used as the input category directly. After loading, fallback values
      * are added, each rb path's values are sorted, and values that are
      * references to another rb path are replaced by the referenced value.
      *
      * @param outputName
      *            the type of data to be converted
      * @return an IcuData object
      * @throws RuntimeException
      *             if a fallback reference names an rb path that is missing
      *             or an index beyond that path's values
      */
     public IcuData fillFromCldr(String outputName) {
         Map<String, CldrArray> pathValueMap = new HashMap<String, CldrArray>();
         String category = outputName;
         if (outputName.equals("supplementalData")) {
             String[] categories = {
                 // "characters", explicitly skipped
                 // "coverageLevels", explicitly skipped
                 // "dayPeriods", done in processSupplemental
                 // "genderList", done elsewhere??
                 "languageInfo",
                 // "likelySubtags", done elsewhere??
                 // "metaZones", done elsewhere??
                 // "numberingSystems", done elsewhere??
                 // "ordinals", done in processSupplemental
                 // "pluralRanges", done in processSupplemental
                 // "plurals", done in processSupplemental
                 // "postalCodeData", deprecated
                 "supplementalData",
                 "subdivisions",
                 "telephoneCodeData",
                 "/../validity/"
                 // "windowsZones", done elsewhere??
             };
             for (String cat : categories) {
                 loadValues(cat, pathValueMap);
             }
         } else {
             if (outputName.equals("metadata")) category = "supplementalMetadata";
             loadValues(category, pathValueMap);
         }
         regexMapper.addFallbackValues(pathValueMap);
         IcuData icuData = new IcuData(category + ".xml", outputName, false, enumMap);
         for (Map.Entry<String, CldrArray> entry : pathValueMap.entrySet()) {
             icuData.addAll(entry.getKey(), entry.getValue().sortValues(supplementalComparator));
         }
         // Final pass through IcuData object to clean up any fallback rbpaths
         // in the values.
         // Assume one value per fallback path.
         for (String rbPath : icuData) {
             List<String[]> values = icuData.get(rbPath);
             for (int i = 0, len = values.size(); i < len; i++) {
                 String[] valueArray = values.get(i);
                 if (valueArray.length != 1) continue;
                 String value = valueArray[0];
                 Matcher matcher = ARRAY_INDEX.matcher(value);
                 if (!matcher.matches()) continue;
                 String replacePath = matcher.group(1);
                 List<String[]> replaceValues = icuData.get(replacePath);
                 if (replaceValues == null) {
                     throw new RuntimeException(replacePath + " is missing from IcuData object.");
                 }
                 // The "[N]" suffix is optional. groupCount() reports the groups
                 // declared in the pattern, not the ones that matched, so the
                 // captured text itself must be checked for null.
                 String indexText = matcher.group(2);
                 int replaceIndex = indexText == null ? 0 : Integer.parseInt(indexText);
                 if (replaceIndex >= replaceValues.size()) {
                     throw new RuntimeException(replaceIndex + " out of range of values in " + replacePath);
                 }
                 values.set(i, replaceValues.get(replaceIndex));
             }
         }
         // Hack to add the CLDR version
         if (outputName.equals("supplementalData")) {
             icuData.add("/cldrVersion", CLDRFile.GEN_VERSION);
         }
         return icuData;
     }

     /**
      * Loads values for the specified category from CLDR.
      * <p>
      * A category ending in "/" is treated as a directory below the input
      * directory: every ".xml" file in it is loaded as its own category.
      * Otherwise the file "category.xml" is read, paths whose draft attribute
      * is provisional or unconfirmed are skipped, and each remaining path
      * that the regex manager can convert is added to the output map.
      *
      * @param category
      *            the category name, or a directory name ending in "/"
      * @param pathValueMap
      *            the output map
      * @throws IllegalArgumentException
      *             if a directory category cannot be listed
      */
     private void loadValues(String category, Map<String, CldrArray> pathValueMap) {
         if (category.endsWith("/")) {
             File dir = new File(inputDir + category);
             // listFiles() returns null when the path is not a directory or
             // cannot be read; report the offending path instead of an NPE.
             File[] subfiles = dir.listFiles();
             if (subfiles == null) {
                 throw new IllegalArgumentException(dir + " is not a readable directory");
             }
             for (File subfile : subfiles) {
                 String name = subfile.getName();
                 if (name.endsWith(".xml")) {
                     name = name.substring(0, name.length() - 4);
                     loadValues(category + name, pathValueMap);
                 }
             }
             return;
         }
         String inputFile = new File(inputDir, category + ".xml").getAbsolutePath();
         List<Pair<String, String>> contents = new ArrayList<Pair<String, String>>();
         XMLFileReader.loadPathValues(inputFile, contents, true);
         RegexLookup<RegexResult> pathConverter = regexMapper.getPathConverter();
         fifoCounter = 0; // Helps to keep unsorted rb paths in order.
         XPathParts parts = new XPathParts();
         for (Pair<String, String> pair : contents) {
             Output<Finder> matcher = new Output<Finder>();
             String fullPath = parts.set(pair.getFirst()).toString();
             // Only convert contributed or higher data
             if (parts.containsAttributeValue("draft", "provisional") ||
                 parts.containsAttributeValue("draft", "unconfirmed")) {
                 continue;
             }
             List<String> debugResults = isDebugXPath(fullPath) ? new ArrayList<String>() : null;
             Output<String[]> argInfo = new Output<>();
             RegexResult regexResult = pathConverter.get(fullPath, null, argInfo, matcher, debugResults);
             if (regexResult == null) {
                 RegexManager.printLookupResults(fullPath, debugResults);
                 continue;
             }
             if (debugResults != null) {
                 System.out.println(fullPath + " successfully matched");
             }
             String[] arguments = argInfo.value;
             String cldrValue = pair.getSecond();
             for (PathValueInfo info : regexResult) {
                 List<String> values = info.processValues(arguments, cldrValue);
                 // Check if there are any arguments that need splitting for the rbPath.
                 String groupKey = info.processGroupKey(arguments);
                 String baseXPath = info.processXPath(arguments, fullPath);
                 boolean splitNeeded = false;
                 int argIndex = info.getSplitRbPathArg();
                 if (argIndex != -1) {
                     String[] splitArgs = arguments[argIndex].split("\\s++");
                     // Only split the first splittable argument needed for each rbPath.
                     if (splitArgs.length > 1) {
                         // Work on a copy so the shared arguments stay intact
                         // for the remaining PathValueInfo entries.
                         String[] newArgs = arguments.clone();
                         for (String splitArg : splitArgs) {
                             newArgs[argIndex] = splitArg;
                             String rbPath = info.processRbPath(newArgs);
                             processValues(baseXPath, rbPath, values, groupKey, pathValueMap);
                         }
                         splitNeeded = true;
                     }
                 }
                 // No splitting required, process as per normal.
                 if (!splitNeeded) {
                     String rbPath = info.processRbPath(arguments);
                     processValues(baseXPath, rbPath, values, groupKey, pathValueMap);
                 }
             }
             fifoCounter++;
         }
     }

     /**
      * Adds a list of values to the array stored for an rb path, after
      * substituting the current FIFO counter for any "&lt;FIFO&gt;"
      * placeholder in that path.
      *
      * @param xpath
      *            the CLDR path that the values came from
      * @param rbPath
      *            the rbPath that the values belong to
      * @param values
      *            the values
      * @param groupKey
      *            the key that the values should be grouped by
      * @param pathValueMap
      *            the output map
      */
     private void processValues(String xpath, String rbPath, List<String> values,
         String groupKey, Map<String, CldrArray> pathValueMap) {
         String resolvedPath = rbPath;
         if (resolvedPath.contains("<FIFO>")) {
             // Leading zeros keep the counters in order when sorted as text.
             String paddedCounter = numberFormat.format(fifoCounter);
             resolvedPath = resolvedPath.replace("<FIFO>", '<' + paddedCounter + '>');
         }
         RegexManager.getCldrArray(resolvedPath, pathValueMap).put(xpath, values, groupKey);
     }

     /**
      * Converts a date string into its millisecond timestamp, written as two
      * signed 32-bit integers separated by a space: the upper 32 bits first,
      * then the lower 32 bits. Despite the method name, the value is in
      * milliseconds.
      *
      * @param dateStr
      *            the date to convert
      * @param type
      *            whether the date is a from or a to
      * @return the string "top bottom"
      * @throws IllegalArgumentException
      *             if the date cannot be parsed
      */
     private static String getSeconds(String dateStr, DateFieldType type) {
         long millis;
         try {
             millis = getMilliSeconds(dateStr, type);
         } catch (ParseException ex) {
             throw new IllegalArgumentException("Could not parse date: " + dateStr, ex);
         }

         int top = (int) (millis >>> 32);
         int bottom = (int) millis;

         if (NewLdml2IcuConverter.DEBUG) {
             // Recombine the halves and make sure nothing was lost.
             long full = ((long) top << 32) + (0xffffffffL & bottom);
             if (full != millis) {
                 System.err.println("Error when converting " + millis + ": " +
                     top + ", " + bottom + " was converted back into " + full);
             }
         }

         return top + " " + bottom;
     }

     /**
      * Parses a yyyy-MM-dd date in GMT and moves it to the first millisecond
      * of that day for a from date, or to the last millisecond for a to
      * date.
      *
      * @param dateStr
      *            the date; must contain exactly two hyphens after its first
      *            character
      * @param type
      *            whether the date is a from or a to
      * @return the adjusted time in milliseconds
      * @throws ParseException
      *             if the date does not follow the pattern
      * @throws RuntimeException
      *             if the hyphen count is not two
      */
     private static long getMilliSeconds(String dateStr, DateFieldType type)
         throws ParseException {
         if (countHyphens(dateStr) != 2) {
             throw new RuntimeException("Tried to parse invalid date: " + dateStr);
         }
         TimeZone gmt = TimeZone.getTimeZone("GMT");
         SimpleDateFormat parser = new SimpleDateFormat();
         parser.applyPattern("yyyy-MM-dd");
         parser.setTimeZone(gmt);
         Date parsed = parser.parse(dateStr);

         Calendar calendar = new GregorianCalendar();
         calendar.setTimeZone(gmt);
         calendar.setTime(parsed);
         switch (type) {
         case from:
             // Set the times for from fields to the beginning of the day.
             setTimeOfDay(calendar, 0, 0, 0, 0);
             break;
         case to:
             // Set the times for to fields to the end of the day.
             setTimeOfDay(calendar, 23, 59, 59, 999);
             break;
         }
         return calendar.getTimeInMillis();
     }

     /** Overwrites the hour, minute, second and millisecond fields of a calendar. */
     private static void setTimeOfDay(Calendar calendar, int hour, int minute, int second, int millisecond) {
         calendar.set(Calendar.HOUR_OF_DAY, hour);
         calendar.set(Calendar.MINUTE, minute);
         calendar.set(Calendar.SECOND, second);
         calendar.set(Calendar.MILLISECOND, millisecond);
     }

     /**
      * Counts the hyphens in a string, ignoring its first character.
      *
      * @param str
      *            the string to scan
      * @return the number of hyphens found from index 1 onwards
      */
     private static int countHyphens(String str) {
         int numHyphens = 0;
         // Start at index 1: hyphens in front are actually minus signs.
         for (int pos = 1; pos < str.length(); pos++) {
             if (str.charAt(pos) == '-') {
                 numHyphens++;
             }
         }
         return numHyphens;
     }

     /**
      * Sets the xpath prefix to monitor for debugging purposes. Paths that
      * start with this prefix are reported by isDebugXPath(); null turns
      * monitoring off.
      *
      * @param debugXPath
      *            the xpath prefix to monitor, or null for none
      */
     public void setDebugXPath(String debugXPath) {
         this.debugXPath = debugXPath;
     }

     /**
      * Tells whether an xpath falls under the prefix set for debugging.
      *
      * @param xpath
      *            the xpath to check
      * @return true if a debug prefix is set and the xpath starts with it
      */
     boolean isDebugXPath(String xpath) {
         return debugXPath != null && xpath.startsWith(debugXPath);
     }
 }
	package org.unicode.cldr.icu;

	import java.io.File;
	import java.text.ParseException;
	import java.util.ArrayList;
	import java.util.Comparator;
	import java.util.Date;
	import java.util.HashMap;
	import java.util.List;
	import java.util.Map;
	import java.util.regex.Matcher;
	import java.util.regex.Pattern;

	import org.unicode.cldr.icu.RegexManager.CldrArray;
	import org.unicode.cldr.icu.RegexManager.Function;
	import org.unicode.cldr.icu.RegexManager.PathValueInfo;
	import org.unicode.cldr.icu.RegexManager.RegexResult;
	import org.unicode.cldr.util.Builder;
	import org.unicode.cldr.util.CLDRFile;
	import org.unicode.cldr.util.DtdType;
	import org.unicode.cldr.util.Pair;
	import org.unicode.cldr.util.PatternCache;
	import org.unicode.cldr.util.RegexLookup;
	import org.unicode.cldr.util.RegexLookup.Finder;
	import org.unicode.cldr.util.XMLFileReader;
	import org.unicode.cldr.util.XPathParts;

	import com.ibm.icu.text.NumberFormat;
	import com.ibm.icu.text.SimpleDateFormat;
	import com.ibm.icu.util.Calendar;
	import com.ibm.icu.util.GregorianCalendar;
	import com.ibm.icu.util.Output;
	import com.ibm.icu.util.TimeZone;

	/**
	* A mapper that converts supplemental LDML data from CLDR to the ICU data
	* structure.
	*/
	public class SupplementalMapper {
	/**
	 * Matches a fallback reference of the form "/rb/path" or "/rb/path[N]":
	 * group 1 is the path, group 2 the optional numeric index.
	 */
	private static final Pattern ARRAY_INDEX = PatternCache.get("(/[^\\[]++)(?:\\[(\\d++)\\])?$");
	/**
	 * Day-of-week names mapped to the numbers 1 (sun) to 7 (sat); handed to
	 * every IcuData built by this mapper. "tue" is mapped alongside the
	 * historical "tues" key so that the three-letter form used by the other
	 * days is recognized for Tuesday as well.
	 */
	private static final Map<String, String> enumMap = Builder.with(new HashMap<String, String>())
	    .put("sun", "1").put("mon", "2").put("tue", "3").put("tues", "3").put("wed", "4")
	    .put("thu", "5").put("fri", "6").put("sat", "7").get();
	/** Formats the FIFO counter with at least four integer digits. */
	private static final NumberFormat numberFormat = NumberFormat.getInstance();
	static {
	    numberFormat.setMinimumIntegerDigits(4);
	}

	/** Index of the path/value pair being processed by loadValues(); reset for each input file. */
	private int fifoCounter;
	/** Directory that the input XML files are read from. */
	private String inputDir;
	/** Regex-driven converter from CLDR xpaths to rb paths; installed by create(). */
	private RegexManager regexMapper;
	/** Optional xpath prefix to trace; null means no tracing. */
	private String debugXPath;

	private enum DateFieldType {
	from, to;

	public static DateFieldType toEnum(String value) {
	value = value.toLowerCase();
	if (value.equals("from") \|\| value.equals("start")) {
	return from;
	} else if (value.equals("to") \|\| value.equals("end")) {
	return to;
	} else {
	throw new IllegalArgumentException(value + " is not a valid date field type");
	}
	}
	};

	/**
	* Comparator for sorting LDML supplementalData xpaths.
	*/
	private static Comparator<String> supplementalComparator = new Comparator<String>() {
	private final Pattern FROM_ATTRIBUTE = PatternCache.get("\\[@from=\"([^\"]++)\"]");
	private final Pattern WEEKDATA = PatternCache.get(
	"//supplementalData/weekData/(minDays\|firstDay\|weekendStart\|weekendEnd).*");

	@Override
	public int compare(String arg0, String arg1) {
	Matcher[] matchers = new Matcher[2];
	String metazone = "//supplementalData/metaZones/metazoneInfo/timezone";
	if (arg0.startsWith(metazone) && arg1.startsWith(metazone)) {
	int startPos = metazone.length();
	boolean from0 = FROM_ATTRIBUTE.matcher(arg0).find(startPos);
	boolean from1 = FROM_ATTRIBUTE.matcher(arg1).find(startPos);
	if (from0 != from1) {
	return from0 ? 1 : -1;
	} else {
	// CLDRFile.ldmlComparator doesn't always order the from
	// dates correctly, so use a regular string comparison.
	return arg0.compareTo(arg1);
	}
	} else if (RegexManager.matches(WEEKDATA, arg0, arg1, matchers)) {
	// Sort weekData elements ourselves because ldmlComparator
	// sorts firstDay after minDays.
	String elem0 = matchers[0].group(1);
	String elem1 = matchers[1].group(1);
	int compareElem = elem0.compareTo(elem1);
	if (compareElem == 0) return compareElem;
	if (elem0.equals("weekendEnd")) {
	return 1;
	} else if (elem1.equals("weekendEnd")) {
	return -1;
	}
	return compareElem;
	}
	return CLDRFile.getComparator(DtdType.supplementalData).compare(arg0, arg1);
	}
	};

	/**
	* Creates a mapper that reads its input files from the given directory.
	* The constructor is private; it is invoked by create(String), which
	* also installs the regex manager.
	*
	* @param inputDir
	* the directory that the input files are in
	*/
	private SupplementalMapper(String inputDir) {
	this.inputDir = inputDir;
	}

	public static SupplementalMapper create(String inputDir) {
	SupplementalMapper mapper = new SupplementalMapper(inputDir);
	// Handlers for functions in regex file.
	RegexManager manager = new RegexManager("ldml2icu_supplemental.txt");
	manager.addFunction("date", new Function(2) {
	/**
	* args[0] = value
	* args[1] = type (i.e. from/to)
	*/
	@Override
	protected String run(String... args) {
	DateFieldType dft = DateFieldType.toEnum(args[1].trim());
	return getSeconds(args[0], dft);
	}
	});
	manager.addFunction("algorithm", new Function(1) {
	@Override
	protected String run(String... args) {
	// Insert % into numberingSystems descriptions.
	String value = args[0];
	int percentPos = value.lastIndexOf('/') + 1;
	return value.substring(0, percentPos) + '%' + value.substring(percentPos);
	}
	});
	// Converts a number into a special integer that represents the number in
	// normalized scientific notation for ICU's RB parser.
	// Resultant integers are in the form -?xxyyyyyy, where xx is the exponent
	// offset by 50 and yyyyyy is the coefficient to 5 decimal places, e.g.
	// 14660000000000 -> 1.466E13 -> 63146600
	// 0.0001 -> 1E-4 -> 46100000
	// -123.456 -> -1.23456E-2 -> -48123456
	// args[0] = number to be converted
	// args[2] = an (optional) additional exponent offset,
	// e.g. -2 for converting percentages into fractions.
	manager.addFunction("exp", new Function(2) {
	@Override
	protected String run(String... args) {
	double value = Double.parseDouble(args[0]);
	if (value == 0) {
	return "0";
	}
	int exponent = 50;
	if (args.length == 2) {
	exponent += Integer.parseInt(args[1]);
	}
	String sign = value >= 0 ? "" : "-";
	value = Math.abs(value);
	while (value >= 10) {
	value /= 10;
	exponent++;
	}
	while (value < 1) {
	value *= 10;
	exponent--;
	}
	if (exponent < 0 \|\| exponent > 99) {
	throw new IllegalArgumentException("Exponent out of bounds: " + exponent);
	}
	return sign + exponent + Math.round(value * 100000);
	}
	});
	mapper.regexMapper = manager;
	return mapper;
	}

	/**
	 * Loads an IcuData object of the specified type.
	 * <p>
	 * For "supplementalData" several input categories are merged; for
	 * "metadata" the supplementalMetadata file is read; any other name is
	 * used as the input category directly. After loading, fallback values
	 * are added, each rb path's values are sorted, and values that are
	 * references to another rb path are replaced by the referenced value.
	 *
	 * @param outputName
	 *            the type of data to be converted
	 * @return an IcuData object
	 * @throws RuntimeException
	 *             if a fallback reference names an rb path that is missing
	 *             or an index beyond that path's values
	 */
	public IcuData fillFromCldr(String outputName) {
	    Map<String, CldrArray> pathValueMap = new HashMap<String, CldrArray>();
	    String category = outputName;
	    if (outputName.equals("supplementalData")) {
	        String[] categories = {
	            // "characters", explicitly skipped
	            // "coverageLevels", explicitly skipped
	            // "dayPeriods", done in processSupplemental
	            // "genderList", done elsewhere??
	            "languageInfo",
	            // "likelySubtags", done elsewhere??
	            // "metaZones", done elsewhere??
	            // "numberingSystems", done elsewhere??
	            // "ordinals", done in processSupplemental
	            // "pluralRanges", done in processSupplemental
	            // "plurals", done in processSupplemental
	            // "postalCodeData", deprecated
	            "supplementalData",
	            "subdivisions",
	            "telephoneCodeData",
	            "/../validity/"
	            // "windowsZones", done elsewhere??
	        };
	        for (String cat : categories) {
	            loadValues(cat, pathValueMap);
	        }
	    } else {
	        if (outputName.equals("metadata")) category = "supplementalMetadata";
	        loadValues(category, pathValueMap);
	    }
	    regexMapper.addFallbackValues(pathValueMap);
	    IcuData icuData = new IcuData(category + ".xml", outputName, false, enumMap);
	    for (Map.Entry<String, CldrArray> entry : pathValueMap.entrySet()) {
	        icuData.addAll(entry.getKey(), entry.getValue().sortValues(supplementalComparator));
	    }
	    // Final pass through IcuData object to clean up any fallback rbpaths
	    // in the values.
	    // Assume one value per fallback path.
	    for (String rbPath : icuData) {
	        List<String[]> values = icuData.get(rbPath);
	        for (int i = 0, len = values.size(); i < len; i++) {
	            String[] valueArray = values.get(i);
	            if (valueArray.length != 1) continue;
	            String value = valueArray[0];
	            Matcher matcher = ARRAY_INDEX.matcher(value);
	            if (!matcher.matches()) continue;
	            String replacePath = matcher.group(1);
	            List<String[]> replaceValues = icuData.get(replacePath);
	            if (replaceValues == null) {
	                throw new RuntimeException(replacePath + " is missing from IcuData object.");
	            }
	            // The "[N]" suffix is optional. groupCount() reports the groups
	            // declared in the pattern, not the ones that matched, so the
	            // captured text itself must be checked for null.
	            String indexText = matcher.group(2);
	            int replaceIndex = indexText == null ? 0 : Integer.parseInt(indexText);
	            if (replaceIndex >= replaceValues.size()) {
	                throw new RuntimeException(replaceIndex + " out of range of values in " + replacePath);
	            }
	            values.set(i, replaceValues.get(replaceIndex));
	        }
	    }
	    // Hack to add the CLDR version
	    if (outputName.equals("supplementalData")) {
	        icuData.add("/cldrVersion", CLDRFile.GEN_VERSION);
	    }
	    return icuData;
	}

	/**
	* Loads values for the specified category from CLDR.
	*
	* @param category
	* @param pathValueMap
	* the output map
	*/
	private void loadValues(String category, Map<String, CldrArray> pathValueMap) {
	if (category.endsWith("/")) {
	File dir = new File(inputDir + category);
	for (File subfile : dir.listFiles()) {
	String name = subfile.getName();
	if (name.endsWith(".xml")) {
	name = name.substring(0, name.length() - 4);
	loadValues(category + name, pathValueMap);
	}
	}
	return;
	}
	String inputFile = new File(inputDir, category + ".xml").getAbsolutePath();
	List<Pair<String, String>> contents = new ArrayList<Pair<String, String>>();
	XMLFileReader.loadPathValues(inputFile, contents, true);
	RegexLookup<RegexResult> pathConverter = regexMapper.getPathConverter();
	fifoCounter = 0; // Helps to keep unsorted rb paths in order.
	XPathParts parts = new XPathParts();
	for (Pair<String, String> pair : contents) {
	Output<Finder> matcher = new Output<Finder>();
	String fullPath = parts.set(pair.getFirst()).toString();
	// Only convert contributed or higher data
	if (parts.containsAttributeValue("draft", "provisional") \|\|
	parts.containsAttributeValue("draft", "unconfirmed")) {
	continue;
	}
	List<String> debugResults = isDebugXPath(fullPath) ? new ArrayList<String>() : null;
	Output<String[]> argInfo = new Output<>();
	RegexResult regexResult = pathConverter.get(fullPath, null, argInfo, matcher, debugResults);
	if (regexResult == null) {
	RegexManager.printLookupResults(fullPath, debugResults);
	continue;
	}
	if (debugResults != null) {
	System.out.println(fullPath + " successfully matched");
	}
	// String[] arguments = matcher.value.getInfo();
	String[] arguments = argInfo.value;
	String cldrValue = pair.getSecond();
	for (PathValueInfo info : regexResult) {
	List<String> values = info.processValues(arguments, cldrValue);
	// Check if there are any arguments that need splitting for the rbPath.
	String groupKey = info.processGroupKey(arguments);
	String baseXPath = info.processXPath(arguments, fullPath);
	boolean splitNeeded = false;
	int argIndex = info.getSplitRbPathArg();
	if (argIndex != -1) {
	String[] splitArgs = arguments[argIndex].split("\\s++");
	// Only split the first splittable argument needed for each rbPath.
	if (splitArgs.length > 1) {
	String[] newArgs = arguments.clone();
	for (String splitArg : splitArgs) {
	newArgs[argIndex] = splitArg;
	String rbPath = info.processRbPath(newArgs);
	processValues(baseXPath, rbPath, values, groupKey, pathValueMap);
	}
	splitNeeded = true;
	}
	}
	// No splitting required, process as per normal.
	if (!splitNeeded) {
	String rbPath = info.processRbPath(arguments);
	processValues(baseXPath, rbPath, values, groupKey, pathValueMap);
	}
	}
	fifoCounter++;
	}
	}

	/**
	 * Adds a list of values to the array stored for an rb path, after
	 * substituting the current FIFO counter for any "&lt;FIFO&gt;"
	 * placeholder in that path.
	 *
	 * @param xpath
	 *            the CLDR path that the values came from
	 * @param rbPath
	 *            the rbPath that the values belong to
	 * @param values
	 *            the values
	 * @param groupKey
	 *            the key that the values should be grouped by
	 * @param pathValueMap
	 *            the output map
	 */
	private void processValues(String xpath, String rbPath, List<String> values,
	    String groupKey, Map<String, CldrArray> pathValueMap) {
	    String resolvedPath = rbPath;
	    if (resolvedPath.contains("<FIFO>")) {
	        // Leading zeros keep the counters in order when sorted as text.
	        String paddedCounter = numberFormat.format(fifoCounter);
	        resolvedPath = resolvedPath.replace("<FIFO>", '<' + paddedCounter + '>');
	    }
	    RegexManager.getCldrArray(resolvedPath, pathValueMap).put(xpath, values, groupKey);
	}

	/**
	 * Converts a date string into its millisecond timestamp, written as two
	 * signed 32-bit integers separated by a space: the upper 32 bits first,
	 * then the lower 32 bits. Despite the method name, the value is in
	 * milliseconds.
	 *
	 * @param dateStr
	 *            the date to convert
	 * @param type
	 *            whether the date is a from or a to
	 * @return the string "top bottom"
	 * @throws IllegalArgumentException
	 *             if the date cannot be parsed
	 */
	private static String getSeconds(String dateStr, DateFieldType type) {
	    long millis;
	    try {
	        millis = getMilliSeconds(dateStr, type);
	    } catch (ParseException ex) {
	        throw new IllegalArgumentException("Could not parse date: " + dateStr, ex);
	    }

	    int top = (int) (millis >>> 32);
	    int bottom = (int) millis;

	    if (NewLdml2IcuConverter.DEBUG) {
	        // Recombine the halves and make sure nothing was lost.
	        long full = ((long) top << 32) + (0xffffffffL & bottom);
	        if (full != millis) {
	            System.err.println("Error when converting " + millis + ": " +
	                top + ", " + bottom + " was converted back into " + full);
	        }
	    }

	    return top + " " + bottom;
	}

	/**
	 * Parses a yyyy-MM-dd date in GMT and moves it to the first millisecond
	 * of that day for a from date, or to the last millisecond for a to
	 * date.
	 *
	 * @param dateStr
	 *            the date; must contain exactly two hyphens after its first
	 *            character
	 * @param type
	 *            whether the date is a from or a to
	 * @return the adjusted time in milliseconds
	 * @throws ParseException
	 *             if the date does not follow the pattern
	 * @throws RuntimeException
	 *             if the hyphen count is not two
	 */
	private static long getMilliSeconds(String dateStr, DateFieldType type)
	    throws ParseException {
	    if (countHyphens(dateStr) != 2) {
	        throw new RuntimeException("Tried to parse invalid date: " + dateStr);
	    }
	    TimeZone gmt = TimeZone.getTimeZone("GMT");
	    SimpleDateFormat parser = new SimpleDateFormat();
	    parser.applyPattern("yyyy-MM-dd");
	    parser.setTimeZone(gmt);
	    Date parsed = parser.parse(dateStr);

	    Calendar calendar = new GregorianCalendar();
	    calendar.setTimeZone(gmt);
	    calendar.setTime(parsed);
	    switch (type) {
	    case from:
	        // Set the times for from fields to the beginning of the day.
	        setTimeOfDay(calendar, 0, 0, 0, 0);
	        break;
	    case to:
	        // Set the times for to fields to the end of the day.
	        setTimeOfDay(calendar, 23, 59, 59, 999);
	        break;
	    }
	    return calendar.getTimeInMillis();
	}

	/** Overwrites the hour, minute, second and millisecond fields of a calendar. */
	private static void setTimeOfDay(Calendar calendar, int hour, int minute, int second, int millisecond) {
	    calendar.set(Calendar.HOUR_OF_DAY, hour);
	    calendar.set(Calendar.MINUTE, minute);
	    calendar.set(Calendar.SECOND, second);
	    calendar.set(Calendar.MILLISECOND, millisecond);
	}

	/**
	 * Counts the hyphens in a string, ignoring its first character.
	 *
	 * @param str
	 *            the string to scan
	 * @return the number of hyphens found from index 1 onwards
	 */
	private static int countHyphens(String str) {
	    int numHyphens = 0;
	    // Start at index 1: hyphens in front are actually minus signs.
	    for (int pos = 1; pos < str.length(); pos++) {
	        if (str.charAt(pos) == '-') {
	            numHyphens++;
	        }
	    }
	    return numHyphens;
	}

	/**
	* Sets the xpath prefix to monitor for debugging purposes. Paths that
	* start with this prefix are reported by isDebugXPath(); null turns
	* monitoring off.
	*
	* @param debugXPath
	* the xpath prefix to monitor, or null for none
	*/
	public void setDebugXPath(String debugXPath) {
	this.debugXPath = debugXPath;
	}

	/**
	 * Tells whether an xpath falls under the prefix set for debugging.
	 *
	 * @param xpath
	 *            the xpath to check
	 * @return true if a debug prefix is set and the xpath starts with it
	 */
	boolean isDebugXPath(String xpath) {
	    return debugXPath != null && xpath.startsWith(debugXPath);
	}
	}