// blob: 923982cc4cff7fd6052f33f8ff2f5f64076f4b85 [file] [log] [blame]
package org.unicode.cldr.draft;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.regex.Pattern;
import org.unicode.cldr.util.PatternCache;
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ICUUncheckedIOException;
/**
 * Builds a {@link UnicodeMap} from text lines of the form {@code code ; value}.
 *
 * <p>The code column of each line may be a {@link UnicodeSet} pattern, a short literal
 * (fewer than 4 characters), or a whitespace-separated list of hexadecimal code points and
 * {@code start..end} ranges. The value column is converted with the configured
 * {@link Parser}; when no parser is set the raw value text itself is stored.
 *
 * @param <T> type of the values stored in the resulting map
 */
public class UnicodeMapBuilder<T> {
    /**
     * Leniency options that can be stored on the builder.
     * NOTE(review): the setting is not consulted anywhere in this class; presumably it is
     * meant to control what the code column may contain -- confirm against callers.
     */
    public enum Leniency {
        allowChars, allowUnicodeSet
    }

    /** Map under construction; {@code null} until the first load and again after {@link #getFrozen()}. */
    UnicodeMap<T> result;
    /** Converts the value column into a {@code T}; {@code null} means "store the raw string". */
    Parser<T, String> parser;
    Leniency leniency;
    /** Separator between the columns of a data line: a semicolon with whitespace on both sides. */
    Pattern semi = PatternCache.get("\\s+;\\s+");

    public UnicodeMapBuilder() {
    }

    /**
     * Sets the parser used to convert the value column of each line.
     *
     * @param parser the value parser, or {@code null} to store the value text unchanged
     * @return this builder
     */
    public UnicodeMapBuilder<T> setParser(Parser<T, String> parser) {
        this.parser = parser;
        return this;
    }

    /** @return the configured value parser, possibly {@code null} */
    public Parser<T, String> getParser() {
        return parser;
    }

    /** @return the stored leniency setting, possibly {@code null} */
    public Leniency getLeniency() {
        return leniency;
    }

    /**
     * Stores a leniency setting on this builder.
     *
     * @param leniency the setting to store
     * @return this builder
     */
    public UnicodeMapBuilder<T> setLeniency(Leniency leniency) {
        this.leniency = leniency;
        return this;
    }

    /**
     * Returns the map built so far without freezing it or resetting the builder.
     *
     * @return the live map, or {@code null} if nothing has been loaded since construction
     *         or since the last {@link #getFrozen()}
     */
    public UnicodeMap<T> get() {
        return result;
    }

    /**
     * Freezes and returns the map built so far, then resets the builder so that a later
     * {@link #putFromLines(BufferedReader)} starts a fresh map.
     *
     * @return the frozen map; an empty frozen map if nothing has been loaded (previously this
     *         case failed with a {@code NullPointerException})
     */
    public UnicodeMap<T> getFrozen() {
        final UnicodeMap<T> current = result == null ? new UnicodeMap<T>() : result;
        result = null;
        return current.freeze();
    }

    /**
     * Reads every data line from {@code br} and adds its mappings to the map under
     * construction. Blank and comment-only lines are skipped. The reader is closed when the
     * method finishes, whether it succeeds or fails.
     *
     * @param br source of {@code code ; value} lines
     * @return this builder
     * @throws RuntimeException wrapping the underlying failure, with the offending (cleaned)
     *         line in the message, if a line is malformed or reading fails
     */
    public UnicodeMapBuilder<T> putFromLines(BufferedReader br) {
        if (result == null) {
            result = new UnicodeMap<T>();
        }
        String line = null;
        try {
            while ((line = readDataLine(br, null)) != null) {
                if (line.length() == 0) {
                    continue; // blank or comment-only line
                }
                final String[] pieces = semi.split(line);
                if (pieces.length < 2) {
                    throw new IllegalArgumentException("Line must be of form code ; value");
                }
                final String codelist = pieces[0].trim();
                final String valueString = pieces[1].trim();
                final UnicodeSet sources = parseCodeList(codelist);
                // Without a parser the value text itself is stored, so the caller is
                // responsible for T being String in that configuration.
                @SuppressWarnings("unchecked")
                final T value = parser == null ? (T) valueString : parser.parseObject(valueString);
                result.putAll(sources, value);
            }
            br.close();
        } catch (final Exception e) {
            // Do not leak the reader when a line fails part-way through the input.
            closeQuietly(br);
            throw new RuntimeException("Failure on line " + line, e);
        }
        return this;
    }

    /**
     * Converts the code column of a data line into the set of sources it denotes.
     *
     * @throws IllegalArgumentException if a range has too many {@code ..} separators or its
     *         start is not below its end
     * @throws NumberFormatException if a code is not valid hexadecimal
     */
    private UnicodeSet parseCodeList(String codelist) {
        if (UnicodeSet.resemblesPattern(codelist, 0)) {
            return new UnicodeSet(codelist);
        }
        final UnicodeSet sources = new UnicodeSet();
        if (codelist.length() < 4) {
            // Too short to be a hex code: the text itself is the source.
            sources.add(codelist);
            return sources;
        }
        for (final String code : codelist.split("\\s+")) {
            final String[] range = code.split("\\.\\.");
            final int start = getCodePoint(range[0]);
            int end = start;
            if (range.length > 1) {
                if (range.length > 2) {
                    throw new IllegalArgumentException("Too many ..");
                }
                end = getCodePoint(range[1]);
                if (start >= end) {
                    throw new IllegalArgumentException("Range out of order");
                }
            }
            sources.add(start, end);
        }
        return sources;
    }

    /** Closes the reader, ignoring a null reader and any failure raised by the close itself. */
    private static void closeQuietly(BufferedReader br) {
        if (br == null) {
            return;
        }
        try {
            br.close();
        } catch (final IOException ignored) {
            // The original failure is the one worth reporting.
        }
    }

    /**
     * Parses a hexadecimal code point, skipping an optional two-character prefix:
     * {@code U+}, backslash-u or backslash-U.
     */
    private int getCodePoint(String source) {
        final boolean prefixed = source.startsWith("U+")
            || source.startsWith("\\u")
            || source.startsWith("\\U");
        final String digits = prefixed ? source.substring(2) : source;
        return Integer.parseInt(digits, 16);
    }

    /**
     * Reads the next line and reduces it to its data content: a leading U+FEFF (byte order
     * mark) is dropped, everything from the first {@code #} onward is removed, and the rest
     * is trimmed.
     *
     * @param br reader to take the line from
     * @param count if non-null, {@code count[0]} is incremented for every line read
     * @return the cleaned line (empty for blank or comment-only lines), or {@code null} at
     *         end of input
     * @throws ICUUncheckedIOException wrapping any exception raised while reading or cleaning
     * @throws IOException declared by the signature; failures inside the method body are
     *         rethrown as {@link ICUUncheckedIOException} instead
     */
    public static String readDataLine(BufferedReader br, int[] count) throws IOException {
        String originalLine = "";
        String line = "";
        try {
            originalLine = br.readLine();
            line = originalLine;
            if (line == null) {
                return null;
            }
            if (count != null) {
                ++count[0];
            }
            if (line.length() > 0 && line.charAt(0) == 0xFEFF) {
                line = line.substring(1);
            }
            final int commentPos = line.indexOf('#');
            if (commentPos >= 0) {
                line = line.substring(0, commentPos);
            }
            line = line.trim();
        } catch (final Exception e) {
            throw new ICUUncheckedIOException("Line \"{" + originalLine + "}\", \"{" + line + "}\"", e);
        }
        return line;
    }
}