blob: 4b017cd28280ae34c349f810a4df4cebd9cc0200 [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2006-2007, Google and others. All Rights Reserved.
**********************************************************************
* Author: Mark Davis
**********************************************************************
*/
package org.unicode.cldr.unittest;
import java.text.ParsePosition;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.TreeMap;
import org.unicode.cldr.util.CollationStringByteConverter;
import org.unicode.cldr.util.Dictionary;
import org.unicode.cldr.util.Dictionary.DictionaryBuilder;
import org.unicode.cldr.util.Dictionary.DictionaryCharList;
import org.unicode.cldr.util.Dictionary.Matcher;
import org.unicode.cldr.util.Dictionary.Matcher.Filter;
import org.unicode.cldr.util.LenientDateParser;
import org.unicode.cldr.util.LenientDateParser.Parser;
import org.unicode.cldr.util.StateDictionaryBuilder;
import org.unicode.cldr.util.TestStateDictionaryBuilder;
import org.unicode.cldr.util.Utf8StringByteConverter;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.DateFormat;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.SimpleDateFormat;
import com.ibm.icu.util.Calendar;
import com.ibm.icu.util.SimpleTimeZone;
import com.ibm.icu.util.TimeZone;
import com.ibm.icu.util.ULocale;
public class TestCollationStringByteConverter {
private static final boolean DEBUG = true;
// static interface PartCharSequence {
// /**
// * Replace index < x.length() with hasCharAt(index)
// * @param index
// * @return
// */
// boolean hasCharAt(int index);
// char charAt(int index);
// public PartCharSequence subSequence2(int start, int end);
// /**
// * Returns a subsequence going to the end.
// * @param start
// * @return
// */
// public PartCharSequence subSequence2(int start);
// /**
// * Return the length known so far. If hasCharAt(getKnownLength()) ==
// false, then it is the real length.
// * @return
// */
// public int getKnownLength();
// }
//
// static class PartCharSequenceWrapper implements PartCharSequence {
// CharSequence source;
//
// public boolean equals(Object anObject) {
// return source.equals(anObject);
// }
//
// public int hashCode() {
// return source.hashCode();
// }
//
// public PartCharSequenceWrapper(CharSequence source) {
// this.source = source;
// }
//
// public char charAt(int index) {
// return source.charAt(index);
// }
//
// public PartCharSequence subSequence2(int beginIndex, int endIndex) {
// return new PartCharSequenceWrapper(source.subSequence(beginIndex,
// endIndex));
// }
//
// public PartCharSequence subSequence2(int beginIndex) {
// return new PartCharSequenceWrapper(source.subSequence(beginIndex,
// source.length()));
// }
//
// /* (non-Javadoc)
// * @see com.ibm.icu.text.RuleBasedCollator.PartCharSequence#hasCharAt(int)
// */
// public boolean hasCharAt(int index) {
// return index < source.length();
// }
// /* (non-Javadoc)
// * @see
// com.ibm.icu.text.RuleBasedCollator.PartCharSequence#getKnownLength()
// */
// public int getKnownLength() {
// return source.length();
// }
// }
boolean hasBadSurrogates(String string) {
boolean needsLow = false;
for (int i = 0; i < string.length(); ++i) {
char c = string.charAt(i);
if (needsLow != Character.isLowSurrogate(c)) {
return true;
}
needsLow = Character.isHighSurrogate(c);
}
return needsLow;
}
public static void main(String[] args) throws Exception {
check2(ULocale.JAPANESE);
// check2(ULocale.FRANCE);
if (true)
return;
checkBasic();
check();
}
private static void checkBasic() {
System.out.println(ULocale.getDisplayName("en_GB", "en"));
DictionaryBuilder<CharSequence> builder = new StateDictionaryBuilder<CharSequence>();
Map<CharSequence, CharSequence> map = new TreeMap<CharSequence, CharSequence>(
Dictionary.CHAR_SEQUENCE_COMPARATOR);
map.put("a", "ABC");
map.put("bc", "B"); // ß
Dictionary<CharSequence> dict = builder.make(map);
String[] tests = { "a/bc", "bc", "a", "d", "", "abca" };
for (String test : tests) {
System.out.println("TRYING: " + test);
DictionaryCharList<CharSequence> gcs = new DictionaryCharList<CharSequence>(
dict, test);
for (int i = 0; gcs.hasCharAt(i); ++i) {
char c = gcs.charAt(i);
final int sourceOffset = gcs.toSourceOffset(i);
final CharSequence sourceSubSequence = gcs.sourceSubSequence(i,
i + 1);
System.out.println(i + "\t" + c + "\t" + sourceOffset + "\t"
+ sourceSubSequence);
}
gcs.hasCharAt(Integer.MAX_VALUE);
System.out.println("Length: " + gcs.getKnownLength());
}
}
private static void check2(ULocale testLocale) {
// Map<CharSequence,CharSequence> map = new
// TreeMap<CharSequence,CharSequence>();
// map.put("j", "J");
// map.put("july", "JULY");
// map.put("june", "JUNE"); // ß
// map.put("august", "AUGUST"); // ß
// DictionaryBuilder<CharSequence> builder = new
// StateDictionaryBuilder<CharSequence>();
// Dictionary<CharSequence> dict = builder.make(map);
// System.out.println(map);
// System.out.println(dict);
// Matcher m = dict.getMatcher();
// System.out.println(m.setText("a").next(Filter.LONGEST_UNIQUE) + "\t"
// + m + "\t" + m.nextUniquePartial());
// System.out.println(m.setText("j").next(Filter.LONGEST_UNIQUE) + "\t"
// + m + "\t" + m.nextUniquePartial());
// System.out.println(m.setText("ju").next(Filter.LONGEST_UNIQUE) + "\t"
// + m + "\t" + m.nextUniquePartial());
// System.out.println(m.setText("jul").next(Filter.LONGEST_UNIQUE) +
// "\t" + m + "\t" + m.nextUniquePartial());
TimeZone testTimeZone = TimeZone.getTimeZone("America/Chicago");
TimeZone unknown = new SimpleTimeZone(60000, "Etc/Unknown");
// DateFormatSymbols dfs = new DateFormatSymbols(testLocale);
// String[][] zoneInfo = dfs.getZoneStrings();
// for (int i = 0; i < zoneInfo.length; ++i) {
// System.out.print(i );
// for (int j = 0; j < zoneInfo[i].length; ++j) {
// System.out.print("\t" + zoneInfo[i][j]);
// }
// System.out.println();
// }
LenientDateParser ldp = LenientDateParser.getInstance(testLocale);
Parser parser = ldp.getParser();
if (DEBUG) {
System.out.println(parser.debugShow2());
}
LinkedHashSet<DateFormat> tests = new LinkedHashSet<DateFormat>();
// Arrays.asList(new String[] {"jan 12 1963 1942 au", "1:2:3",
// "1:3 jan"})
Calendar testDate = Calendar.getInstance();
// testDate.set(2007, 8, 25, 1, 2, 3);
//
// Calendar dateOnly = Calendar.getInstance();
// dateOnly.set(Calendar.YEAR, testDate.get(Calendar.YEAR));
// dateOnly.set(Calendar.MONTH, testDate.get(Calendar.MONTH));
// dateOnly.set(Calendar.DAY_OF_MONTH,
// testDate.get(Calendar.DAY_OF_MONTH));
//
// Calendar timeOnly = Calendar.getInstance();
// timeOnly.set(Calendar.HOUR, testDate.get(Calendar.HOUR));
// timeOnly.set(Calendar.MINUTE, testDate.get(Calendar.MINUTE));
// timeOnly.set(Calendar.SECOND, testDate.get(Calendar.SECOND));
//
// Calendar hourMinuteOnly = Calendar.getInstance();
// hourMinuteOnly.set(Calendar.HOUR, testDate.get(Calendar.HOUR));
// hourMinuteOnly.set(Calendar.MINUTE, testDate.get(Calendar.MINUTE));
SimpleDateFormat iso = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss VVVV");
for (int style = 0; style < 4; ++style) {
final DateFormat dateInstance = DateFormat.getDateInstance(style,
testLocale);
dateInstance.setTimeZone(testTimeZone);
tests.add(dateInstance);
final DateFormat timeInstance = DateFormat.getTimeInstance(style,
testLocale);
timeInstance.setTimeZone(testTimeZone);
tests.add(timeInstance);
for (int style2 = 0; style2 < 4; ++style2) {
final DateFormat dateTimeInstance = DateFormat
.getDateTimeInstance(style, style, testLocale);
dateTimeInstance.setTimeZone(testTimeZone);
tests.add(dateTimeInstance);
final SimpleDateFormat simpleDateFormat = new SimpleDateFormat(
((SimpleDateFormat) DateFormat.getTimeInstance(style2,
testLocale)).toPattern()
+ " "
+ ((SimpleDateFormat) dateInstance).toPattern(),
testLocale);
simpleDateFormat.setTimeZone(testTimeZone);
tests.add(simpleDateFormat);
// tests.put(DateFormat.getTimeInstance(style,
// testLocale).format(testDate) + " " + dateExample, null);
// // reversed
}
}
// DateTimePatternGenerator dtpg =
// DateTimePatternGenerator.getInstance(testLocale);
// Map<String,String> skeletons = dtpg.getSkeletons(new
// LinkedHashMap());
// for (String skeleton : skeletons.keySet()) {
// String pattern = skeletons.get(skeleton);
// tests.add(new SimpleDateFormat(pattern, testLocale));
// }
ParsePosition parsePosition = new ParsePosition(0);
ParsePosition parsePosition2 = new ParsePosition(0);
Calendar calendar = Calendar.getInstance();
Calendar calendar2 = Calendar.getInstance();
int success = 0;
int failure = 0;
for (DateFormat test : tests) {
String expected = test.format(testDate);
parsePosition.setIndex(0);
calendar.clear();
calendar.setTimeZone(unknown);
parser.parse(expected, calendar, parsePosition);
parsePosition2.setIndex(0);
calendar2.clear();
calendar2.setTimeZone(unknown);
test.parse(expected, calendar2, parsePosition2);
if (true) {
}
if (areEqual(calendar, calendar2)) {
System.out.println("OK\t" + expected + "\t=>\t<"
+ iso.format(calendar) + ">\t"
+ show(expected, parsePosition));
success++;
} else {
failure++;
LenientDateParser.DEBUG = true;
calendar.clear();
parsePosition.setIndex(0);
parser.parse(expected, calendar, parsePosition);
LenientDateParser.DEBUG = false;
System.out.println("FAIL\t" + expected + "\t=>\t<"
+ iso.format(calendar) + "//"
+ showZone(ldp, calendar.getTimeZone())
+ ">\texpected: <" + iso.format(calendar2) + "//"
+ showZone(ldp, calendar2.getTimeZone()) + ">\t"
+ show(expected, parsePosition) + "\t" + parser);
}
System.out.println();
}
System.out.println("SUCCESS: " + success + "\t\tFAILURE: " + failure);
}
private static String showZone(LenientDateParser ldp, TimeZone zone) {
String id = zone.getID();
return LenientDateParser.getCountry(id) + ":" + id;
}
private static boolean areEqual(Calendar calendar, Calendar calendar2) {
if (calendar.getTimeInMillis() == calendar2.getTimeInMillis()) { // &&
// calendar.getTimeZone().equals(calendar2.getTimeZone())
// until
// ICU
// gets
// fixed
return true;
}
return false; // separate for debugging
}
private static String show(String test, ParsePosition parsePosition) {
return "{"
+ test.substring(0, parsePosition.getIndex())
+ "|"
+ test.substring(parsePosition.getIndex())
+ (parsePosition.getErrorIndex() == -1 ? "" : "/"
+ parsePosition.getErrorIndex()) + "}";
}
public static void check() throws Exception {
final RuleBasedCollator col = (RuleBasedCollator) Collator
.getInstance(ULocale.ENGLISH);
col.setStrength(Collator.PRIMARY);
col.setAlternateHandlingShifted(true);
CollationStringByteConverter converter = new CollationStringByteConverter(
col, new Utf8StringByteConverter()); // new
// ByteString(true)
Matcher<String> matcher = converter.getDictionary().getMatcher();
// if (true) {
// Iterator<Entry<CharSequence, String>> x =
// converter.getDictionary().getMapping();
// while (x.hasNext()) {
// Entry<CharSequence, String> entry = x.next();
// System.out.println(entry.getKey() + "\t" +
// Utility.hex(entry.getKey().toString())+ "\t\t" + entry.getValue()
// + "\t" + Utility.hex(entry.getValue().toString()));
// }
// System.out.println(converter.getDictionary().debugShow());
// }
String[] tests = { "ab", "abc", "ss", "ß", "Abcde",
"Once Upon AB Time", "\u00E0b", "A\u0300b" };
byte[] output = new byte[1000];
for (String test : tests) {
String result = matcher
.setText(
new DictionaryCharList<String>(converter
.getDictionary(), test))
.convert(new StringBuffer()).toString();
System.out.println(test + "\t=>\t" + result);
int len = converter.toBytes(test, output, 0);
String result2 = converter.fromBytes(output, 0, len,
new StringBuilder()).toString();
System.out.println(test + "\t(bytes) =>\t" + result2);
for (int i = 0; i < len; ++i) {
System.out.print(Utility.hex(output[i] & 0xFF, 2) + " ");
}
System.out.println();
}
DictionaryBuilder<String> builder = new StateDictionaryBuilder<String>(); // .setByteConverter(converter);
Map<CharSequence, String> map = new TreeMap<CharSequence, String>(
Dictionary.CHAR_SEQUENCE_COMPARATOR);
map.put("ab", "found-ab");
map.put("abc", "found-ab");
map.put("ss", "found-ss"); // ß
Dictionary<String> dict = builder.make(map);
final String string = "Abcde and ab c Upon aß AB basS Time\u00E0bA\u0300b";
DictionaryCharList<String> x = new DictionaryCharList<String>(
converter.getDictionary(), string);
x.hasCharAt(Integer.MAX_VALUE); // force growth
System.out.println("Internal: "
+ x.sourceSubSequence(0, x.getKnownLength()));
TestStateDictionaryBuilder.tryFind(string,
new DictionaryCharList<String>(converter.getDictionary(),
string), dict, Filter.ALL);
TestStateDictionaryBuilder.tryFind(string,
new DictionaryCharList<String>(converter.getDictionary(),
string), dict, Filter.LONGEST_MATCH);
}
}