blob: 3216f73217e771d88c8209df255cb833c63b65e5 [file] [log] [blame]
/*
**********************************************************************
* Copyright (c) 2006-2007, Google and others. All Rights Reserved.
**********************************************************************
* Author: Mark Davis
**********************************************************************
*/
package org.unicode.cldr.util;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.util.Dictionary.Matcher.Status;
/**
* This is a simple dictionary class used for testing. Should be in the package usertest, but it's a pain to rename
* files in CVS.
*
* @author markdavis
*/
public class SimpleDictionary<T> extends Dictionary<T> {
private TreeMap<CharSequence, T> data = new TreeMap<CharSequence, T>();
private Set<CharSequence> possibleMatchesBefore;
private Set<CharSequence> possibleMatchesAfter;
private Status finalStatus;
boolean done;
private int matchCount;
private CharSequence lastEntry = "";
public static class SimpleDictionaryBuilder<T> implements DictionaryBuilder<T> {
public SimpleDictionary<T> make(Map<CharSequence, T> source) {
return new SimpleDictionary(source);
}
}
private SimpleDictionary(Map<CharSequence, T> source) {
for (CharSequence text : source.keySet()) {
addMapping(text, source.get(text));
}
}
// private List<T> results;
//
// public static <U> Dictionary<U> getInstance(Map<CharSequence, U> source) {
// SimpleDictionary<U> result = new SimpleDictionary<U>();
// // if T is not an integer, then get the results, and assign each a number
// Map<U,Integer> valueToInt = new HashMap<U,Integer>();
// result.results = new ArrayList<U>();
// int count = 0;
// for (U value : source.values()) {
// Integer oldValue = valueToInt.get(value);
// if (oldValue == null) {
// result.results.add(value);
// valueToInt.put(value, count++);
// }
// }
//
//
// for (CharSequence text : source.keySet()) {
// result.addMapping(text, valueToInt.get(source.get(text)));
// }
// return result;
// }
private void addMapping(CharSequence text, T result) {
if (CharUtilities.compare(text, lastEntry) <= 0) {
throw new IllegalArgumentException("Each string must be greater than the previous one.");
}
lastEntry = text;
data.put(text, result);
}
public Iterator<Entry<CharSequence, T>> getMapping() {
return Collections.unmodifiableMap(data).entrySet().iterator();
}
@Override
public Matcher<T> getMatcher() {
return new SimpleMatcher();
}
private class SimpleMatcher extends Matcher<T> {
@Override
public Matcher<T> setOffset(int offset) {
possibleMatchesBefore = data.keySet();
done = false;
matchValue = null;
return super.setOffset(offset);
}
/**
* Dumb implementation.
*
*/
@Override
public Status next() {
// There are two degenerate cases: our dictionary is empty, or we are called on an empty string.
// As long as we get matches, we return them.
// When we fail, we return one of two statuses
// DONE if there were no more matches in the dictionary past the last match
// SINGLE if there was a longer match, plus the longest offset that we successfully got to.
// if we have already narrowed down to the end, just return the status
// everything should already be set to make this work.
if (done) {
if (finalStatus == Status.NONE) {
matchValue = null;
}
return finalStatus;
}
CharSequence firstMatch = null;
while (text.hasCharAt(matchEnd)) {
// get the next probe value
++matchEnd;
CharSequence probe = text.subSequence(offset, matchEnd);
// narrow to the items that start with the probe
// this filters Before into After
firstMatch = filterToStartsWith(probe);
// if we have a full match, return it
if (firstMatch != null && firstMatch.length() == probe.length()) {
possibleMatchesAfter.remove(firstMatch);
possibleMatchesBefore = possibleMatchesAfter;
matchValue = data.get(firstMatch);
finalStatus = Status.NONE;
return Status.MATCH;
}
// See if we've run out
// example: probe = "man"
// three cases, based on what was in the set
// {man}: return DONE (we did a match before)
// {man, many}: return SINGLE
// {man, many, manner}: return PLURAL
// {many}: return SINGLE
if (possibleMatchesAfter.size() == 0) {
--matchEnd; // backup
break;
}
possibleMatchesBefore = possibleMatchesAfter;
}
// no more work to be done.
done = true;
if (matchEnd == offset || possibleMatchesBefore.size() == 0) {
matchValue = null;
return finalStatus = Status.NONE;
}
if (firstMatch == null) { // just in case we skipped the above loop
firstMatch = possibleMatchesBefore.iterator().next();
}
matchValue = data.get(firstMatch);
matchCount = possibleMatchesBefore.size();
return finalStatus = Status.PARTIAL;
}
public boolean nextUniquePartial() {
// we have already set the matchValue, so we don't need to reset here.
return matchCount == 1;
}
/**
* Returns the first matching item, if there is one, and
* filters the rest of the list to those that match the probe.
*
* @param probe
* @return
*/
private CharSequence filterToStartsWith(CharSequence probe) {
CharSequence result = null;
possibleMatchesAfter = new TreeSet<CharSequence>();
for (CharSequence item : possibleMatchesBefore) {
if (startsWith(item, probe)) {
if (result == null) {
result = item;
}
possibleMatchesAfter.add(item);
}
}
return result;
}
public boolean contains(CharSequence text) {
return data.containsKey(text);
}
public T get(CharSequence text) {
return data.get(text);
}
// public static class GeqGetter<K> {
// private Set<K> set;
// private Iterator<K> iterator;
// private Comparator<? super K> comparator;
// boolean done;
// K lastItem = null;
// private int count;
//
// public GeqGetter(Set<K> set, Comparator<K> comparator) {
// this.comparator = comparator;
// this.set = set;
// reset();
// }
//
// public GeqGetter reset() {
// iterator = set.iterator();
// done = false;
// return this;
// }
//
// /**
// * Returns least element greater than or equal to probe.
// * @param probe
// * @return
// */
// public K getGeq(K probe) {
// if (lastItem != null && comparator.compare(lastItem, probe) >= 0) {
// return lastItem;
// }
// count = 0;
// while (iterator.hasNext()) {
// lastItem = iterator.next();
// ++count;
// if (comparator.compare(lastItem, probe) >= 0) {
// return lastItem;
// }
// }
// lastItem = null;
// return lastItem;
// }
// }
@Override
public Dictionary<T> getDictionary() {
// TODO Auto-generated method stub
return SimpleDictionary.this;
}
// public static class CharSequenceComparator implements Comparator<CharSequence> {
//
// public int compare(CharSequence first, CharSequence second) {
// int minLen = first.length();
// if (minLen > second.length())
// minLen = second.length();
// int result;
// for (int i = 0; i < minLen; ++i) {
// if (0 != (result = first.charAt(i) - second.charAt(i)))
// return result;
// }
// return first.length() - second.length();
// }
//
// }
}
public static boolean startsWith(CharSequence first, CharSequence possiblePrefix) {
if (first.length() < possiblePrefix.length()) {
return false;
}
for (int i = 0; i < possiblePrefix.length(); ++i) {
if (first.charAt(i) != possiblePrefix.charAt(i)) {
return false;
}
}
return true;
}
}