tools/java/org/unicode/cldr/util/ReferenceStringSearch.java - platform/external/cldr - Git at Google

 /*
  **********************************************************************
  * Copyright (c) 2006-2007, Google and others.  All Rights Reserved.
  **********************************************************************
  * Author: Mark Davis
  **********************************************************************
  */
 package org.unicode.cldr.util;

 import java.util.ArrayList;

 import com.ibm.icu.text.BreakIterator;
 import com.ibm.icu.text.CollationElementIterator;
 import com.ibm.icu.text.Collator;
 import com.ibm.icu.text.RuleBasedCollator;
 import com.ibm.icu.util.ULocale;

 /**
  * This is intended to be a reference implementation for StringSearch. The
  * implementation is thus not high performance; it just transforms both the key
  * (what is searched for) and the target into collationElement space, and
  * searches there. It is however, architecturally cleaner in that it first has a
  * search that finds and extended range, then picks a boundary within that.
  * <p>
  * The key is that there is a match for a key string in a target text at positions <a,b> iff collator.compare(key,
  * target.substring(a,b)) == 0.
  *
  * @author markdavis
  */
 public class ReferenceStringSearch {
     private static final int PADDING = 3;

     private RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);

     private BreakIterator breaker;

     private String key;

     private String target;

     // all of the following are in processed CollationElementSpace

     private int[] keyBuffer;

     private CollationElementIterator2 targetIterator;

     private int[] targetBuffer;

     private int targetBufferStart;
     private int targetBufferLength;

     // Map from target items back to native indices
     private int[] targetBackMapBefore;

     private int[] targetBackMapAfter;
     private int lastAfter;

     // input flags to indicate which way we want to go within the range of
     // possible boundaries
     boolean widestStart, widestLimit;

     /**
      * A struct for showing match information with the range of possible
      * boundaries.
      */
     public static class ExtendedRange {
         int minStart, maxStart, minLimit, maxLimit;

         public String toString() {
             return minStart + ", " + maxStart + ", " + minLimit + ", " + maxLimit;
         }

         public String toString(String key, String target) {
             return "'" + target.substring(0, minStart) + "["
                 + target.substring(minStart, maxStart) + "{"
                 + target.substring(maxStart, minLimit) + "}"
                 + target.substring(minLimit, maxLimit) + "]"
                 + target.substring(maxLimit) + "'";
         }
     }

     /**
      * A struct that just shows the start/end, based on the settings given.
      */
     public static class Range {
         public int start;
         public int limit;

         public String toString() {
             return start + ", " + limit;
         }

         public String toString(String key, String target) {
             return "'" + target.substring(0, start) + "["
                 + target.substring(start, limit) + "]"
                 + target.substring(limit) + "'";
         }
     }

     public RuleBasedCollator getCollator() {
         return collator;
     }

     /**
      * If the collator settings are changed externally, be sure to call
      * setCollator();
      */
     public ReferenceStringSearch setCollator(RuleBasedCollator collator) {
         this.collator = collator;
         targetIterator = new CollationElementIterator2(collator);
         // reset the key and target, since their collationElements may change
         if (key != null) {
             setKey(key);
         }
         if (target != null) {
             setTarget(target);
         }
         return this;
     }

     public BreakIterator getBreaker() {
         return breaker;
     }

     public ReferenceStringSearch setBreaker(BreakIterator breaker) {
         this.breaker = breaker;
         if (target != null) {
             breaker.setText(target);
         }
         return this;
     }

     /**
      * If true, will pick the largest possible limit for a match.
      */
     public boolean isWidestLimit() {
         return widestLimit;
     }

     /**
      * If true, will pick the largest possible limit for a match.
      */
     public void setWidestLimit(boolean widestLimit) {
         this.widestLimit = widestLimit;
     }

     /**
      * If true, will pick the least possible start for a match.
      */
     public boolean isWidestStart() {
         return widestStart;
     }

     /**
      * If true, will pick the least possible start for a match.
      */
     public void setWidestStart(boolean widestStart) {
         this.widestStart = widestStart;
     }

     public String getKey() {
         return key;
     }

     public ReferenceStringSearch setKey(String key) {
         this.key = key;
         ArrayList<Integer> keyBufferList = new ArrayList<Integer>();
         CollationElementIterator2 keyIterator = new CollationElementIterator2(collator).setText(key);
         while (true) {
             int collationElement = keyIterator.nextProcessed();
             if (collationElement == CollationElementIterator.NULLORDER) {
                 break;
             }
             // store the primary, plus the index before and after.
             keyBufferList.add(collationElement);
         }
         keyBuffer = getIntBuffer(keyBufferList);
         return this;
     }

     public int getNativeOffset() {
         return targetBackMapBefore[targetBufferStart];
     }

     public ReferenceStringSearch setNativeOffset(int nativeOffset) {
         if (nativeOffset < 0 || nativeOffset > target.length()) {
             throw new ArrayIndexOutOfBoundsException();
         }
         // use dumb implementation. Better implementation would leverage
         // contents of current buffer
         targetIterator.setOffset(nativeOffset);
         targetBufferStart = 0;
         targetBufferLength = 0;
         lastAfter = 0;
         if (nativeOffset != 0) {
             // we have to reset lastAfter!
             targetIterator.previousProcessed();
             lastAfter = targetIterator.offsetAfter;
         }
         fillBuffer();
         return this;
     }

     public String getTarget() {
         return target;
     }

     /**
      * Set the text to search within.
      */
     public ReferenceStringSearch setTarget(String target) {
         this.target = target;
         if (breaker != null) {
             breaker.setText(target);
         }
         targetIterator.setText(target); // TODO optimize creation
         // make the buffer just a bit bigger than we need
         // note: we could optimize to reuse buffers, but probably not worth it
         targetBuffer = new int[keyBuffer.length + PADDING];
         targetBackMapBefore = new int[keyBuffer.length + PADDING];
         targetBackMapAfter = new int[keyBuffer.length + PADDING];
         targetBufferStart = 0;
         lastAfter = 0;
         targetBufferLength = 0;
         fillBuffer();
         return this;
     }

     /**
      * We keep the circular buffer as start + length instead of start + limit so
      * we can distinguish the 0 case.
      *
      * @return
      */
     private boolean shiftBuffer() {
         lastAfter = targetBackMapAfter[targetBufferStart]; // remember last after
         ++targetBufferStart;
         if (targetBufferStart >= targetBuffer.length) {
             targetBufferStart = 0;
         }
         --targetBufferLength;
         return fillBuffer();
     }

     private boolean fillBuffer() {
         // TODO mark as done so we don't call extra at the end
         while (targetBufferLength < keyBuffer.length + 1) {
             int ce = targetIterator.nextProcessed();
             if (ce == CollationElementIterator.NULLORDER) {
                 return false;
             }
             int targetBufferLimit = targetBufferStart + targetBufferLength;
             if (targetBufferLimit >= targetBuffer.length) {
                 targetBufferLimit -= targetBuffer.length;
             }
             targetBuffer[targetBufferLimit] = ce;
             targetBackMapBefore[targetBufferLimit] = targetIterator.offsetBefore;
             targetBackMapAfter[targetBufferLimit] = targetIterator.offsetAfter;
             ++targetBufferLength;
         }
         return true;
     }

     /**
      * Find the next match, and set the min/maxStart/Limit. Return false if not
      * found. Here is an example, where [...] is the maximal match, and {..} the
      * minimal.
      *
      * <pre>
      *  Raw Positions of 'eß' in ' ess eß ESS̀ '
      *    '[ {ess} ]eß ESS̀ '
      *    ' ess[ {eß} ]ESS̀ '
      *    ' ess eß[ {ESS}̀ ]'
      * </pre>
      *
      * Matches may overlap. Thus
      *
      * <pre>
      *  Raw Positions of 'a!a' in 'b.a.a.a.b'
      *    'b[.{a.a}.]a.b'
      *    'b.a[.{a.a}.]b'
      * </pre>
      */
     public boolean searchForwards(ExtendedRange position) {
         while (targetBufferLength >= keyBuffer.length) {
             if (matchesAt()) {
                 // minStart is from the After of previous item.
                 position.minStart = lastAfter;

                 position.maxStart = targetBackMapBefore[targetBufferStart];
                 int last = targetBufferStart + keyBuffer.length - 1;
                 if (last >= targetBuffer.length) {
                     last -= targetBuffer.length;
                 }
                 position.minLimit = targetBackMapAfter[last];

                 // maxLimit is from the Before of the next item
                 // or, if we are at the very end of the text, the text length.
                 if (targetBufferLength == keyBuffer.length) {
                     position.maxLimit = target.length();
                 } else {
                     ++last; // we reuse the position since we are already there.
                     if (last >= targetBuffer.length) {
                         last -= targetBuffer.length;
                     }
                     position.maxLimit = targetBackMapBefore[last];
                 }
                 shiftBuffer(); // move to next offset
                 return true;
             }
             shiftBuffer(); // move to next offset
         }
         return false;
     }

     /**
      * Simple match at position.
      */
     private boolean matchesAt() {
         int j = targetBufferStart;
         for (int i = 0; i < keyBuffer.length; ++i) {
             if (keyBuffer[i] != targetBuffer[j]) {
                 return false;
             }
             ++j;
             if (j >= targetBuffer.length) {
                 j = 0;
             }
         }
         return true;
     }

     private ExtendedRange internalPosition = new ExtendedRange();

     /**
      * This is the main public interface for searching. It filters out anything
      * that doesn't match the breaker, and adjusts the boundaries to the max/min
      * permitted. Examples:
      *
      * <pre>
      *  Positions of 'eß' in ' ess eß ESS̀ '
      *    ' [ess] eß ESS̀ '
      *    ' ess [eß] ESS̀ '
      *    ' ess eß [ESS̀] '
      *    Count: 3
      *
      * Positions of 'a!a' in 'b.a.a.a.b'
      *    'b.[a.a].a.b'
      *    'b.a.[a.a].b'
      *    Count: 2
      * </pre>
      *
      * @param position
      * @return
      */
     public boolean searchForwards(Range position) {
         while (true) {
             boolean succeeds = searchForwards(internalPosition);
             if (!succeeds) return false;
             position.start = getBoundary(breaker, internalPosition.minStart, internalPosition.maxStart, !widestStart);
             if (position.start == -1) continue; // failed to find the right boundary
             position.limit = getBoundary(breaker, internalPosition.minLimit, internalPosition.maxLimit, widestLimit);
             if (position.limit == -1) continue; // failed to find the right boundary
             return true;
         }
     }

     /****************** PRIVATES ***************/

     /**
      * This really ought to be just methods on CollationElementIterator.
      */
     public static class CollationElementIterator2 {
         private CollationElementIterator keyIterator;
         private int strengthMask;
         private int variableTop;
         private int offsetBefore;
         private int offsetAfter;

         public int getOffsetBefore() {
             return offsetBefore;
         }

         public int getOffsetAfter() {
             return offsetAfter;
         }

         public CollationElementIterator2 reset() {
             keyIterator.reset();
             return this;
         }

         public CollationElementIterator2 setOffset(int offset) {
             keyIterator.setOffset(offset);
             return this;
         }

         public CollationElementIterator2 setText(String source) {
             keyIterator.setText(source);
             return this;
         }

         public CollationElementIterator2(RuleBasedCollator collator) {
             // gather some information that we will need later
             strengthMask = 0xFFFF0000;
             variableTop = !collator.isAlternateHandlingShifted() ? -1 : collator.getVariableTop() | 0xFFFF;
             // this needs to be fixed a bit for case-level, etc.
             switch (collator.getStrength()) {
             case Collator.PRIMARY:
                 strengthMask = 0xFFFF0000;
                 break;
             case Collator.SECONDARY:
                 strengthMask = 0xFFFFFF00;
                 break;
             default:
                 strengthMask = 0xFFFFFFFF;
                 break;
             }
             keyIterator = collator.getCollationElementIterator("");
         }

         /**
          * This should be a method on CollationElementIterator. Returns next
          * non-zero collation element, setting indexBefore, indexAfter. Should also
          * process shifted and strength, masking as needed. If a collation element
          * has a continuation, then the indexAfter = indexBefore, for example, if
          * [CE1,CE2] form a single collation element for the characters between
          * native indexes 5 and 8, (C2 being a continuation, then the result of two
          * calls to nextProcessed would be [CE1, 5, 5] then [CE1, 5,8].
          * <p>
          * previousProcessed would do similar things, backwards.
          *
          */
         int nextProcessed() {
             while (true) {
                 offsetBefore = keyIterator.getOffset();
                 int collationElement = keyIterator.next();
                 if (collationElement != CollationElementIterator.NULLORDER) {

                     // note: the collation element iterator ought to give us processed values, but it doesn't
                     // so we have to simulate that.
                     collationElement &= strengthMask; // mask to only the strengths we have
                     // check for shifted.
                     // TODO This is not exactly right, and we also need to eject any following combining marks,
                     // so fix later.
                     if (collationElement < variableTop && collationElement > 0xFFFF) {
                         continue;
                     }
                     if (collationElement == 0) {
                         continue;
                     }

                 }
                 offsetAfter = keyIterator.getOffset();
                 return collationElement;
             }
         }

         int previousProcessed() {
             while (true) {
                 offsetAfter = keyIterator.getOffset();
                 int collationElement = keyIterator.previous();
                 if (collationElement != CollationElementIterator.NULLORDER) {

                     // note: the collation element iterator ought to give us processed values, but it doesn't
                     // so we have to simulate that.
                     collationElement &= strengthMask; // mask to only the strengths we have
                     // check for shifted.
                     // TODO This is not exactly right, and we also need to eject any following combining marks,
                     // so fix later.
                     if (collationElement < variableTop && collationElement > 0xFFFF) {
                         continue;
                     }
                     if (collationElement == 0) {
                         continue;
                     }

                 }
                 offsetBefore = keyIterator.getOffset();
                 return collationElement;
             }
         }
     }

     /**
      * Utility
      *
      * @param keyBufferList
      * @return
      */
     private int[] getIntBuffer(ArrayList<Integer> keyBufferList) {
         int[] buffer = new int[keyBufferList.size()];
         for (int i = 0; i < keyBufferList.size(); ++i) {
             buffer[i] = keyBufferList.get(i).intValue();
         }
         return buffer;
     }

     /**
      * This really should be on breakIterator, so it can be done more efficiently.
      * <p>
      * Get the item that is on a boundary according to the breaker, and is between the input boundaries. The boolean
      * greatest controls whether we pick the least or the greatest possible offset that works according to the breaker.
      * <p>
      * Returns -1 if there is no valid offset according to the breaker.
      *
      * @param minBoundary
      * @param maxBoundary
      * @param greatest
      * @return
      */
     public static int getBoundary(BreakIterator breaker, int minBoundary, int maxBoundary, boolean greatest) {
         if (breaker == null) return greatest ? maxBoundary : minBoundary;
         int result;
         // this may or may not be the most efficient way to test; ask Andy
         if (greatest) {
             result = breaker.preceding(maxBoundary + 1);
             if (result < minBoundary) {
                 result = -1;
             }
         } else {
             result = breaker.following(minBoundary - 1);
             if (result < minBoundary) {
                 result = -1;
             }
         }
         return result;
     }

 }
	/*
	**********************************************************************
	* Copyright (c) 2006-2007, Google and others. All Rights Reserved.
	**********************************************************************
	* Author: Mark Davis
	**********************************************************************
	*/
	package org.unicode.cldr.util;

	import java.util.ArrayList;

	import com.ibm.icu.text.BreakIterator;
	import com.ibm.icu.text.CollationElementIterator;
	import com.ibm.icu.text.Collator;
	import com.ibm.icu.text.RuleBasedCollator;
	import com.ibm.icu.util.ULocale;

	/**
	* This is intended to be a reference implementation for StringSearch. The
	* implementation is thus not high performance; it just transforms both the key
	* (what is searched for) and the target into collationElement space, and
	* searches there. It is however, architecturally cleaner in that it first has a
	* search that finds and extended range, then picks a boundary within that.
	* <p>
	* The key is that there is a match for a key string in a target text at positions <a,b> iff collator.compare(key,
	* target.substring(a,b)) == 0.
	*
	* @author markdavis
	*/
	public class ReferenceStringSearch {
	private static final int PADDING = 3;

	private RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT);

	private BreakIterator breaker;

	private String key;

	private String target;

	// all of the following are in processed CollationElementSpace

	private int[] keyBuffer;

	private CollationElementIterator2 targetIterator;

	private int[] targetBuffer;

	private int targetBufferStart;
	private int targetBufferLength;

	// Map from target items back to native indices
	private int[] targetBackMapBefore;

	private int[] targetBackMapAfter;
	private int lastAfter;

	// input flags to indicate which way we want to go within the range of
	// possible boundaries
	boolean widestStart, widestLimit;

	/**
	* A struct for showing match information with the range of possible
	* boundaries.
	*/
	public static class ExtendedRange {
	int minStart, maxStart, minLimit, maxLimit;

	public String toString() {
	return minStart + ", " + maxStart + ", " + minLimit + ", " + maxLimit;
	}

	public String toString(String key, String target) {
	return "'" + target.substring(0, minStart) + "["
	+ target.substring(minStart, maxStart) + "{"
	+ target.substring(maxStart, minLimit) + "}"
	+ target.substring(minLimit, maxLimit) + "]"
	+ target.substring(maxLimit) + "'";
	}
	}

	/**
	* A struct that just shows the start/end, based on the settings given.
	*/
	public static class Range {
	public int start;
	public int limit;

	public String toString() {
	return start + ", " + limit;
	}

	public String toString(String key, String target) {
	return "'" + target.substring(0, start) + "["
	+ target.substring(start, limit) + "]"
	+ target.substring(limit) + "'";
	}
	}

	public RuleBasedCollator getCollator() {
	return collator;
	}

	/**
	* If the collator settings are changed externally, be sure to call
	* setCollator();
	*/
	public ReferenceStringSearch setCollator(RuleBasedCollator collator) {
	this.collator = collator;
	targetIterator = new CollationElementIterator2(collator);
	// reset the key and target, since their collationElements may change
	if (key != null) {
	setKey(key);
	}
	if (target != null) {
	setTarget(target);
	}
	return this;
	}

	public BreakIterator getBreaker() {
	return breaker;
	}

	public ReferenceStringSearch setBreaker(BreakIterator breaker) {
	this.breaker = breaker;
	if (target != null) {
	breaker.setText(target);
	}
	return this;
	}

	/**
	* If true, will pick the largest possible limit for a match.
	*/
	public boolean isWidestLimit() {
	return widestLimit;
	}

	/**
	* If true, will pick the largest possible limit for a match.
	*/
	public void setWidestLimit(boolean widestLimit) {
	this.widestLimit = widestLimit;
	}

	/**
	* If true, will pick the least possible start for a match.
	*/
	public boolean isWidestStart() {
	return widestStart;
	}

	/**
	* If true, will pick the least possible start for a match.
	*/
	public void setWidestStart(boolean widestStart) {
	this.widestStart = widestStart;
	}

	public String getKey() {
	return key;
	}

	public ReferenceStringSearch setKey(String key) {
	this.key = key;
	ArrayList<Integer> keyBufferList = new ArrayList<Integer>();
	CollationElementIterator2 keyIterator = new CollationElementIterator2(collator).setText(key);
	while (true) {
	int collationElement = keyIterator.nextProcessed();
	if (collationElement == CollationElementIterator.NULLORDER) {
	break;
	}
	// store the primary, plus the index before and after.
	keyBufferList.add(collationElement);
	}
	keyBuffer = getIntBuffer(keyBufferList);
	return this;
	}

	public int getNativeOffset() {
	return targetBackMapBefore[targetBufferStart];
	}

	public ReferenceStringSearch setNativeOffset(int nativeOffset) {
	if (nativeOffset < 0 \|\| nativeOffset > target.length()) {
	throw new ArrayIndexOutOfBoundsException();
	}
	// use dumb implementation. Better implementation would leverage
	// contents of current buffer
	targetIterator.setOffset(nativeOffset);
	targetBufferStart = 0;
	targetBufferLength = 0;
	lastAfter = 0;
	if (nativeOffset != 0) {
	// we have to reset lastAfter!
	targetIterator.previousProcessed();
	lastAfter = targetIterator.offsetAfter;
	}
	fillBuffer();
	return this;
	}

	public String getTarget() {
	return target;
	}

	/**
	* Set the text to search within.
	*/
	public ReferenceStringSearch setTarget(String target) {
	this.target = target;
	if (breaker != null) {
	breaker.setText(target);
	}
	targetIterator.setText(target); // TODO optimize creation
	// make the buffer just a bit bigger than we need
	// note: we could optimize to reuse buffers, but probably not worth it
	targetBuffer = new int[keyBuffer.length + PADDING];
	targetBackMapBefore = new int[keyBuffer.length + PADDING];
	targetBackMapAfter = new int[keyBuffer.length + PADDING];
	targetBufferStart = 0;
	lastAfter = 0;
	targetBufferLength = 0;
	fillBuffer();
	return this;
	}

	/**
	* We keep the circular buffer as start + length instead of start + limit so
	* we can distinguish the 0 case.
	*
	* @return
	*/
	private boolean shiftBuffer() {
	lastAfter = targetBackMapAfter[targetBufferStart]; // remember last after
	++targetBufferStart;
	if (targetBufferStart >= targetBuffer.length) {
	targetBufferStart = 0;
	}
	--targetBufferLength;
	return fillBuffer();
	}

	private boolean fillBuffer() {
	// TODO mark as done so we don't call extra at the end
	while (targetBufferLength < keyBuffer.length + 1) {
	int ce = targetIterator.nextProcessed();
	if (ce == CollationElementIterator.NULLORDER) {
	return false;
	}
	int targetBufferLimit = targetBufferStart + targetBufferLength;
	if (targetBufferLimit >= targetBuffer.length) {
	targetBufferLimit -= targetBuffer.length;
	}
	targetBuffer[targetBufferLimit] = ce;
	targetBackMapBefore[targetBufferLimit] = targetIterator.offsetBefore;
	targetBackMapAfter[targetBufferLimit] = targetIterator.offsetAfter;
	++targetBufferLength;
	}
	return true;
	}

	/**
	* Find the next match, and set the min/maxStart/Limit. Return false if not
	* found. Here is an example, where [...] is the maximal match, and {..} the
	* minimal.
	*
	* <pre>
	* Raw Positions of 'eß' in ' ess eß ESS̀ '
	* '[ {ess} ]eß ESS̀ '
	* ' ess[ {eß} ]ESS̀ '
	* ' ess eß[ {ESS}̀ ]'
	* </pre>
	*
	* Matches may overlap. Thus
	*
	* <pre>
	* Raw Positions of 'a!a' in 'b.a.a.a.b'
	* 'b[.{a.a}.]a.b'
	* 'b.a[.{a.a}.]b'
	* </pre>
	*/
	public boolean searchForwards(ExtendedRange position) {
	while (targetBufferLength >= keyBuffer.length) {
	if (matchesAt()) {
	// minStart is from the After of previous item.
	position.minStart = lastAfter;

	position.maxStart = targetBackMapBefore[targetBufferStart];
	int last = targetBufferStart + keyBuffer.length - 1;
	if (last >= targetBuffer.length) {
	last -= targetBuffer.length;
	}
	position.minLimit = targetBackMapAfter[last];

	// maxLimit is from the Before of the next item
	// or, if we are at the very end of the text, the text length.
	if (targetBufferLength == keyBuffer.length) {
	position.maxLimit = target.length();
	} else {
	++last; // we reuse the position since we are already there.
	if (last >= targetBuffer.length) {
	last -= targetBuffer.length;
	}
	position.maxLimit = targetBackMapBefore[last];
	}
	shiftBuffer(); // move to next offset
	return true;
	}
	shiftBuffer(); // move to next offset
	}
	return false;
	}

	/**
	* Simple match at position.
	*/
	private boolean matchesAt() {
	int j = targetBufferStart;
	for (int i = 0; i < keyBuffer.length; ++i) {
	if (keyBuffer[i] != targetBuffer[j]) {
	return false;
	}
	++j;
	if (j >= targetBuffer.length) {
	j = 0;
	}
	}
	return true;
	}

	private ExtendedRange internalPosition = new ExtendedRange();

	/**
	* This is the main public interface for searching. It filters out anything
	* that doesn't match the breaker, and adjusts the boundaries to the max/min
	* permitted. Examples:
	*
	* <pre>
	* Positions of 'eß' in ' ess eß ESS̀ '
	* ' [ess] eß ESS̀ '
	* ' ess [eß] ESS̀ '
	* ' ess eß [ESS̀] '
	* Count: 3
	*
	* Positions of 'a!a' in 'b.a.a.a.b'
	* 'b.[a.a].a.b'
	* 'b.a.[a.a].b'
	* Count: 2
	* </pre>
	*
	* @param position
	* @return
	*/
	public boolean searchForwards(Range position) {
	while (true) {
	boolean succeeds = searchForwards(internalPosition);
	if (!succeeds) return false;
	position.start = getBoundary(breaker, internalPosition.minStart, internalPosition.maxStart, !widestStart);
	if (position.start == -1) continue; // failed to find the right boundary
	position.limit = getBoundary(breaker, internalPosition.minLimit, internalPosition.maxLimit, widestLimit);
	if (position.limit == -1) continue; // failed to find the right boundary
	return true;
	}
	}

	/**************** PRIVATES *************/

	/**
	* This really ought to be just methods on CollationElementIterator.
	*/
	public static class CollationElementIterator2 {
	private CollationElementIterator keyIterator;
	private int strengthMask;
	private int variableTop;
	private int offsetBefore;
	private int offsetAfter;

	public int getOffsetBefore() {
	return offsetBefore;
	}

	public int getOffsetAfter() {
	return offsetAfter;
	}

	public CollationElementIterator2 reset() {
	keyIterator.reset();
	return this;
	}

	public CollationElementIterator2 setOffset(int offset) {
	keyIterator.setOffset(offset);
	return this;
	}

	public CollationElementIterator2 setText(String source) {
	keyIterator.setText(source);
	return this;
	}

	public CollationElementIterator2(RuleBasedCollator collator) {
	// gather some information that we will need later
	strengthMask = 0xFFFF0000;
	variableTop = !collator.isAlternateHandlingShifted() ? -1 : collator.getVariableTop() \| 0xFFFF;
	// this needs to be fixed a bit for case-level, etc.
	switch (collator.getStrength()) {
	case Collator.PRIMARY:
	strengthMask = 0xFFFF0000;
	break;
	case Collator.SECONDARY:
	strengthMask = 0xFFFFFF00;
	break;
	default:
	strengthMask = 0xFFFFFFFF;
	break;
	}
	keyIterator = collator.getCollationElementIterator("");
	}

	/**
	* This should be a method on CollationElementIterator. Returns next
	* non-zero collation element, setting indexBefore, indexAfter. Should also
	* process shifted and strength, masking as needed. If a collation element
	* has a continuation, then the indexAfter = indexBefore, for example, if
	* [CE1,CE2] form a single collation element for the characters between
	* native indexes 5 and 8, (C2 being a continuation, then the result of two
	* calls to nextProcessed would be [CE1, 5, 5] then [CE1, 5,8].
	* <p>
	* previousProcessed would do similar things, backwards.
	*
	*/
	int nextProcessed() {
	while (true) {
	offsetBefore = keyIterator.getOffset();
	int collationElement = keyIterator.next();
	if (collationElement != CollationElementIterator.NULLORDER) {

	// note: the collation element iterator ought to give us processed values, but it doesn't
	// so we have to simulate that.
	collationElement &= strengthMask; // mask to only the strengths we have
	// check for shifted.
	// TODO This is not exactly right, and we also need to eject any following combining marks,
	// so fix later.
	if (collationElement < variableTop && collationElement > 0xFFFF) {
	continue;
	}
	if (collationElement == 0) {
	continue;
	}

	}
	offsetAfter = keyIterator.getOffset();
	return collationElement;
	}
	}

	int previousProcessed() {
	while (true) {
	offsetAfter = keyIterator.getOffset();
	int collationElement = keyIterator.previous();
	if (collationElement != CollationElementIterator.NULLORDER) {

	// note: the collation element iterator ought to give us processed values, but it doesn't
	// so we have to simulate that.
	collationElement &= strengthMask; // mask to only the strengths we have
	// check for shifted.
	// TODO This is not exactly right, and we also need to eject any following combining marks,
	// so fix later.
	if (collationElement < variableTop && collationElement > 0xFFFF) {
	continue;
	}
	if (collationElement == 0) {
	continue;
	}

	}
	offsetBefore = keyIterator.getOffset();
	return collationElement;
	}
	}
	}

	/**
	* Utility
	*
	* @param keyBufferList
	* @return
	*/
	private int[] getIntBuffer(ArrayList<Integer> keyBufferList) {
	int[] buffer = new int[keyBufferList.size()];
	for (int i = 0; i < keyBufferList.size(); ++i) {
	buffer[i] = keyBufferList.get(i).intValue();
	}
	return buffer;
	}

	/**
	* This really should be on breakIterator, so it can be done more efficiently.
	* <p>
	* Get the item that is on a boundary according to the breaker, and is between the input boundaries. The boolean
	* greatest controls whether we pick the least or the greatest possible offset that works according to the breaker.
	* <p>
	* Returns -1 if there is no valid offset according to the breaker.
	*
	* @param minBoundary
	* @param maxBoundary
	* @param greatest
	* @return
	*/
	public static int getBoundary(BreakIterator breaker, int minBoundary, int maxBoundary, boolean greatest) {
	if (breaker == null) return greatest ? maxBoundary : minBoundary;
	int result;
	// this may or may not be the most efficient way to test; ask Andy
	if (greatest) {
	result = breaker.preceding(maxBoundary + 1);
	if (result < minBoundary) {
	result = -1;
	}
	} else {
	result = breaker.following(minBoundary - 1);
	if (result < minBoundary) {
	result = -1;
	}
	}
	return result;
	}

	}