/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package java.text; |
| |
| import libcore.icu.CollationElementIteratorICU; |
| |
| /** |
| * Created by a {@code RuleBasedCollator} to iterate through a string. The |
| * result of each iteration is a 32-bit collation element that defines the |
| * ordering priority of the next character or sequence of characters in the |
| * source string. |
| * <p> |
| * For illustration, consider the following in Spanish: |
| * <p> |
| * "ca": the first collation element is collation_element('c') and second |
| * collation element is collation_element('a'). |
| * <p> |
| * Since "ch" in Spanish sorts as one entity, the example below returns one |
| * collation element for the two characters 'c' and 'h': |
| * <p> |
| * "cha": the first collation element is collation_element('ch') and the second |
| * one is collation_element('a'). |
| * <p> |
| * In German, since the character '\u0086' is a composed character of 'a' |
| * and 'e', the iterator returns two collation elements for the single character |
| * '\u0086': |
| * <p> |
| * "\u0086b": the first collation element is collation_element('a'), the |
| * second one is collation_element('e'), and the third collation element is |
| * collation_element('b'). |
| */ |
| public final class CollationElementIterator { |
| |
| /** |
| * This constant is returned by the iterator in the methods |
| * {@code next()} and {@code previous()} when the end or the |
| * beginning of the source string has been reached, and there are no more |
| * valid collation elements to return. |
| */ |
| public static final int NULLORDER = -1; |
| |
| private CollationElementIteratorICU icuIterator; |
| |
| CollationElementIterator(CollationElementIteratorICU iterator) { |
| this.icuIterator = iterator; |
| } |
| |
| /** |
| * Obtains the maximum length of any expansion sequence that ends with the |
| * specified collation element. Returns {@code 1} if there is no expansion |
| * with this collation element as the last element. |
| * |
| * @param order |
| * a collation element that has been previously obtained from a |
| * call to either the {@link #next()} or {@link #previous()} |
| * method. |
| * @return the maximum length of any expansion sequence ending with the |
| * specified collation element. |
| */ |
| public int getMaxExpansion(int order) { |
| return this.icuIterator.getMaxExpansion(order); |
| } |
| |
| /** |
| * Obtains the character offset in the source string corresponding to the |
| * next collation element. This value could be any of: |
| * <ul> |
| * <li>The index of the first character in the source string that matches |
| * the value of the next collation element. This means that if |
| * {@code setOffset(offset)} sets the index in the middle of a contraction, |
| * {@code getOffset()} returns the index of the first character in the |
| * contraction, which may not be equal to the original offset that was set. |
| * Hence calling {@code getOffset()} immediately after |
| * {@code setOffset(offset)} does not guarantee that the original offset set |
| * will be returned.</li> |
| * <li>If normalization is on, the index of the immediate subsequent |
| * character, or composite character with the first character, having a |
| * combining class of 0.</li> |
| * <li>The length of the source string, if iteration has reached the end. |
| * </li> |
| * </ul> |
| * |
| * @return The position of the collation element in the source string that |
| * will be returned by the next invocation of the {@link #next()} |
| * method. |
| */ |
| public int getOffset() { |
| return this.icuIterator.getOffset(); |
| } |
| |
| /** |
| * Obtains the next collation element in the source string. |
| * |
| * @return the next collation element or {@code NULLORDER} if the end |
| * of the iteration has been reached. |
| */ |
| public int next() { |
| return this.icuIterator.next(); |
| } |
| |
| /** |
| * Obtains the previous collation element in the source string. |
| * |
| * @return the previous collation element, or {@code NULLORDER} when |
| * the start of the iteration has been reached. |
| */ |
| public int previous() { |
| return this.icuIterator.previous(); |
| } |
| |
| /** |
| * Obtains the primary order of the specified collation element, i.e. the |
| * first 16 bits. This value is unsigned. |
| * |
| * @param order |
| * the element of the collation. |
| * @return the element's 16 bit primary order. |
| */ |
| public static final int primaryOrder(int order) { |
| return CollationElementIteratorICU.primaryOrder(order); |
| } |
| |
| /** |
| * Repositions the cursor to point at the first element of the current |
| * string. The next call to {@link #next()} or {@link #previous()} will |
| * return the first and last collation element in the string, respectively. |
| * <p> |
| * If the {@code RuleBasedCollator} used by this iterator has had its |
| * attributes changed, calling {@code reset()} reinitializes the iterator to |
| * use the new attributes. |
| */ |
| public void reset() { |
| this.icuIterator.reset(); |
| } |
| |
| /** |
| * Obtains the secondary order of the specified collation element, i.e. the |
| * 16th to 23th bits, inclusive. This value is unsigned. |
| * |
| * @param order |
| * the element of the collator. |
| * @return the 8 bit secondary order of the element. |
| */ |
| public static final short secondaryOrder(int order) { |
| return (short) CollationElementIteratorICU.secondaryOrder(order); |
| } |
| |
| /** |
| * Points the iterator at the collation element associated with the |
| * character in the source string which is found at the supplied offset. |
| * After this call completes, an invocation of the {@link #next()} method |
| * will return this collation element. |
| * <p> |
| * If {@code newOffset} corresponds to a character which is part of a |
| * sequence that maps to a single collation element then the iterator is |
| * adjusted to the start of that sequence. As a result of this, any |
| * subsequent call made to {@code getOffset()} may not return the same value |
| * set by this method. |
| * <p> |
| * If the decomposition mode is on, and offset is in the middle of a |
| * decomposable range of source text, the iterator may not return a correct |
| * result for the next forwards or backwards iteration. The user must ensure |
| * that the offset is not in the middle of a decomposable range. |
| * |
| * @param newOffset |
| * the character offset into the original source string to set. |
| * Note that this is not an offset into the corresponding |
| * sequence of collation elements. |
| */ |
| public void setOffset(int newOffset) { |
| this.icuIterator.setOffset(newOffset); |
| } |
| |
| /** |
| * Sets a new source string iterator for iteration, and resets the offset to |
| * the beginning of the text. |
| * |
| * @param source |
| * the new source string iterator for iteration. |
| */ |
| public void setText(CharacterIterator source) { |
| this.icuIterator.setText(source); |
| } |
| |
| /** |
| * Sets a new source string for iteration, and resets the offset to the |
| * beginning of the text. |
| * |
| * @param source |
| * the new source string for iteration. |
| */ |
| public void setText(String source) { |
| this.icuIterator.setText(source); |
| } |
| |
| /** |
| * Obtains the tertiary order of the specified collation element, i.e. the |
| * last 8 bits. This value is unsigned. |
| * |
| * @param order |
| * the element of the collation. |
| * @return the 8 bit tertiary order of the element. |
| */ |
| public static final short tertiaryOrder(int order) { |
| return (short) CollationElementIteratorICU.tertiaryOrder(order); |
| } |
| } |