android_icu4j/src/main/java/android/icu/impl/coll/CollationData.java - platform/external/icu - Git at Google

 /* GENERATED SOURCE. DO NOT MODIFY. */
 /*
 *******************************************************************************
 * Copyright (C) 2010-2015, International Business Machines
 * Corporation and others.  All Rights Reserved.
 *******************************************************************************
 * CollationData.java, ported from collationdata.h/.cpp
 *
 * C++ version created on: 2010oct27
 * created by: Markus W. Scherer
 */

 package android.icu.impl.coll;

 import android.icu.impl.Normalizer2Impl;
 import android.icu.impl.Trie2_32;
 import android.icu.lang.UScript;
 import android.icu.text.Collator;
 import android.icu.text.UnicodeSet;
 import android.icu.util.ICUException;

 /**
  * Collation data container.
  * Immutable data created by a CollationDataBuilder, or loaded from a file,
  * or deserialized from API-provided binary data.
  *
  * Includes data for the collation base (root/default), aliased if this is not the base.
  * @hide Only a subset of ICU is exposed in Android
  * @hide All android.icu classes are currently hidden
  */
 public final class CollationData {
     // Note: The ucadata.icu loader could discover the reserved ranges by setting an array
     // parallel with the ranges, and resetting ranges that are indexed.
     // The reordering builder code could clone the resulting template array.
     static final int REORDER_RESERVED_BEFORE_LATIN = Collator.ReorderCodes.FIRST + 14;
     static final int REORDER_RESERVED_AFTER_LATIN = Collator.ReorderCodes.FIRST + 15;

     static final int MAX_NUM_SPECIAL_REORDER_CODES = 8;

     CollationData(Normalizer2Impl nfc) {
         nfcImpl = nfc;
     }

     public int getCE32(int c) {
         return trie.get(c);
     }

     int getCE32FromSupplementary(int c) {
         return trie.get(c);  // TODO: port UTRIE2_GET32_FROM_SUPP(trie, c) to Java?
     }

     boolean isDigit(int c) {
         return c < 0x660 ? c <= 0x39 && 0x30 <= c :
                 Collation.hasCE32Tag(getCE32(c), Collation.DIGIT_TAG);
     }

     public boolean isUnsafeBackward(int c, boolean numeric) {
         return unsafeBackwardSet.contains(c) || (numeric && isDigit(c));
     }

     public boolean isCompressibleLeadByte(int b) {
         return compressibleBytes[b];
     }

     public boolean isCompressiblePrimary(long p) {
         return isCompressibleLeadByte((int)p >>> 24);
     }

     /**
      * Returns the CE32 from two contexts words.
      * Access to the defaultCE32 for contraction and prefix matching.
      */
     int getCE32FromContexts(int index) {
         return ((int)contexts.charAt(index) << 16) | contexts.charAt(index + 1);
     }

     /**
      * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG).
      * Requires that ce32 is special.
      */
     int getIndirectCE32(int ce32) {
         assert(Collation.isSpecialCE32(ce32));
         int tag = Collation.tagFromCE32(ce32);
         if(tag == Collation.DIGIT_TAG) {
             // Fetch the non-numeric-collation CE32.
             ce32 = ce32s[Collation.indexFromCE32(ce32)];
         } else if(tag == Collation.LEAD_SURROGATE_TAG) {
             ce32 = Collation.UNASSIGNED_CE32;
         } else if(tag == Collation.U0000_TAG) {
             // Fetch the normal ce32 for U+0000.
             ce32 = ce32s[0];
         }
         return ce32;
     }

     /**
      * Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG),
      * if ce32 is special.
      */
     int getFinalCE32(int ce32) {
         if(Collation.isSpecialCE32(ce32)) {
             ce32 = getIndirectCE32(ce32);
         }
         return ce32;
     }

     /**
      * Computes a CE from c's ce32 which has the OFFSET_TAG.
      */
     long getCEFromOffsetCE32(int c, int ce32) {
         long dataCE = ces[Collation.indexFromCE32(ce32)];
         return Collation.makeCE(Collation.getThreeBytePrimaryForOffsetData(c, dataCE));
     }

     /**
      * Returns the single CE that c maps to.
      * Throws UnsupportedOperationException if c does not map to a single CE.
      */
     long getSingleCE(int c) {
         CollationData d;
         int ce32 = getCE32(c);
         if(ce32 == Collation.FALLBACK_CE32) {
             d = base;
             ce32 = base.getCE32(c);
         } else {
             d = this;
         }
         while(Collation.isSpecialCE32(ce32)) {
             switch(Collation.tagFromCE32(ce32)) {
             case Collation.LATIN_EXPANSION_TAG:
             case Collation.BUILDER_DATA_TAG:
             case Collation.PREFIX_TAG:
             case Collation.CONTRACTION_TAG:
             case Collation.HANGUL_TAG:
             case Collation.LEAD_SURROGATE_TAG:
                 throw new UnsupportedOperationException(String.format(
                         "there is not exactly one collation element for U+%04X (CE32 0x%08x)",
                         c, ce32));
             case Collation.FALLBACK_TAG:
             case Collation.RESERVED_TAG_3:
                 throw new AssertionError(String.format(
                         "unexpected CE32 tag for U+%04X (CE32 0x%08x)", c, ce32));
             case Collation.LONG_PRIMARY_TAG:
                 return Collation.ceFromLongPrimaryCE32(ce32);
             case Collation.LONG_SECONDARY_TAG:
                 return Collation.ceFromLongSecondaryCE32(ce32);
             case Collation.EXPANSION32_TAG:
                 if(Collation.lengthFromCE32(ce32) == 1) {
                     ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
                     break;
                 } else {
                     throw new UnsupportedOperationException(String.format(
                             "there is not exactly one collation element for U+%04X (CE32 0x%08x)",
                             c, ce32));
                 }
             case Collation.EXPANSION_TAG: {
                 if(Collation.lengthFromCE32(ce32) == 1) {
                     return d.ces[Collation.indexFromCE32(ce32)];
                 } else {
                     throw new UnsupportedOperationException(String.format(
                             "there is not exactly one collation element for U+%04X (CE32 0x%08x)",
                             c, ce32));
                 }
             }
             case Collation.DIGIT_TAG:
                 // Fetch the non-numeric-collation CE32 and continue.
                 ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
                 break;
             case Collation.U0000_TAG:
                 assert(c == 0);
                 // Fetch the normal ce32 for U+0000 and continue.
                 ce32 = d.ce32s[0];
                 break;
             case Collation.OFFSET_TAG:
                 return d.getCEFromOffsetCE32(c, ce32);
             case Collation.IMPLICIT_TAG:
                 return Collation.unassignedCEFromCodePoint(c);
             }
         }
         return Collation.ceFromSimpleCE32(ce32);
     }

     /**
      * Returns the FCD16 value for code point c. c must be >= 0.
      */
     int getFCD16(int c) {
         return nfcImpl.getFCD16(c);
     }

     /**
      * Returns the first primary for the script's reordering group.
      * @return the primary with only the first primary lead byte of the group
      *         (not necessarily an actual root collator primary weight),
      *         or 0 if the script is unknown
      */
     long getFirstPrimaryForGroup(int script) {
         int index = getScriptIndex(script);
         return index == 0 ? 0 : (long)scriptStarts[index] << 16;
     }

     /**
      * Returns the last primary for the script's reordering group.
      * @return the last primary of the group
      *         (not an actual root collator primary weight),
      *         or 0 if the script is unknown
      */
     public long getLastPrimaryForGroup(int script) {
         int index = getScriptIndex(script);
         if(index == 0) {
             return 0;
         }
         long limit = scriptStarts[index + 1];
         return (limit << 16) - 1;
     }

     /**
      * Finds the reordering group which contains the primary weight.
      * @return the first script of the group, or -1 if the weight is beyond the last group
      */
     public int getGroupForPrimary(long p) {
         p >>= 16;
         if(p < scriptStarts[1] || scriptStarts[scriptStarts.length - 1] <= p) {
             return -1;
         }
         int index = 1;
         while(p >= scriptStarts[index + 1]) { ++index; }
         for(int i = 0; i < numScripts; ++i) {
             if(scriptsIndex[i] == index) {
                 return i;
             }
         }
         for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
             if(scriptsIndex[numScripts + i] == index) {
                 return Collator.ReorderCodes.FIRST + i;
             }
         }
         return -1;
     }

     private int getScriptIndex(int script) {
         if(script < 0) {
             return 0;
         } else if(script < numScripts) {
             return scriptsIndex[script];
         } else if(script < Collator.ReorderCodes.FIRST) {
             return 0;
         } else {
             script -= Collator.ReorderCodes.FIRST;
             if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
                 return scriptsIndex[numScripts + script];
             } else {
                 return 0;
             }
         }
     }

     public int[] getEquivalentScripts(int script) {
         int index = getScriptIndex(script);
         if(index == 0) { return EMPTY_INT_ARRAY; }
         if(script >= Collator.ReorderCodes.FIRST) {
             // Special groups have no aliases.
             return new int[] { script };
         }

         int length = 0;
         for(int i = 0; i < numScripts; ++i) {
             if(scriptsIndex[i] == index) {
                 ++length;
             }
         }
         int[] dest = new int[length];
         if(length == 1) {
             dest[0] = script;
             return dest;
         }
         length = 0;
         for(int i = 0; i < numScripts; ++i) {
             if(scriptsIndex[i] == index) {
                 dest[length++] = i;
             }
         }
         return dest;
     }

     /**
      * Writes the permutation of primary-weight ranges
      * for the given reordering of scripts and groups.
      * The caller checks for illegal arguments and
      * takes care of [DEFAULT] and memory allocation.
      *
      * <p>Each list element will be a (limit, offset) pair as described
      * for the CollationSettings.reorderRanges.
      * The list will be empty if no ranges are reordered.
      */
     void makeReorderRanges(int[] reorder, UVector32 ranges) {
         makeReorderRanges(reorder, false, ranges);
     }

     private void makeReorderRanges(int[] reorder, boolean latinMustMove, UVector32 ranges) {
         ranges.removeAllElements();
         int length = reorder.length;
         if(length == 0 || (length == 1 && reorder[0] == UScript.UNKNOWN)) {
             return;
         }

         // Maps each script-or-group range to a new lead byte.
         short[] table = new short[scriptStarts.length - 1];  // C++: uint8_t[]

         {
             // Set "don't care" values for reserved ranges.
             int index = scriptsIndex[
                     numScripts + REORDER_RESERVED_BEFORE_LATIN - Collator.ReorderCodes.FIRST];
             if(index != 0) {
                 table[index] = 0xff;
             }
             index = scriptsIndex[
                     numScripts + REORDER_RESERVED_AFTER_LATIN - Collator.ReorderCodes.FIRST];
             if(index != 0) {
                 table[index] = 0xff;
             }
         }

         // Never reorder special low and high primary lead bytes.
         assert(scriptStarts.length >= 2);
         assert(scriptStarts[0] == 0);
         int lowStart = scriptStarts[1];
         assert(lowStart == ((Collation.MERGE_SEPARATOR_BYTE + 1) << 8));
         int highLimit = scriptStarts[scriptStarts.length - 1];
         assert(highLimit == (Collation.TRAIL_WEIGHT_BYTE << 8));

         // Get the set of special reorder codes in the input list.
         // This supports a fixed number of special reorder codes;
         // it works for data with codes beyond Collator.ReorderCodes.LIMIT.
         int specials = 0;
         for(int i = 0; i < length; ++i) {
             int reorderCode = reorder[i] - Collator.ReorderCodes.FIRST;
             if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
                 specials |= 1 << reorderCode;
             }
         }

         // Start the reordering with the special low reorder codes that do not occur in the input.
         for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
             int index = scriptsIndex[numScripts + i];
             if(index != 0 && (specials & (1 << i)) == 0) {
                 lowStart = addLowScriptRange(table, index, lowStart);
             }
         }

         // Skip the reserved range before Latin if Latin is the first script,
         // so that we do not move it unnecessarily.
         int skippedReserved = 0;
         if(specials == 0 && reorder[0] == UScript.LATIN && !latinMustMove) {
             int index = scriptsIndex[UScript.LATIN];
             assert(index != 0);
             int start = scriptStarts[index];
             assert(lowStart <= start);
             skippedReserved = start - lowStart;
             lowStart = start;
         }

         // Reorder according to the input scripts, continuing from the bottom of the primary range.
         boolean hasReorderToEnd = false;
         for(int i = 0; i < length;) {
             int script = reorder[i++];
             if(script == UScript.UNKNOWN) {
                 // Put the remaining scripts at the top.
                 hasReorderToEnd = true;
                 while(i < length) {
                     script = reorder[--length];
                     if(script == UScript.UNKNOWN) {  // Must occur at most once.
                         throw new IllegalArgumentException(
                                 "setReorderCodes(): duplicate UScript.UNKNOWN");
                     }
                     if(script == Collator.ReorderCodes.DEFAULT) {
                         throw new IllegalArgumentException(
                                 "setReorderCodes(): UScript.DEFAULT together with other scripts");
                     }
                     int index = getScriptIndex(script);
                     if(index == 0) { continue; }
                     if(table[index] != 0) {  // Duplicate or equivalent script.
                         throw new IllegalArgumentException(
                                 "setReorderCodes(): duplicate or equivalent script " +
                                 scriptCodeString(script));
                     }
                     highLimit = addHighScriptRange(table, index, highLimit);
                 }
                 break;
             }
             if(script == Collator.ReorderCodes.DEFAULT) {
                 // The default code must be the only one in the list, and that is handled by the caller.
                 // Otherwise it must not be used.
                 throw new IllegalArgumentException(
                         "setReorderCodes(): UScript.DEFAULT together with other scripts");
             }
             int index = getScriptIndex(script);
             if(index == 0) { continue; }
             if(table[index] != 0) {  // Duplicate or equivalent script.
                 throw new IllegalArgumentException(
                         "setReorderCodes(): duplicate or equivalent script " +
                         scriptCodeString(script));
             }
             lowStart = addLowScriptRange(table, index, lowStart);
         }

         // Put all remaining scripts into the middle.
         for(int i = 1; i < scriptStarts.length - 1; ++i) {
             int leadByte = table[i];
             if(leadByte != 0) { continue; }
             int start = scriptStarts[i];
             if(!hasReorderToEnd && start > lowStart) {
                 // No need to move this script.
                 lowStart = start;
             }
             lowStart = addLowScriptRange(table, i, lowStart);
         }
         if(lowStart > highLimit) {
             if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
                 // Try not skipping the before-Latin reserved range.
                 makeReorderRanges(reorder, true, ranges);
                 return;
             }
             // We need more primary lead bytes than available, despite the reserved ranges.
             throw new ICUException(
                     "setReorderCodes(): reordering too many partial-primary-lead-byte scripts");
         }

         // Turn lead bytes into a list of (limit, offset) pairs.
         // Encode each pair in one list element:
         // Upper 16 bits = limit, lower 16 = signed lead byte offset.
         int offset = 0;
         for(int i = 1;; ++i) {
             int nextOffset = offset;
             while(i < scriptStarts.length - 1) {
                 int newLeadByte = table[i];
                 if(newLeadByte == 0xff) {
                     // "Don't care" lead byte for reserved range, continue with current offset.
                 } else {
                     nextOffset = newLeadByte - (scriptStarts[i] >> 8);
                     if(nextOffset != offset) { break; }
                 }
                 ++i;
             }
             if(offset != 0 || i < scriptStarts.length - 1) {
                 ranges.addElement(((int)scriptStarts[i] << 16) | (offset & 0xffff));
             }
             if(i == scriptStarts.length - 1) { break; }
             offset = nextOffset;
         }
     }

     private int addLowScriptRange(short[] table, int index, int lowStart) {
         int start = scriptStarts[index];
         if((start & 0xff) < (lowStart & 0xff)) {
             lowStart += 0x100;
         }
         table[index] = (short)(lowStart >> 8);
         int limit = scriptStarts[index + 1];
         lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) | (limit & 0xff);
         return lowStart;
     }

     private int addHighScriptRange(short[] table, int index, int highLimit) {
         int limit = scriptStarts[index + 1];
         if((limit & 0xff) > (highLimit & 0xff)) {
             highLimit -= 0x100;
         }
         int start = scriptStarts[index];
         highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) | (start & 0xff);
         table[index] = (short)(highLimit >> 8);
         return highLimit;
     }

     private static String scriptCodeString(int script) {
         // Do not use the script name here: We do not want to depend on that data.
         return (script < Collator.ReorderCodes.FIRST) ?
                 Integer.toString(script) : "0x" + Integer.toHexString(script);
     }

     private static final int[] EMPTY_INT_ARRAY = new int[0];

     /** @see jamoCE32s */
     static final int JAMO_CE32S_LENGTH = 19 + 21 + 27;

     /** Main lookup trie. */
     Trie2_32 trie;
     /**
      * Array of CE32 values.
      * At index 0 there must be CE32(U+0000)
      * to support U+0000's special-tag for NUL-termination handling.
      */
     int[] ce32s;
     /** Array of CE values for expansions and OFFSET_TAG. */
     long[] ces;
     /** Array of prefix and contraction-suffix matching data. */
     String contexts;
     /** Base collation data, or null if this data itself is a base. */
     public CollationData base;
     /**
      * Simple array of JAMO_CE32S_LENGTH=19+21+27 CE32s, one per canonical Jamo L/V/T.
      * They are normally simple CE32s, rarely expansions.
      * For fast handling of HANGUL_TAG.
      */
     int[] jamoCE32s = new int[JAMO_CE32S_LENGTH];
     public Normalizer2Impl nfcImpl;
     /** The single-byte primary weight (xx000000) for numeric collation. */
     long numericPrimary = 0x12000000;

     /** 256 flags for which primary-weight lead bytes are compressible. */
     public boolean[] compressibleBytes;
     /**
      * Set of code points that are unsafe for starting string comparison after an identical prefix,
      * or in backwards CE iteration.
      */
     UnicodeSet unsafeBackwardSet;

     /**
      * Fast Latin table for common-Latin-text string comparisons.
      * Data structure see class CollationFastLatin.
      */
     public char[] fastLatinTable;
     /**
      * Header portion of the fastLatinTable.
      * In C++, these are one array, and the header is skipped for mapping characters.
      * In Java, two arrays work better.
      */
     char[] fastLatinTableHeader;

     /**
      * Data for scripts and reordering groups.
      * Uses include building a reordering permutation table and
      * providing script boundaries to AlphabeticIndex.
      */
     int numScripts;
     /**
      * The length of scriptsIndex is numScripts+16.
      * It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
      * 16 special reorder codes (not all used) are mapped starting at numScripts.
      * Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
      * There are special codes at the end for reorder-reserved primary ranges.
      *
      * <p>Multiple scripts may share a range and index, for example Hira & Kana.
      */
     char[] scriptsIndex;
     /**
      * Start primary weight (top 16 bits only) for a group/script/reserved range
      * indexed by scriptsIndex.
      * The first range (separators & terminators) and the last range (trailing weights)
      * are not reorderable, and no scriptsIndex entry points to them.
      */
     char[] scriptStarts;

     /**
      * Collation elements in the root collator.
      * Used by the CollationRootElements class. The data structure is described there.
      * null in a tailoring.
      */
     public long[] rootElements;
 }
	/* GENERATED SOURCE. DO NOT MODIFY. */
	/*
	*******************************************************************************
	* Copyright (C) 2010-2015, International Business Machines
	* Corporation and others. All Rights Reserved.
	*******************************************************************************
	* CollationData.java, ported from collationdata.h/.cpp
	*
	* C++ version created on: 2010oct27
	* created by: Markus W. Scherer
	*/

	package android.icu.impl.coll;

	import android.icu.impl.Normalizer2Impl;
	import android.icu.impl.Trie2_32;
	import android.icu.lang.UScript;
	import android.icu.text.Collator;
	import android.icu.text.UnicodeSet;
	import android.icu.util.ICUException;

	/**
	* Collation data container.
	* Immutable data created by a CollationDataBuilder, or loaded from a file,
	* or deserialized from API-provided binary data.
	*
	* Includes data for the collation base (root/default), aliased if this is not the base.
	* @hide Only a subset of ICU is exposed in Android
	* @hide All android.icu classes are currently hidden
	*/
	public final class CollationData {
	// Note: The ucadata.icu loader could discover the reserved ranges by setting an array
	// parallel with the ranges, and resetting ranges that are indexed.
	// The reordering builder code could clone the resulting template array.
	static final int REORDER_RESERVED_BEFORE_LATIN = Collator.ReorderCodes.FIRST + 14;
	static final int REORDER_RESERVED_AFTER_LATIN = Collator.ReorderCodes.FIRST + 15;

	static final int MAX_NUM_SPECIAL_REORDER_CODES = 8;

	CollationData(Normalizer2Impl nfc) {
	nfcImpl = nfc;
	}

	public int getCE32(int c) {
	return trie.get(c);
	}

	int getCE32FromSupplementary(int c) {
	return trie.get(c); // TODO: port UTRIE2_GET32_FROM_SUPP(trie, c) to Java?
	}

	boolean isDigit(int c) {
	return c < 0x660 ? c <= 0x39 && 0x30 <= c :
	Collation.hasCE32Tag(getCE32(c), Collation.DIGIT_TAG);
	}

	public boolean isUnsafeBackward(int c, boolean numeric) {
	return unsafeBackwardSet.contains(c) \|\| (numeric && isDigit(c));
	}

	public boolean isCompressibleLeadByte(int b) {
	return compressibleBytes[b];
	}

	public boolean isCompressiblePrimary(long p) {
	return isCompressibleLeadByte((int)p >>> 24);
	}

	/**
	* Returns the CE32 from two contexts words.
	* Access to the defaultCE32 for contraction and prefix matching.
	*/
	int getCE32FromContexts(int index) {
	return ((int)contexts.charAt(index) << 16) \| contexts.charAt(index + 1);
	}

	/**
	* Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG).
	* Requires that ce32 is special.
	*/
	int getIndirectCE32(int ce32) {
	assert(Collation.isSpecialCE32(ce32));
	int tag = Collation.tagFromCE32(ce32);
	if(tag == Collation.DIGIT_TAG) {
	// Fetch the non-numeric-collation CE32.
	ce32 = ce32s[Collation.indexFromCE32(ce32)];
	} else if(tag == Collation.LEAD_SURROGATE_TAG) {
	ce32 = Collation.UNASSIGNED_CE32;
	} else if(tag == Collation.U0000_TAG) {
	// Fetch the normal ce32 for U+0000.
	ce32 = ce32s[0];
	}
	return ce32;
	}

	/**
	* Returns the CE32 for an indirect special CE32 (e.g., with DIGIT_TAG),
	* if ce32 is special.
	*/
	int getFinalCE32(int ce32) {
	if(Collation.isSpecialCE32(ce32)) {
	ce32 = getIndirectCE32(ce32);
	}
	return ce32;
	}

	/**
	* Computes a CE from c's ce32 which has the OFFSET_TAG.
	*/
	long getCEFromOffsetCE32(int c, int ce32) {
	long dataCE = ces[Collation.indexFromCE32(ce32)];
	return Collation.makeCE(Collation.getThreeBytePrimaryForOffsetData(c, dataCE));
	}

	/**
	* Returns the single CE that c maps to.
	* Throws UnsupportedOperationException if c does not map to a single CE.
	*/
	long getSingleCE(int c) {
	CollationData d;
	int ce32 = getCE32(c);
	if(ce32 == Collation.FALLBACK_CE32) {
	d = base;
	ce32 = base.getCE32(c);
	} else {
	d = this;
	}
	while(Collation.isSpecialCE32(ce32)) {
	switch(Collation.tagFromCE32(ce32)) {
	case Collation.LATIN_EXPANSION_TAG:
	case Collation.BUILDER_DATA_TAG:
	case Collation.PREFIX_TAG:
	case Collation.CONTRACTION_TAG:
	case Collation.HANGUL_TAG:
	case Collation.LEAD_SURROGATE_TAG:
	throw new UnsupportedOperationException(String.format(
	"there is not exactly one collation element for U+%04X (CE32 0x%08x)",
	c, ce32));
	case Collation.FALLBACK_TAG:
	case Collation.RESERVED_TAG_3:
	throw new AssertionError(String.format(
	"unexpected CE32 tag for U+%04X (CE32 0x%08x)", c, ce32));
	case Collation.LONG_PRIMARY_TAG:
	return Collation.ceFromLongPrimaryCE32(ce32);
	case Collation.LONG_SECONDARY_TAG:
	return Collation.ceFromLongSecondaryCE32(ce32);
	case Collation.EXPANSION32_TAG:
	if(Collation.lengthFromCE32(ce32) == 1) {
	ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
	break;
	} else {
	throw new UnsupportedOperationException(String.format(
	"there is not exactly one collation element for U+%04X (CE32 0x%08x)",
	c, ce32));
	}
	case Collation.EXPANSION_TAG: {
	if(Collation.lengthFromCE32(ce32) == 1) {
	return d.ces[Collation.indexFromCE32(ce32)];
	} else {
	throw new UnsupportedOperationException(String.format(
	"there is not exactly one collation element for U+%04X (CE32 0x%08x)",
	c, ce32));
	}
	}
	case Collation.DIGIT_TAG:
	// Fetch the non-numeric-collation CE32 and continue.
	ce32 = d.ce32s[Collation.indexFromCE32(ce32)];
	break;
	case Collation.U0000_TAG:
	assert(c == 0);
	// Fetch the normal ce32 for U+0000 and continue.
	ce32 = d.ce32s[0];
	break;
	case Collation.OFFSET_TAG:
	return d.getCEFromOffsetCE32(c, ce32);
	case Collation.IMPLICIT_TAG:
	return Collation.unassignedCEFromCodePoint(c);
	}
	}
	return Collation.ceFromSimpleCE32(ce32);
	}

	/**
	* Returns the FCD16 value for code point c. c must be >= 0.
	*/
	int getFCD16(int c) {
	return nfcImpl.getFCD16(c);
	}

	/**
	* Returns the first primary for the script's reordering group.
	* @return the primary with only the first primary lead byte of the group
	* (not necessarily an actual root collator primary weight),
	* or 0 if the script is unknown
	*/
	long getFirstPrimaryForGroup(int script) {
	int index = getScriptIndex(script);
	return index == 0 ? 0 : (long)scriptStarts[index] << 16;
	}

	/**
	* Returns the last primary for the script's reordering group.
	* @return the last primary of the group
	* (not an actual root collator primary weight),
	* or 0 if the script is unknown
	*/
	public long getLastPrimaryForGroup(int script) {
	int index = getScriptIndex(script);
	if(index == 0) {
	return 0;
	}
	long limit = scriptStarts[index + 1];
	return (limit << 16) - 1;
	}

	/**
	* Finds the reordering group which contains the primary weight.
	* @return the first script of the group, or -1 if the weight is beyond the last group
	*/
	public int getGroupForPrimary(long p) {
	p >>= 16;
	if(p < scriptStarts[1] \|\| scriptStarts[scriptStarts.length - 1] <= p) {
	return -1;
	}
	int index = 1;
	while(p >= scriptStarts[index + 1]) { ++index; }
	for(int i = 0; i < numScripts; ++i) {
	if(scriptsIndex[i] == index) {
	return i;
	}
	}
	for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
	if(scriptsIndex[numScripts + i] == index) {
	return Collator.ReorderCodes.FIRST + i;
	}
	}
	return -1;
	}

	private int getScriptIndex(int script) {
	if(script < 0) {
	return 0;
	} else if(script < numScripts) {
	return scriptsIndex[script];
	} else if(script < Collator.ReorderCodes.FIRST) {
	return 0;
	} else {
	script -= Collator.ReorderCodes.FIRST;
	if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
	return scriptsIndex[numScripts + script];
	} else {
	return 0;
	}
	}
	}

	public int[] getEquivalentScripts(int script) {
	int index = getScriptIndex(script);
	if(index == 0) { return EMPTY_INT_ARRAY; }
	if(script >= Collator.ReorderCodes.FIRST) {
	// Special groups have no aliases.
	return new int[] { script };
	}

	int length = 0;
	for(int i = 0; i < numScripts; ++i) {
	if(scriptsIndex[i] == index) {
	++length;
	}
	}
	int[] dest = new int[length];
	if(length == 1) {
	dest[0] = script;
	return dest;
	}
	length = 0;
	for(int i = 0; i < numScripts; ++i) {
	if(scriptsIndex[i] == index) {
	dest[length++] = i;
	}
	}
	return dest;
	}

	/**
	* Writes the permutation of primary-weight ranges
	* for the given reordering of scripts and groups.
	* The caller checks for illegal arguments and
	* takes care of [DEFAULT] and memory allocation.
	*
	* <p>Each list element will be a (limit, offset) pair as described
	* for the CollationSettings.reorderRanges.
	* The list will be empty if no ranges are reordered.
	*/
	void makeReorderRanges(int[] reorder, UVector32 ranges) {
	makeReorderRanges(reorder, false, ranges);
	}

	private void makeReorderRanges(int[] reorder, boolean latinMustMove, UVector32 ranges) {
	ranges.removeAllElements();
	int length = reorder.length;
	if(length == 0 \|\| (length == 1 && reorder[0] == UScript.UNKNOWN)) {
	return;
	}

	// Maps each script-or-group range to a new lead byte.
	short[] table = new short[scriptStarts.length - 1]; // C++: uint8_t[]

	{
	// Set "don't care" values for reserved ranges.
	int index = scriptsIndex[
	numScripts + REORDER_RESERVED_BEFORE_LATIN - Collator.ReorderCodes.FIRST];
	if(index != 0) {
	table[index] = 0xff;
	}
	index = scriptsIndex[
	numScripts + REORDER_RESERVED_AFTER_LATIN - Collator.ReorderCodes.FIRST];
	if(index != 0) {
	table[index] = 0xff;
	}
	}

	// Never reorder special low and high primary lead bytes.
	assert(scriptStarts.length >= 2);
	assert(scriptStarts[0] == 0);
	int lowStart = scriptStarts[1];
	assert(lowStart == ((Collation.MERGE_SEPARATOR_BYTE + 1) << 8));
	int highLimit = scriptStarts[scriptStarts.length - 1];
	assert(highLimit == (Collation.TRAIL_WEIGHT_BYTE << 8));

	// Get the set of special reorder codes in the input list.
	// This supports a fixed number of special reorder codes;
	// it works for data with codes beyond Collator.ReorderCodes.LIMIT.
	int specials = 0;
	for(int i = 0; i < length; ++i) {
	int reorderCode = reorder[i] - Collator.ReorderCodes.FIRST;
	if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
	specials \|= 1 << reorderCode;
	}
	}

	// Start the reordering with the special low reorder codes that do not occur in the input.
	for(int i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
	int index = scriptsIndex[numScripts + i];
	if(index != 0 && (specials & (1 << i)) == 0) {
	lowStart = addLowScriptRange(table, index, lowStart);
	}
	}

	// Skip the reserved range before Latin if Latin is the first script,
	// so that we do not move it unnecessarily.
	int skippedReserved = 0;
	if(specials == 0 && reorder[0] == UScript.LATIN && !latinMustMove) {
	int index = scriptsIndex[UScript.LATIN];
	assert(index != 0);
	int start = scriptStarts[index];
	assert(lowStart <= start);
	skippedReserved = start - lowStart;
	lowStart = start;
	}

	// Reorder according to the input scripts, continuing from the bottom of the primary range.
	boolean hasReorderToEnd = false;
	for(int i = 0; i < length;) {
	int script = reorder[i++];
	if(script == UScript.UNKNOWN) {
	// Put the remaining scripts at the top.
	hasReorderToEnd = true;
	while(i < length) {
	script = reorder[--length];
	if(script == UScript.UNKNOWN) { // Must occur at most once.
	throw new IllegalArgumentException(
	"setReorderCodes(): duplicate UScript.UNKNOWN");
	}
	if(script == Collator.ReorderCodes.DEFAULT) {
	throw new IllegalArgumentException(
	"setReorderCodes(): UScript.DEFAULT together with other scripts");
	}
	int index = getScriptIndex(script);
	if(index == 0) { continue; }
	if(table[index] != 0) { // Duplicate or equivalent script.
	throw new IllegalArgumentException(
	"setReorderCodes(): duplicate or equivalent script " +
	scriptCodeString(script));
	}
	highLimit = addHighScriptRange(table, index, highLimit);
	}
	break;
	}
	if(script == Collator.ReorderCodes.DEFAULT) {
	// The default code must be the only one in the list, and that is handled by the caller.
	// Otherwise it must not be used.
	throw new IllegalArgumentException(
	"setReorderCodes(): UScript.DEFAULT together with other scripts");
	}
	int index = getScriptIndex(script);
	if(index == 0) { continue; }
	if(table[index] != 0) { // Duplicate or equivalent script.
	throw new IllegalArgumentException(
	"setReorderCodes(): duplicate or equivalent script " +
	scriptCodeString(script));
	}
	lowStart = addLowScriptRange(table, index, lowStart);
	}

	// Put all remaining scripts into the middle.
	for(int i = 1; i < scriptStarts.length - 1; ++i) {
	int leadByte = table[i];
	if(leadByte != 0) { continue; }
	int start = scriptStarts[i];
	if(!hasReorderToEnd && start > lowStart) {
	// No need to move this script.
	lowStart = start;
	}
	lowStart = addLowScriptRange(table, i, lowStart);
	}
	if(lowStart > highLimit) {
	if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
	// Try not skipping the before-Latin reserved range.
	makeReorderRanges(reorder, true, ranges);
	return;
	}
	// We need more primary lead bytes than available, despite the reserved ranges.
	throw new ICUException(
	"setReorderCodes(): reordering too many partial-primary-lead-byte scripts");
	}

	// Turn lead bytes into a list of (limit, offset) pairs.
	// Encode each pair in one list element:
	// Upper 16 bits = limit, lower 16 = signed lead byte offset.
	int offset = 0;
	for(int i = 1;; ++i) {
	int nextOffset = offset;
	while(i < scriptStarts.length - 1) {
	int newLeadByte = table[i];
	if(newLeadByte == 0xff) {
	// "Don't care" lead byte for reserved range, continue with current offset.
	} else {
	nextOffset = newLeadByte - (scriptStarts[i] >> 8);
	if(nextOffset != offset) { break; }
	}
	++i;
	}
	if(offset != 0 \|\| i < scriptStarts.length - 1) {
	ranges.addElement(((int)scriptStarts[i] << 16) \| (offset & 0xffff));
	}
	if(i == scriptStarts.length - 1) { break; }
	offset = nextOffset;
	}
	}

	private int addLowScriptRange(short[] table, int index, int lowStart) {
	int start = scriptStarts[index];
	if((start & 0xff) < (lowStart & 0xff)) {
	lowStart += 0x100;
	}
	table[index] = (short)(lowStart >> 8);
	int limit = scriptStarts[index + 1];
	lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) \| (limit & 0xff);
	return lowStart;
	}

	private int addHighScriptRange(short[] table, int index, int highLimit) {
	int limit = scriptStarts[index + 1];
	if((limit & 0xff) > (highLimit & 0xff)) {
	highLimit -= 0x100;
	}
	int start = scriptStarts[index];
	highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) \| (start & 0xff);
	table[index] = (short)(highLimit >> 8);
	return highLimit;
	}

	private static String scriptCodeString(int script) {
	// Do not use the script name here: We do not want to depend on that data.
	return (script < Collator.ReorderCodes.FIRST) ?
	Integer.toString(script) : "0x" + Integer.toHexString(script);
	}

	private static final int[] EMPTY_INT_ARRAY = new int[0];

	/** @see jamoCE32s */
	static final int JAMO_CE32S_LENGTH = 19 + 21 + 27;

	/** Main lookup trie. */
	Trie2_32 trie;
	/**
	* Array of CE32 values.
	* At index 0 there must be CE32(U+0000)
	* to support U+0000's special-tag for NUL-termination handling.
	*/
	int[] ce32s;
	/** Array of CE values for expansions and OFFSET_TAG. */
	long[] ces;
	/** Array of prefix and contraction-suffix matching data. */
	String contexts;
	/** Base collation data, or null if this data itself is a base. */
	public CollationData base;
	/**
	* Simple array of JAMO_CE32S_LENGTH=19+21+27 CE32s, one per canonical Jamo L/V/T.
	* They are normally simple CE32s, rarely expansions.
	* For fast handling of HANGUL_TAG.
	*/
	int[] jamoCE32s = new int[JAMO_CE32S_LENGTH];
	public Normalizer2Impl nfcImpl;
	/** The single-byte primary weight (xx000000) for numeric collation. */
	long numericPrimary = 0x12000000;

	/** 256 flags for which primary-weight lead bytes are compressible. */
	public boolean[] compressibleBytes;
	/**
	* Set of code points that are unsafe for starting string comparison after an identical prefix,
	* or in backwards CE iteration.
	*/
	UnicodeSet unsafeBackwardSet;

	/**
	* Fast Latin table for common-Latin-text string comparisons.
	* Data structure see class CollationFastLatin.
	*/
	public char[] fastLatinTable;
	/**
	* Header portion of the fastLatinTable.
	* In C++, these are one array, and the header is skipped for mapping characters.
	* In Java, two arrays work better.
	*/
	char[] fastLatinTableHeader;

	/**
	* Data for scripts and reordering groups.
	* Uses include building a reordering permutation table and
	* providing script boundaries to AlphabeticIndex.
	*/
	int numScripts;
	/**
	* The length of scriptsIndex is numScripts+16.
	* It maps from a UScriptCode or a special reorder code to an entry in scriptStarts.
	* 16 special reorder codes (not all used) are mapped starting at numScripts.
	* Up to MAX_NUM_SPECIAL_REORDER_CODES are codes for special groups like space/punct/digit.
	* There are special codes at the end for reorder-reserved primary ranges.
	*
	* <p>Multiple scripts may share a range and index, for example Hira & Kana.
	*/
	char[] scriptsIndex;
	/**
	* Start primary weight (top 16 bits only) for a group/script/reserved range
	* indexed by scriptsIndex.
	* The first range (separators & terminators) and the last range (trailing weights)
	* are not reorderable, and no scriptsIndex entry points to them.
	*/
	char[] scriptStarts;

	/**
	* Collation elements in the root collator.
	* Used by the CollationRootElements class. The data structure is described there.
	* null in a tailoring.
	*/
	public long[] rootElements;
	}