i18n/alphaindex.cpp - platform/external/icu4c.git - Git at Google

 /*
 *******************************************************************************
 * Copyright (C) 2009-2011, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */

 /**
  * \file
  * \brief C++ API: AlphabeticIndex class
  */

 #include "unicode/utypes.h"

 #include "unicode/alphaindex.h"
 #include "unicode/coll.h"
 #include "unicode/normalizer2.h"
 #include "unicode/strenum.h"
 #include "unicode/tblcoll.h"
 #include "unicode/ulocdata.h"
 #include "unicode/uniset.h"
 #include "unicode/uobject.h"
 #include "unicode/uscript.h"
 #include "unicode/usetiter.h"
 #include "unicode/ustring.h"

 #include "cstring.h"
 #include "mutex.h"
 #include "uassert.h"
 #include "ucln_in.h"
 #include "uhash.h"
 #include "uvector.h"

 //#include <string>
 // BEGIN android-removed
 // Apply the change from ICU trunk.
 // #include <iostream>
 // END android-removed
 U_NAMESPACE_BEGIN

 UOBJECT_DEFINE_NO_RTTI_IMPLEMENTATION(AlphabeticIndex)

 // Forward Declarations
 //   File-local comparison callbacks that are handed to UVector::sortWithUComparator()
 //   further down.  Each one receives an opaque context pointer followed by the two
 //   elements being compared.
 static int32_t U_CALLCONV
 PreferenceComparator(const void *context, const void *left, const void *right);

 static int32_t U_CALLCONV
 sortCollateComparator(const void *context, const void *left, const void *right);

 static int32_t U_CALLCONV
 recordCompareFn(const void *context, const void *left, const void *right);

 //
 //  UHash value deleter: destroys a heap-allocated UnicodeSet.
 //     Installed on alreadyIn_ with uhash_setValueDeleter() in init().
 //     TODO:  move this function into uhash.
 //
 static void U_CALLCONV
 uhash_deleteUnicodeSet(void *obj) {
     UnicodeSet *doomed = static_cast<UnicodeSet *>(obj);
     delete doomed;
 }

 //  Element deleter for the UVector of Bucket pointers (bucketList_):
 //  destroys one heap-allocated Bucket.
 static void U_CALLCONV
 alphaIndex_deleteBucket(void *obj) {
     AlphabeticIndex::Bucket *doomed = static_cast<AlphabeticIndex::Bucket *>(obj);
     delete doomed;
 }

 //  Element deleter for the UVector of Record pointers (inputRecords_):
 //  destroys one heap-allocated Record.
 static void U_CALLCONV
 alphaIndex_deleteRecord(void *obj) {
     AlphabeticIndex::Record *doomed = static_cast<AlphabeticIndex::Record *>(obj);
     delete doomed;
 }


 // Normalizer obtained in staticInit() via
 // Normalizer2::getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, ...).
 // staticCleanup() resets the pointer to NULL without deleting it, because it
 // refers to a singleton.
 static const Normalizer2 *nfkdNormalizer;

 //
 //  Append the contents of a UnicodeSet to a UVector of UnicodeStrings.
 //  Append everything - individual characters are handled as strings of length 1.
 //  The destination vector owns the appended strings.
 //
 //  Nothing is appended when status already indicates a failure on entry, and
 //  iteration stops as soon as an append reports a failure.  A failed clone()
 //  is reported as U_MEMORY_ALLOCATION_ERROR instead of being stored as a NULL
 //  element, which callers would later dereference.
 //
 static void appendUnicodeSetToUVector(UVector &dest, const UnicodeSet &source, UErrorCode &status) {
     if (U_FAILURE(status)) {
         return;
     }
     UnicodeSetIterator setIter(source);
     while (setIter.next()) {
         const UnicodeString &str = setIter.getString();
         void *copy = str.clone();
         if (copy == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
             return;
         }
         dest.addElement(copy, status);
         if (U_FAILURE(status)) {
             return;
         }
     }
 }


 //
 //  Construct an index for a locale.
 //    Runs the common init(), records the locale and its language type, creates
 //    the locale's collator plus a primary-strength clone of it, seeds the
 //    initial labels from the locale's index exemplars, and loads the list of
 //    first-characters-in-script.
 //    status is set to U_MEMORY_ALLOCATION_ERROR when either collator is missing
 //    and no other error has been reported.
 //
 AlphabeticIndex::AlphabeticIndex(const Locale &locale, UErrorCode &status) {
     init(status);
     if (U_FAILURE(status)) {
         return;
     }
     locale_ = locale;
     langType_ = langTypeFromLocale(locale_);

     // init() left collatorPrimaryOnly_ at NULL, so it stays NULL unless the clone succeeds.
     collator_ = Collator::createInstance(locale, status);
     if (collator_ != NULL) {
         collatorPrimaryOnly_ = collator_->clone();
         if (collatorPrimaryOnly_ != NULL) {
             collatorPrimaryOnly_->setStrength(Collator::PRIMARY);
         }
     }
     getIndexExemplars(*initialLabels_, locale, status);
     indexBuildRequired_ = TRUE;

     const UBool collatorMissing = (collator_ == NULL || collatorPrimaryOnly_ == NULL);
     if (collatorMissing && U_SUCCESS(status)) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
     firstScriptCharacters_ = firstStringsInScript(status);
 }


 // Releases the hash table, vectors, collators and sets held by this object.
 // init() sets every one of these pointer members to NULL before allocating
 // anything, so members that were never allocated are NULL when this runs.
 AlphabeticIndex::~AlphabeticIndex() {
     uhash_close(alreadyIn_);
     delete bucketList_;
     delete collator_;
     delete collatorPrimaryOnly_;
     delete firstScriptCharacters_;
     delete labels_;
     delete inputRecords_;
     delete noDistinctSorting_;
     delete notAlphabetic_;
     delete initialLabels_;
 }


 //
 //  Add the contents of a UnicodeSet to the set of initial labels.
 //    Does nothing when status already indicates a failure.
 //    The index is flagged for rebuilding, because buildIndex() derives the
 //    label and bucket lists from initialLabels_ and otherwise returns early
 //    once an index has been built.
 //    Returns this object.
 //
 AlphabeticIndex &AlphabeticIndex::addLabels(const UnicodeSet &additions, UErrorCode &status) {
     if (U_FAILURE(status)) {
         return *this;
     }
     initialLabels_->addAll(additions);
     indexBuildRequired_ = TRUE;
     return *this;
 }


 //
 //  Add the index exemplars of a locale to the set of initial labels.
 //    Does nothing when status already indicates a failure.
 //    The index is flagged for rebuilding, because buildIndex() derives the
 //    label and bucket lists from initialLabels_ and otherwise returns early
 //    once an index has been built.
 //    Returns this object.
 //
 AlphabeticIndex &AlphabeticIndex::addLabels(const Locale &locale, UErrorCode &status) {
     if (U_FAILURE(status)) {
         return *this;
     }
     UnicodeSet additions;
     getIndexExemplars(additions, locale, status);
     initialLabels_->addAll(additions);
     indexBuildRequired_ = TRUE;
     return *this;
 }


 // Number of buckets in the index.  Builds the index first if that is pending;
 // yields 0 when status indicates a failure afterwards.
 int32_t AlphabeticIndex::getBucketCount(UErrorCode &status) {
     buildIndex(status);
     return U_FAILURE(status) ? 0 : bucketList_->size();
 }


 // Number of input records held by the index; 0 when status already indicates
 // a failure.  Does not trigger an index build.
 int32_t AlphabeticIndex::getRecordCount(UErrorCode &status) {
     return U_FAILURE(status) ? 0 : inputRecords_->size();
 }


 //
 //  buildIndex()   (Re)build the label list and the bucket list, and bin the
 //                 input records into the buckets.
 //
 //                 Does nothing when status already indicates a failure or when
 //                 no rebuild is pending.  When a failure is detected part way
 //                 through, the function returns without creating buckets and
 //                 leaves indexBuildRequired_ set, so that a later call made
 //                 with a clean status starts over instead of using a
 //                 half-built index.
 //
 void AlphabeticIndex::buildIndex(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return;
     }
     if (!indexBuildRequired_) {
         return;
     }

     // Discard any already-built data.
     // This is important when the user builds and uses an index, then subsequently modifies it,
     // necessitating a rebuild.

     bucketList_->removeAllElements();
     labels_->removeAllElements();
     uhash_removeAll(alreadyIn_);
     noDistinctSorting_->clear();
     notAlphabetic_->clear();

     // first sort the incoming Labels, with a "best" ordering among items
     // that are the same according to the collator

     UVector preferenceSorting(status);   // Vector of UnicodeStrings; owned by the vector.
     preferenceSorting.setDeleter(uhash_deleteUnicodeString);
     appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
     preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);

     // We now make a set of Labels.
     // Some of the input may, however, be redundant.
     // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
     // So we make a pass through, filtering out those cases.
     // TODO: filtering these out would seem to be at odds with the eventual goal
     //       of being able to split buckets that contain too many items.

     UnicodeSet labelSet;
     for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
         // preferenceSorting is not modified inside this loop, so a reference is enough.
         const UnicodeString &item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
         // TODO:  Since preferenceSorting was originally populated from the contents of a UnicodeSet,
         //        is it even possible for duplicates to show up in this check?
         if (labelSet.contains(item)) {
             UnicodeSetIterator itemAlreadyInIter(labelSet);
             while (itemAlreadyInIter.next()) {
                 const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
                 if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
                     UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
                     if (targets == NULL) {
                         // alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
                         targets = new UnicodeSet();
                         if (targets == NULL) {
                             if (U_SUCCESS(status)) {
                                 status = U_MEMORY_ALLOCATION_ERROR;
                             }
                             return;
                         }
                         uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
                     }
                     targets->add(item);
                     break;
                 }
             }
         } else if (item.moveIndex32(0, 1) < item.length() &&  // Label contains more than one code point.
                    collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
             noDistinctSorting_->add(item);
         } else if (!ALPHABETIC->containsSome(item)) {
             notAlphabetic_->add(item);
         } else {
             labelSet.add(item);
         }
     }

     // Move the set of Labels from the set into a vector, and sort
     // according to the collator.

     appendUnicodeSetToUVector(*labels_, labelSet, status);
     labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);

     // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
     //    Implemented by copying the elements to be retained to a new UVector.

     const int32_t size = labelSet.size() - 1;
     if (size > maxLabelCount_) {
         UVector *newLabels = new UVector(status);
         if (newLabels == NULL) {
             if (U_SUCCESS(status)) {
                 status = U_MEMORY_ALLOCATION_ERROR;
             }
             return;
         }
         // Configure the replacement the same way init() configures labels_.
         newLabels->setDeleter(uhash_deleteUnicodeString);
         newLabels->setComparer(uhash_compareUnicodeString);
         int32_t count = 0;
         int32_t old = -1;
         for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
             const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
             ++count;
             const int32_t bump = count * maxLabelCount_ / size;
             if (bump == old) {
                 // it.remove();
             } else {
                 newLabels->addElement(str->clone(), status);
                 old = bump;
             }
         }
         delete labels_;
         labels_ = newLabels;
     }

     // buildBucketList() reads the first label unconditionally; do not enter it
     // after a failure, when the label list may be incomplete.
     if (U_FAILURE(status)) {
         return;
     }

     // We now know the list of labels.
     // Create a corresponding list of buckets, one per label.

     buildBucketList(status);    // Corresponds to Java BucketList constructor.

     // Bin the Records into the Buckets.
     bucketRecords(status);

     // Only a successful build satisfies the pending rebuild.  The flag is
     // updated before resetBucketIterator() is called, as in the original
     // statement order.
     if (U_SUCCESS(status)) {
         indexBuildRequired_ = FALSE;
     }
     resetBucketIterator(status);
 }

 //
 //  buildBucketList()    Corresponds to the BucketList constructor in the Java version.
 //
 //       Appends to bucketList_, in order: one underflow bucket, one normal bucket
 //       per entry of labels_ (with an inflow bucket inserted where the script
 //       changes and the labels are not adjacent), and one final overflow bucket.
 //       Allocation results are not checked here.

 void AlphabeticIndex::buildBucketList(UErrorCode &status) {
     UnicodeString labelStr = getUnderflowLabel();
     Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
     bucketList_->addElement(b, status);

     // Build up the list, adding underflow, additions, overflow
     // insert infix labels as needed, using \uFFFF.
     // NOTE(review): the first label is dereferenced without checking that labels_
     //               is non-empty - confirm callers guarantee at least one label.
     const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
     b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
     bucketList_->addElement(b, status);

     // Script sets of the previous and the current label, with the scripts listed
     // in IGNORE_SCRIPTS removed.
     UnicodeSet lastSet;
     UnicodeSet set;
     AlphabeticIndex::getScriptSet(lastSet, *last, status);
     lastSet.removeAll(*IGNORE_SCRIPTS);

     for (int i = 1; i < labels_->size(); ++i) {
         UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
         getScriptSet(set, *current, status);
         set.removeAll(*IGNORE_SCRIPTS);
         if (lastSet.containsNone(set)) {
             // check for adjacent
             const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
             if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
                 labelStr = getInflowLabel();
                 b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
                 bucketList_->addElement(b, status);
                 // NOTE(review): together with the loop's own ++i this skips the label
                 //               that follows `current` - confirm that is intended.
                 i++;
                 lastSet = set;
             }
         }
         b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
         bucketList_->addElement(b, status);
         last = current;
         lastSet = set;
     }
     // The overflow bucket's lower boundary is derived from the last label that
     // received a normal bucket.
     const UnicodeString &limitString = getOverflowComparisonString(*last, status);
     b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
     bucketList_->addElement(b, status);
     // final overflow bucket
 }


 //
 //   Distribute the raw input records over the buckets.
 //
 //       The records are sorted first, so that records and buckets can be walked
 //       together in one forward pass.
 //
 //       Storage management:  the Record objects belong to the inputRecords_
 //       vector, which will (eventually) delete them.  Buckets only hold
 //       pointers to the Records and do not own them.
 //
 void AlphabeticIndex::bucketRecords(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return;
     }

     inputRecords_->sortWithUComparator(recordCompareFn, collator_, status);
     U_ASSERT(bucketList_->size() > 0);   // Should always have at least an overflow
                                          //   bucket, even if no user labels.
     int32_t bucketIndex = 0;
     Bucket *destBucket = static_cast<Bucket *>(bucketList_->elementAt(bucketIndex));
     Bucket *nextBucket = NULL;
     if (bucketIndex+1 < bucketList_->size()) {
         nextBucket = static_cast<Bucket *>(bucketList_->elementAt(bucketIndex+1));
     }

     for (int32_t recordIndex = 0; recordIndex < inputRecords_->size(); ++recordIndex) {
         Record *r = static_cast<Record *>(inputRecords_->elementAt(recordIndex));

         // Move forward through the buckets until the record sorts below the
         // lower boundary of the following bucket, or no bucket follows.
         while (nextBucket != NULL &&
                !(collatorPrimaryOnly_->compare(r->sortingName_, nextBucket->lowerBoundary_) < 0)) {
             bucketIndex++;
             destBucket = nextBucket;
             nextBucket = (bucketIndex+1 < bucketList_->size())
                     ? static_cast<Bucket *>(bucketList_->elementAt(bucketIndex+1))
                     : NULL;
             U_ASSERT(destBucket != NULL);
         }

         // The record belongs to the bucket we stopped on.
         destBucket->records_->addElement(r, status);
     }

 }


 //
 //  getIndexExemplars()   Add the index characters for a locale to dest.
 //
 //       The locale's explicit index exemplar set is used when it is available.
 //       Otherwise a set is synthesized from the standard exemplar set:
 //       strings are upper-cased, and sets touching core Latin, Hangul or
 //       Ethiopic are adjusted as described at each step below.
 //
 //       dest is left untouched when status indicates a failure on entry, when
 //       the locale data cannot be opened, or when the standard exemplar set
 //       cannot be fetched.
 //
 void AlphabeticIndex::getIndexExemplars(UnicodeSet  &dest, const Locale &locale, UErrorCode &status) {
     if (U_FAILURE(status)) {
         return;
     }

     LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
     if (U_FAILURE(status)) {
         // Without this check the failure would be erased by the reset to
         // U_ZERO_ERROR below, and the handle from the failed open would be
         // passed to ulocdata_getExemplarSet() a second time.
         return;
     }
     UnicodeSet exemplars;
     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
     if (U_SUCCESS(status)) {
         dest.addAll(exemplars);
         return;
     }
     status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR

     // Locale data did not include explicit Index characters.
     // Synthesize a set of them from the locale's standard exemplar characters.

     ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
     if (U_FAILURE(status)) {
         return;
     }

     // Upper-case any that aren't already so.
     //   (We only do this for synthesized index characters.)
     // Changes are collected in two side sets, so that exemplars is not modified
     // while it is being iterated.

     UnicodeSetIterator it(exemplars);
     UnicodeString upperC;
     UnicodeSet  lowersToRemove;
     UnicodeSet  uppersToAdd;
     while (it.next()) {
         const UnicodeString &exemplarC = it.getString();
         upperC = exemplarC;
         upperC.toUpper(locale);
         if (exemplarC != upperC) {
             lowersToRemove.add(exemplarC);
             uppersToAdd.add(upperC);
         }
     }
     exemplars.removeAll(lowersToRemove);
     exemplars.addAll(uppersToAdd);

     // get the exemplars, and handle special cases

     // question: should we add auxiliary exemplars?
     if (exemplars.containsSome(*CORE_LATIN)) {
         exemplars.addAll(*CORE_LATIN);
     }
     if (exemplars.containsSome(*HANGUL)) {
         // cut down to small list
         UnicodeSet BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status);
         exemplars.removeAll(BLOCK_HANGUL_SYLLABLES);
         exemplars.addAll(*HANGUL);
     }
     if (exemplars.containsSome(*ETHIOPIC)) {
         // cut down to small list
         // make use of the fact that Ethiopic is allocated in 8's, where
         // the base is 0 mod 8.
         // (Separate name: the iterator over exemplars above is still in scope.)
         UnicodeSetIterator  ethiopicIt(*ETHIOPIC);
         while (ethiopicIt.next() && !ethiopicIt.isString()) {
             if ((ethiopicIt.getCodepoint() & 0x7) != 0) {
                 exemplars.remove(ethiopicIt.getCodepoint());
             }
         }
     }
     dest.addAll(exemplars);
 }


 /*
  * Return a copy of the string with U+034F (CGJ) inserted between each pair of
  * adjacent code points.  No CGJ is placed before the first or after the last
  * code point, so empty and single-code-point inputs come back unchanged.
  */
 static const UChar32 CGJ = (UChar)0x034F;
 UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
     UnicodeString result;
     const int32_t length = item.length();
     int32_t pos = 0;
     while (pos < length) {
         result.append(item.char32At(pos));
         pos = item.moveIndex32(pos, 1);
         if (pos < length) {
             // Another code point follows; put the separator in front of it.
             result.append(CGJ);
         }
     }
     return result;
 }


 // Equality comparison is not implemented: the argument is ignored and the
 // result is always FALSE.
 UBool AlphabeticIndex::operator==(const AlphabeticIndex& /* other */) const {
     return FALSE;
 }


 // Inequality is the negation of operator==, so the two operators can never
 // both report FALSE for the same pair of objects.
 UBool AlphabeticIndex::operator!=(const AlphabeticIndex& other) const {
     return !operator==(other);
 }


 // Returns the index's collator viewed as a RuleBasedCollator.
 const RuleBasedCollator &AlphabeticIndex::getCollator() const {
     // There are no known non-RuleBasedCollator collators, and none ever expected.
     // But, in case that changes, better a null pointer than a wrong type.
     RuleBasedCollator *ruleBased = dynamic_cast<RuleBasedCollator *>(collator_);
     return *ruleBased;
 }


 // Label text given to inflow buckets.
 const UnicodeString &AlphabeticIndex::getInflowLabel() const {
     return this->inflowLabel_;
 }

 // Label text given to the overflow bucket.
 const UnicodeString &AlphabeticIndex::getOverflowLabel() const {
     return this->overflowLabel_;
 }


 // Label text given to the underflow bucket.
 const UnicodeString &AlphabeticIndex::getUnderflowLabel() const {
     return this->underflowLabel_;
 }


 // Replace the inflow label and flag the index for rebuilding.  status is not used.
 AlphabeticIndex &AlphabeticIndex::setInflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     indexBuildRequired_ = TRUE;
     inflowLabel_ = label;
     return *this;
 }


 // Replace the overflow label and flag the index for rebuilding.  status is not used.
 AlphabeticIndex &AlphabeticIndex::setOverflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     indexBuildRequired_ = TRUE;
     overflowLabel_ = label;
     return *this;
 }


 // Replace the underflow label and flag the index for rebuilding.  status is not used.
 AlphabeticIndex &AlphabeticIndex::setUnderflowLabel(const UnicodeString &label, UErrorCode &/*status*/) {
     indexBuildRequired_ = TRUE;
     underflowLabel_ = label;
     return *this;
 }


 // Current limit on the number of labels; buildIndex() thins the label list
 // when it exceeds this value.
 int32_t AlphabeticIndex::getMaxLabelCount() const {
     return this->maxLabelCount_;
 }


 //
 //  Set the limit on the number of labels.
 //    maxLabelCount must be positive; otherwise status is set to
 //    U_ILLEGAL_ARGUMENT_ERROR and nothing changes.  Nothing changes either
 //    when status already indicates a failure.
 //
 //    The index is always flagged for rebuilding.  The label list may have
 //    been thinned under the previous limit, so raising the limit must be able
 //    to bring labels back, and the number of buckets is not a reliable
 //    stand-in for the number of labels.
 //    Returns this object.
 //
 AlphabeticIndex &AlphabeticIndex::setMaxLabelCount(int32_t maxLabelCount, UErrorCode &status) {
     if (U_FAILURE(status)) {
         return *this;
     }
     if (maxLabelCount <= 0) {
         status = U_ILLEGAL_ARGUMENT_ERROR;
         return *this;
     }
     maxLabelCount_ = maxLabelCount;
     indexBuildRequired_ = TRUE;
     return *this;
 }


 // Scans firstScriptCharacters_ in order and returns the first entry that
 // collator_ orders after lowerLimit; returns the shared empty string when no
 // entry qualifies.  status is not used.
 const UnicodeString &AlphabeticIndex::getOverflowComparisonString(const UnicodeString &lowerLimit, UErrorCode &/*status*/) {
     const int32_t count = firstScriptCharacters_->size();
     for (int32_t idx = 0; idx < count; ++idx) {
         const UnicodeString &candidate =
                 *static_cast<const UnicodeString *>(firstScriptCharacters_->elementAt(idx));
         if (collator_->compare(candidate, lowerLimit) > 0) {
             return candidate;
         }
     }
     return *EMPTY_STRING;
 }

 // Sets dest to the set of characters whose script is the script of the first
 // code point of codePoint.  dest is left as it is when status already
 // indicates a failure.  Always returns the address of dest.
 UnicodeSet *AlphabeticIndex::getScriptSet(UnicodeSet &dest, const UnicodeString &codePoint, UErrorCode &status) {
     if (U_SUCCESS(status)) {
         const UChar32 first = codePoint.char32At(0);
         const UScriptCode script = uscript_getScript(first, &status);
         dest.applyIntPropertyValue(UCHAR_SCRIPT, script, status);
     }
     return &dest;
 }

 //
 //  init() - Common code for constructors.
 //
 //       Makes sure the shared constants exist, puts every member into a known
 //       state, then allocates the containers owned by the object.
 //       When an allocation fails, status is set (U_MEMORY_ALLOCATION_ERROR
 //       unless an error was already reported) and the function returns.
 //       Members not yet allocated stay NULL, which the destructor tolerates.
 //

 void AlphabeticIndex::init(UErrorCode &status) {
     // Initialize statics if needed.
     AlphabeticIndex::staticInit(status);

     // Put the object into a known state so that the destructor will function.

     alreadyIn_             = NULL;
     bucketList_            = NULL;
     collator_              = NULL;
     collatorPrimaryOnly_   = NULL;
     currentBucket_         = NULL;
     firstScriptCharacters_ = NULL;
     initialLabels_         = NULL;
     indexBuildRequired_    = TRUE;
     inputRecords_          = NULL;
     itemsIterIndex_        = 0;
     labels_                = NULL;
     labelsIterIndex_       = 0;
     maxLabelCount_         = 99;
     noDistinctSorting_     = NULL;
     notAlphabetic_         = NULL;
     recordCounter_         = 0;

     if (U_FAILURE(status)) {
         return;
     }

     // Default labels.  These need no allocation checks, so they are set up
     // before anything that can fail.
     inflowLabel_.remove();
     inflowLabel_.append((UChar)0x2026);    // Ellipsis
     overflowLabel_ = inflowLabel_;
     underflowLabel_ = inflowLabel_;

     alreadyIn_             = uhash_open(uhash_hashUnicodeString,    // Key Hash,
                                         uhash_compareUnicodeString, // key Comparator,
                                         NULL,                       // value Comparator
                                         &status);
     if (U_FAILURE(status)) {
         return;
     }
     if (alreadyIn_ == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return;
     }
     uhash_setKeyDeleter(alreadyIn_, uhash_deleteUnicodeString);
     uhash_setValueDeleter(alreadyIn_, uhash_deleteUnicodeSet);

     bucketList_            = new UVector(status);
     labels_                = new UVector(status);
     inputRecords_          = new UVector(status);

     noDistinctSorting_     = new UnicodeSet();
     notAlphabetic_         = new UnicodeSet();
     initialLabels_         = new UnicodeSet();

     // Check every allocation before any of the new objects is used.
     if (bucketList_ == NULL || labels_ == NULL || inputRecords_ == NULL ||
         noDistinctSorting_ == NULL || notAlphabetic_ == NULL || initialLabels_ == NULL) {
         if (U_SUCCESS(status)) {
             status = U_MEMORY_ALLOCATION_ERROR;
         }
         return;
     }

     bucketList_->setDeleter(alphaIndex_deleteBucket);
     labels_->setDeleter(uhash_deleteUnicodeString);
     labels_->setComparer(uhash_compareUnicodeString);
     inputRecords_->setDeleter(alphaIndex_deleteRecord);
 }


 // Set to TRUE at the end of a successful staticInit() and back to FALSE by
 // staticCleanup().
 static  UBool  indexCharactersAreInitialized = FALSE;

 //  Index Characters Clean up function.  Delete statically allocated constant stuff.
 //  staticInit() registers it with ucln_i18n_registerCleanup() and also calls it
 //  directly when initialization fails.  Always returns TRUE.
 U_CDECL_BEGIN
 static UBool U_CALLCONV indexCharacters_cleanup(void) {
     AlphabeticIndex::staticCleanup();
     return TRUE;
 }
 U_CDECL_END

 // Deletes the shared constants created by staticInit() and resets each pointer
 // to NULL, then clears the initialized flag so that a later staticInit() call
 // creates them again.
 void AlphabeticIndex::staticCleanup() {
     delete ALPHABETIC;
     ALPHABETIC = NULL;
     delete HANGUL;
     HANGUL = NULL;
     delete ETHIOPIC;
     ETHIOPIC = NULL;
     delete CORE_LATIN;
     CORE_LATIN = NULL;
     delete IGNORE_SCRIPTS;
     IGNORE_SCRIPTS = NULL;
     delete TO_TRY;
     TO_TRY = NULL;
     delete UNIHAN;
     UNIHAN = NULL;
     delete EMPTY_STRING;
     EMPTY_STRING = NULL;
     nfkdNormalizer = NULL;  // ref to a singleton.  Do not delete.
     indexCharactersAreInitialized = FALSE;
 }


 // Definitions of the shared constants.  They are allocated in staticInit() and
 // released in staticCleanup().
 UnicodeSet *AlphabeticIndex::ALPHABETIC;
 UnicodeSet *AlphabeticIndex::HANGUL;
 UnicodeSet *AlphabeticIndex::ETHIOPIC;
 UnicodeSet *AlphabeticIndex::CORE_LATIN;
 UnicodeSet *AlphabeticIndex::IGNORE_SCRIPTS;
 UnicodeSet *AlphabeticIndex::TO_TRY;
 UnicodeSet *AlphabeticIndex::UNIHAN;
 const UnicodeString *AlphabeticIndex::EMPTY_STRING;

 //
 //  staticInit()    One-time initialization of constants.
 //                  Thread safe.  Called from constructors.
 //                  Mutex overhead is not a concern.  AlphabeticIndex constructors are
 //                  sufficiently heavy that the cost of the mutex check is not significant.
 //
 //                  Every allocation is checked before the new object is used.
 //                  On any failure, the constants created so far are released again
 //                  and status reports the error (U_MEMORY_ALLOCATION_ERROR when
 //                  nothing more specific was set).

 void AlphabeticIndex::staticInit(UErrorCode &status) {
     static UMTX IndexCharsInitMutex;

     Mutex mutex(&IndexCharsInitMutex);
     if (indexCharactersAreInitialized || U_FAILURE(status)) {
         return;
     }
     UBool finishedInit = FALSE;

     {
         UnicodeString alphaString = UNICODE_STRING_SIMPLE("[[:alphabetic:]-[:mark:]]");
         ALPHABETIC = new UnicodeSet(alphaString, status);
         if (ALPHABETIC == NULL) {
             goto err;
         }

         HANGUL = new UnicodeSet();
         if (HANGUL == NULL) {
             goto err;
         }
         HANGUL->add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).add(0xB9C8).add(0xBC14).add(0xC0AC).
                 add(0xC544).add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).add(0xD30C).add(0xD558);


         UnicodeString EthiopicStr = UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]");
         ETHIOPIC = new UnicodeSet(EthiopicStr, status);
         if (ETHIOPIC == NULL) {
             goto err;
         }

         CORE_LATIN = new UnicodeSet((UChar32)0x61, (UChar32)0x7a);  // ('a', 'z');
         if (CORE_LATIN == NULL) {
             goto err;
         }

         UnicodeString IgnoreStr= UNICODE_STRING_SIMPLE(
                 "[[:sc=Common:][:sc=inherited:][:script=Unknown:][:script=braille:]]");
         IGNORE_SCRIPTS = new UnicodeSet(IgnoreStr, status);
         if (IGNORE_SCRIPTS == NULL) {
             goto err;
         }
         IGNORE_SCRIPTS->freeze();

         UnicodeString nfcqcStr = UNICODE_STRING_SIMPLE("[:^nfcqc=no:]");
         TO_TRY = new UnicodeSet(nfcqcStr, status);
         if (TO_TRY == NULL) {
             goto err;
         }

         UnicodeString unihanStr = UNICODE_STRING_SIMPLE("[:script=Hani:]");
         UNIHAN = new UnicodeSet(unihanStr, status);
         if (UNIHAN == NULL) {
             goto err;
         }

         EMPTY_STRING = new UnicodeString();
         if (EMPTY_STRING == NULL) {
             goto err;
         }

         nfkdNormalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, status);
         if (nfkdNormalizer == NULL) {
             goto err;
         }
     }
     finishedInit = TRUE;

   err:
     // Reached both on success and on failure.  An unfinished init without an
     // error code means that an allocation returned NULL.
     if (!finishedInit && U_SUCCESS(status)) {
         status = U_MEMORY_ALLOCATION_ERROR;
     }
     if (U_FAILURE(status)) {
         indexCharacters_cleanup();
         return;
     }
     ucln_i18n_registerCleanup(UCLN_I18N_INDEX_CHARACTERS, indexCharacters_cleanup);
     indexCharactersAreInitialized = TRUE;
 }


 //
 //  Sort callback for a UVector of UnicodeString pointers.
 //    context is the Collator that decides the order.
 //    left and right point at the vector's element slots (UHashTok).
 //    NULL strings sort after non-NULL strings; two NULLs compare equal.
 //
 static int32_t U_CALLCONV
 sortCollateComparator(const void *context, const void *left, const void *right) {
     const UnicodeString *lhs =
             static_cast<const UnicodeString *>(static_cast<const UHashTok *>(left)->pointer);
     const UnicodeString *rhs =
             static_cast<const UnicodeString *>(static_cast<const UHashTok *>(right)->pointer);

     if (lhs == rhs) {
         // Same string object, or both NULL.
         return 0;
     }
     if (lhs == NULL) {
         return 1;
     }
     if (rhs == NULL) {
         return -1;
     }
     const Collator *col = static_cast<const Collator *>(context);
     return (int32_t) col->compare(*lhs, *rhs);
 }

 //
 //  Sort callback for a UVector of Record pointers.
 //    context is the Collator applied to the records' sorting names.
 //    Records whose names collate as equal are ordered by serial number.
 //
 static int32_t U_CALLCONV
 recordCompareFn(const void *context, const void *left, const void *right) {
     const AlphabeticIndex::Record *lhs =
             static_cast<const AlphabeticIndex::Record *>(static_cast<const UHashTok *>(left)->pointer);
     const AlphabeticIndex::Record *rhs =
             static_cast<const AlphabeticIndex::Record *>(static_cast<const UHashTok *>(right)->pointer);
     const Collator *col = static_cast<const Collator *>(context);

     const Collator::EComparisonResult byName = col->compare(lhs->sortingName_, rhs->sortingName_);
     if (byName != Collator::EQUAL) {
         return (int32_t) byName;
     }
     // Names tie: fall back to the serial numbers.
     if (lhs->serialNumber_ < rhs->serialNumber_) {
         return (int32_t) Collator::LESS;
     }
     if (lhs->serialNumber_ > rhs->serialNumber_) {
         return (int32_t) Collator::GREATER;
     }
     return (int32_t) Collator::EQUAL;
 }


 #if 0
 //
 //  First characters in scripts.
 //  Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
 //  The vector is sorted according to this index's collation.
 //
 //  This code is too slow to use, so for now the data is hard coded.
 //    The hard-coded implementation follows this disabled block.
 //
 //  NOTE(review): this block is compiled out by the surrounding #if 0.  Check the
 //                notes below before re-enabling it.
 //
 UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {

     if (U_FAILURE(status)) {
         return NULL;
     }

     // One slot per script code; an empty string means "no character found yet".
     UnicodeString results[USCRIPT_CODE_LIMIT];
     UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");

     UnicodeSetIterator siter(*TO_TRY);
     while (siter.next()) {
         const UnicodeString &current = siter.getString();
         Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
         if (r < 0) {  // TODO fix; we only want "real" script characters, not
                       // symbols.
             continue;
         }

         // Keep the candidate that collates lowest for its script.
         int script = uscript_getScript(current.char32At(0), &status);
         if (results[script].length() == 0) {
             results[script] = current;
         }
         else if (ruleBasedCollator->compare(current, results[script]) < 0) {
             results[script] = current;
         }
     }

     UnicodeSet extras;
     UnicodeSet expansions;
     RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
     const UCollator *uRuleBasedCollator = rbc->getUCollator();
     ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
     extras.addAll(expansions).removeAll(*TO_TRY);
     if (extras.size() != 0) {
         const Normalizer2 *normalizer = Normalizer2::getInstance(NULL, "nfkc", UNORM2_COMPOSE, status);
         UnicodeSetIterator extrasIter(extras);
         while (extrasIter.next()) {
             // NOTE(review): next() is called a second time here, whereas the loop over
             //               TO_TRY above fetches the current element with getString().
             const UnicodeString &current = extrasIter.next();
             if (!TO_TRY->containsAll(current))
                 continue;
             if (!normalizer->isNormalized(current, status) ||
                 ruleBasedCollator->compare(current, LOWER_A) < 0) {
                 continue;
             }
             int script = uscript_getScript(current.char32At(0), &status);
             if (results[script].length() == 0) {
                 results[script] = current;
             } else if (ruleBasedCollator->compare(current, results[script]) < 0) {
                 results[script] = current;
             }
         }
     }

     // Copy the non-empty slots into a vector that owns its strings, then sort it.
     UVector *dest = new UVector(status);
     dest->setDeleter(uhash_deleteUnicodeString);
     for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
         if (results[i].length() > 0) {
             dest->addElement(results[i].clone(), status);
         }
     }
     dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
     return dest;
 }
 #endif


 //
 //  First characters in scripts.
 //  Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
 //  The vector is sorted according to this index's collation.
 //
 //  It takes too much time to compute this from character properties, so hard code it for now.
 //  Character constants copied from corresponding declaration in ICU4J.

 static UChar HACK_FIRST_CHARS_IN_SCRIPTS[] =  { 0x61, 0, 0x03B1, 0,
             0x2C81, 0, 0x0430, 0, 0x2C30, 0, 0x10D0, 0, 0x0561, 0, 0x05D0, 0, 0xD802, 0xDD00, 0, 0x0800, 0, 0x0621, 0, 0x0710, 0,
             0x0780, 0, 0x07CA, 0, 0x2D30, 0, 0x1200, 0, 0x0950, 0, 0x0985, 0, 0x0A74, 0, 0x0AD0, 0, 0x0B05, 0, 0x0BD0, 0,
             0x0C05, 0, 0x0C85, 0, 0x0D05, 0, 0x0D85, 0, 0xABC0, 0, 0xA800, 0, 0xA882, 0, 0xD804, 0xDC83, 0, 0x1B83, 0,
             0xD802, 0xDE00, 0, 0x0E01, 0, 0x0E81, 0, 0xAA80, 0, 0x0F40, 0, 0x1C00, 0, 0xA840, 0, 0x1900, 0, 0x1700, 0, 0x1720, 0,
             0x1740, 0, 0x1760, 0, 0x1A00, 0, 0xA930, 0, 0xA90A, 0, 0x1000, 0, 0x1780, 0, 0x1950, 0, 0x1980, 0, 0x1A20, 0,
             0xAA00, 0, 0x1B05, 0, 0xA984, 0, 0x1880, 0, 0x1C5A, 0, 0x13A0, 0, 0x1401, 0, 0x1681, 0, 0x16A0, 0, 0xD803, 0xDC00, 0,
             0xA500, 0, 0xA6A0, 0, 0x1100, 0, 0x3041, 0, 0x30A1, 0, 0x3105, 0, 0xA000, 0, 0xA4F8, 0, 0xD800, 0xDE80, 0,
             0xD800, 0xDEA0, 0, 0xD802, 0xDD20, 0, 0xD800, 0xDF00, 0, 0xD800, 0xDF30, 0, 0xD801, 0xDC28, 0, 0xD801, 0xDC50, 0,
             0xD801, 0xDC80, 0, 0xD800, 0xDC00, 0, 0xD802, 0xDC00, 0, 0xD802, 0xDE60, 0, 0xD802, 0xDF00, 0, 0xD802, 0xDC40, 0,
             0xD802, 0xDF40, 0, 0xD802, 0xDF60, 0, 0xD800, 0xDF80, 0, 0xD800, 0xDFA0, 0, 0xD808, 0xDC00, 0, 0xD80C, 0xDC00, 0, 0x4E00, 0 };

 //
 //  firstStringsInScript()   Build the vector of first-characters-in-script from
 //                           the hard coded HACK_FIRST_CHARS_IN_SCRIPTS table.
 //
 //       The table is a sequence of NUL-terminated UChar strings.  Each string
 //       becomes one UnicodeString in the result, and the result is then
 //       sorted with collator_.
 //
 //       Returns NULL when status indicates a failure on entry or when the
 //       vector cannot be allocated.  When a failure happens later, the vector
 //       built so far is returned and status reports the error.
 //
 UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return NULL;
     }
     UVector *dest = new UVector(status);
     if (dest == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return NULL;
     }
     dest->setDeleter(uhash_deleteUnicodeString);
     const UChar *src  = HACK_FIRST_CHARS_IN_SCRIPTS;
     const UChar *limit = src + sizeof(HACK_FIRST_CHARS_IN_SCRIPTS) / sizeof(HACK_FIRST_CHARS_IN_SCRIPTS[0]);
     do {
         if (U_FAILURE(status)) {
             return dest;
         }
         UnicodeString *str = new UnicodeString(src, -1);
         if (str == NULL) {
             // There is no string to measure or to add; stop here.
             status = U_MEMORY_ALLOCATION_ERROR;
             return dest;
         }
         // Read the length before the string is handed over to the vector.
         const int32_t consumed = str->length() + 1;   // +1 skips the terminating NUL
         dest->addElement(str, status);
         src += consumed;
     } while (src < limit);
     dest->sortWithUComparator(sortCollateComparator, collator_, status);
     return dest;
 }


 /**
  * Classify a locale for the purposes of the Pinyin hack.
  *
  * @param loc  The locale to classify.
  * @return     kNormal for any language other than "zh"; kTraditional for "zh" with
  *             script "Hant" or country "TW"; kSimplified for every other "zh" locale.
  */
 AlphabeticIndex::ELangType AlphabeticIndex::langTypeFromLocale(const Locale &loc) {
     const UBool isChinese = (uprv_strcmp(loc.getLanguage(), "zh") == 0);
     if (!isChinese) {
         return kNormal;
     }
     // The script is checked first; the country is only consulted when the script is not Hant.
     const UBool isTraditional = (uprv_strcmp(loc.getScript(), "Hant") == 0) ||
                                 (uprv_strcmp(loc.getCountry(), "TW") == 0);
     return isTraditional ? kTraditional : kSimplified;
 }


 //
 // Pinyin Hacks.  Direct port from Java.
 //

 // Code point that initPinyinBounds() looks for in the collator's tailored set:
 // if it is present the "long" Pinyin tables are selected, otherwise the "short" ones.
 // U+28EAD is the surrogate pair 0xD863 0xDEAD, i.e. the 'c' row of HACK_PINYIN_LOOKUP_LONG.
 static const UChar32  probeCharInLong = 0x28EAD;


 // Prefix letters used when the short tables are selected (see initPinyinBounds()).
 // hackName() picks one entry, by the row index it found in the lookup table, and
 // prepends it to the name.  There are 23 entries, matching the 23 data rows of
 // HACK_PINYIN_LOOKUP_SHORT.
 static const UChar PINYIN_LOWER_BOUNDS_SHORT[] = {      // "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz"
             0x0101, 0x62, 0x63, 0x64, 0x0113, 0x66, 0x67, 0x68, 0x6A, 0x6B, /*l*/0x6C, 0x1E3F, 0x0144, 0x014D,
             /*p*/0x70, 0x71, 0x72, 0x73, 0x74, /*w*/0x77, 0x78, 0x79, 0x7A};


 // Pinyin lookup tables copied, pasted (and reformatted) from the ICU4J code.

 // Lookup table used when the short tables are selected (see initPinyinBounds()).
 // Each row is a NUL-terminated UTF-16 string that hackName() compares a name against;
 // the row for A is the empty string.  Rows are walked in order until the 0xFFFF
 // sentinel row, and the index found selects the prefix letter from PINYIN_LOWER_BOUNDS.
 // NOTE(review): each row presumably holds the first Han character filed under that
 // letter by the collation -- confirm against the ICU4J original.
 AlphabeticIndex::PinyinLookup AlphabeticIndex::HACK_PINYIN_LOOKUP_SHORT = {
         {(UChar)0,      (UChar)0, (UChar)0}, // A
         {(UChar)0x516B, (UChar)0, (UChar)0}, // B
         {(UChar)0x5693, (UChar)0, (UChar)0}, // C
         {(UChar)0x5491, (UChar)0, (UChar)0}, // D
         {(UChar)0x59B8, (UChar)0, (UChar)0}, // E
         {(UChar)0x53D1, (UChar)0, (UChar)0}, // F
         {(UChar)0x65EE, (UChar)0, (UChar)0}, // G
         {(UChar)0x54C8, (UChar)0, (UChar)0}, // H
         {(UChar)0x4E0C, (UChar)0, (UChar)0}, // J
         {(UChar)0x5494, (UChar)0, (UChar)0}, // K
         {(UChar)0x5783, (UChar)0, (UChar)0}, // L
         {(UChar)0x5452, (UChar)0, (UChar)0}, // M
         {(UChar)0x5514, (UChar)0, (UChar)0}, // N
         {(UChar)0x5594, (UChar)0, (UChar)0}, // O
         {(UChar)0x5991, (UChar)0, (UChar)0}, // P
         {(UChar)0x4E03, (UChar)0, (UChar)0}, // Q
         {(UChar)0x513F, (UChar)0, (UChar)0}, // R
         {(UChar)0x4EE8, (UChar)0, (UChar)0}, // S
         {(UChar)0x4ED6, (UChar)0, (UChar)0}, // T
         {(UChar)0x7A75, (UChar)0, (UChar)0}, // W
         {(UChar)0x5915, (UChar)0, (UChar)0}, // X
         {(UChar)0x4E2B, (UChar)0, (UChar)0}, // Y
         {(UChar)0x5E00, (UChar)0, (UChar)0}, // Z
         {(UChar)0xFFFF, (UChar)0, (UChar)0}, // mark end of array
     };

 // Prefix letters used when the long tables are selected (see initPinyinBounds()).
 // The contents are the same 23 code units as PINYIN_LOWER_BOUNDS_SHORT; the entries
 // match the 23 data rows of HACK_PINYIN_LOOKUP_LONG.
 static const UChar PINYIN_LOWER_BOUNDS_LONG[] = {   // "\u0101bcd\u0113fghjkl\u1E3F\u0144\u014Dpqrstwxyz";
             0x0101, 0x62, 0x63, 0x64, 0x0113, 0x66, 0x67, 0x68, 0x6A, 0x6B, /*l*/0x6C, 0x1E3F, 0x0144, 0x014D,
             /*p*/0x70, 0x71, 0x72, 0x73, 0x74, /*w*/0x77, 0x78, 0x79, 0x7A};

 // Lookup table used when the long tables are selected (see initPinyinBounds()).
 // Same layout as HACK_PINYIN_LOOKUP_SHORT: NUL-terminated UTF-16 rows ending with a
 // 0xFFFF sentinel row.  Here the rows for c, d, g and p are surrogate pairs
 // (supplementary code points), and the l row differs from the short table.
 // The c row (0xD863 0xDEAD) is U+28EAD, the same value as probeCharInLong.
 AlphabeticIndex::PinyinLookup AlphabeticIndex::HACK_PINYIN_LOOKUP_LONG = {
         {(UChar)0,      (UChar)0,      (UChar)0}, // A
         {(UChar)0x516B, (UChar)0,      (UChar)0}, // b
         {(UChar)0xD863, (UChar)0xDEAD, (UChar)0}, // c
         {(UChar)0xD844, (UChar)0xDE51, (UChar)0}, // d
         {(UChar)0x59B8, (UChar)0,      (UChar)0}, // e
         {(UChar)0x53D1, (UChar)0,      (UChar)0}, // f
         {(UChar)0xD844, (UChar)0xDE45, (UChar)0}, // g
         {(UChar)0x54C8, (UChar)0,      (UChar)0}, // h
         {(UChar)0x4E0C, (UChar)0,      (UChar)0}, // j
         {(UChar)0x5494, (UChar)0,      (UChar)0}, // k
         {(UChar)0x3547, (UChar)0,      (UChar)0}, // l
         {(UChar)0x5452, (UChar)0,      (UChar)0}, // m
         {(UChar)0x5514, (UChar)0,      (UChar)0}, // n
         {(UChar)0x5594, (UChar)0,      (UChar)0}, // o
         {(UChar)0xD84F, (UChar)0xDC7A, (UChar)0}, // p
         {(UChar)0x4E03, (UChar)0,      (UChar)0}, // q
         {(UChar)0x513F, (UChar)0,      (UChar)0}, // r
         {(UChar)0x4EE8, (UChar)0,      (UChar)0}, // s
         {(UChar)0x4ED6, (UChar)0,      (UChar)0}, // t
         {(UChar)0x7A75, (UChar)0,      (UChar)0}, // w
         {(UChar)0x5915, (UChar)0,      (UChar)0}, // x
         {(UChar)0x4E2B, (UChar)0,      (UChar)0}, // y
         {(UChar)0x5E00, (UChar)0,      (UChar)0}, // z
         {(UChar)0xFFFF, (UChar)0,      (UChar)0}, // mark end of array
     };


 //
 //  Probe the collation data, and decide which Pinyin tables should be used
 //
 //  ICU can be built with a choice between two Chinese collations.
 //  The hack Pinyin tables to use depend on which one is in use.
 //  We can assume that any given copy of ICU will have only one of the collations available,
 //  and that there is no way, in a given process, to create two alphabetic indexes using
 //  different Chinese collations.  Which means the probe can be done once
 //  and the results cached.
 //
 //  This whole arrangement is temporary.
 //
 // Lookup table currently in use; NULL until initPinyinBounds() points it at the
 // short or the long table.
 AlphabeticIndex::PinyinLookup *AlphabeticIndex::HACK_PINYIN_LOOKUP  = NULL;
 // Prefix-letter table currently in use.  initPinyinBounds() treats a non-NULL value
 // here as "the probe has already been done" and returns without probing again.
 const UChar  *AlphabeticIndex::PINYIN_LOWER_BOUNDS = NULL;

 void AlphabeticIndex::initPinyinBounds(const Collator *col, UErrorCode &status) {
     {
         Mutex m;
         if (PINYIN_LOWER_BOUNDS != NULL) {
             return;
         }
     }
     UnicodeSet *colSet = col->getTailoredSet(status);
     if (U_FAILURE(status) || colSet == NULL) {
         delete colSet;
         if (U_SUCCESS(status))  {
             status = U_MEMORY_ALLOCATION_ERROR;
         }
         return;
     }
     UBool useLongTables = colSet->contains(probeCharInLong);
     delete colSet;
     {
         Mutex m;
         if (useLongTables) {
             PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG;
             HACK_PINYIN_LOOKUP  = &HACK_PINYIN_LOOKUP_LONG;
         } else {
             PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT;
             HACK_PINYIN_LOOKUP  = &HACK_PINYIN_LOOKUP_SHORT;
         }
     }
 }

 // Pinyin Hack:
 //    Produce the string used to file a name into a bucket.
 //    For a simplified-Chinese index and a name whose first code point is in UNIHAN,
 //      dest becomes a Latin prefix letter followed by the name, so that the name
 //      lands under a Latin index heading.
 //    In every other case, including a failure to initialize the Pinyin tables,
 //      dest is simply a copy of name.

 void AlphabeticIndex::hackName(UnicodeString &dest, const UnicodeString &name, const Collator *col) {

     const UBool needsPrefix = (langType_ == kSimplified) && UNIHAN->contains(name.char32At(0));
     if (!needsPrefix) {
         dest = name;
         return;
     }

     UErrorCode status = U_ZERO_ERROR;
     initPinyinBounds(col, status);
     if (U_FAILURE(status)) {
         dest = name;
         return;
     }

     // TODO:  use binary search
     // Walk the lookup rows in order.  The scan stops at the first row that the name
     // does not sort after; an exact match keeps that row, otherwise the previous row
     // is the one the name belongs to.  Reaching the sentinel selects the last data row.
     int slot = 0;
     for (;; ++slot) {
         if ((*HACK_PINYIN_LOOKUP)[slot][0] == (UChar)0xffff) {
             --slot;
             break;
         }
         const int32_t order = col->compare(name, (*HACK_PINYIN_LOOKUP)[slot]);
         if (order > 0) {
             continue;
         }
         if (order < 0) {
             --slot;
         }
         break;
     }

     const UChar prefix = PINYIN_LOWER_BOUNDS[slot];
     dest.setTo(prefix);
     dest.append(name);
 }


 /**
  * Ordering function that puts "better" strings first.  Strings are ranked by,
  * in order: shorter NFKD form, then code point order of the NFKD forms, then
  * code point order of the original strings.
  *
  * For use with array sort or UVector.
  * @param context  A UErrorCode pointer; it receives any error from normalization.
  * @param left     A UHashTok pointer, which must refer to a UnicodeString *
  * @param right    A UHashTok pointer, which must refer to a UnicodeString *
  * @return         Negative, zero or positive as left ranks before, same as, or after right.
  */

 static int32_t U_CALLCONV
 PreferenceComparator(const void *context, const void *left, const void *right) {
     const UnicodeString *lhs =
         static_cast<const UnicodeString *>(static_cast<const UHashTok *>(left)->pointer);
     const UnicodeString *rhs =
         static_cast<const UnicodeString *>(static_cast<const UHashTok *>(right)->pointer);
     // The sort callback signature hands the context over as const void *.
     UErrorCode &status = *const_cast<UErrorCode *>(static_cast<const UErrorCode *>(context));

     // Same object on both sides: nothing to normalize.
     if (lhs == rhs) {
         return 0;
     }

     UnicodeString lhsNfkd = nfkdNormalizer->normalize(*lhs, status);
     UnicodeString rhsNfkd = nfkdNormalizer->normalize(*rhs, status);

     const int32_t lengthDelta = lhsNfkd.length() - rhsNfkd.length();
     if (lengthDelta != 0) {
         return lengthDelta;
     }

     const int32_t nfkdOrder = lhsNfkd.compareCodePointOrder(rhsNfkd);
     if (nfkdOrder != 0) {
         return nfkdOrder;
     }

     // Final tie-break on the un-normalized strings.
     return lhs->compareCodePointOrder(*rhs);
 }


 //
 //  Constructor & Destructor for AlphabeticIndex::Record
 //
 //     Records are internal only, instances are not directly surfaced in the public API.
 //     This class is mostly struct-like, with all public fields.

 /**
  * Create a record for one (name, data) pair.
  *
  * @param alphaIndex  The index the record belongs to; it supplies the primary-only
  *                    collator for hackName() and the counter for the serial number.
  * @param name        The record's name; copied into name_.
  * @param data        Opaque caller data; only the pointer is stored.
  */
 AlphabeticIndex::Record::Record(AlphabeticIndex *alphaIndex, const UnicodeString &name, const void *data):
     alphaIndex_(alphaIndex), name_(name), data_(data)
 {
     // sortingName_ is name_, possibly with a Latin prefix letter (see hackName()).
     alphaIndex->hackName(sortingName_, name_, alphaIndex->collatorPrimaryOnly_);
     // Serial numbers increase in creation order.
     serialNumber_ = ++alphaIndex->recordCounter_;
 }

 // Nothing is released explicitly here; in particular the data_ pointer passed to
 // the constructor is not freed by this destructor.
 AlphabeticIndex::Record::~Record() {
 }


 /**
  * Add a (name, data) record to the index.
  *
  * @param name    Name of the record.
  * @param data    Opaque caller data associated with the name; only the pointer is kept.
  * @param status  ICU error code.  Nothing is done if it already indicates a failure.
  *                Set to U_MEMORY_ALLOCATION_ERROR if the record cannot be allocated.
  * @return        This object, for chaining.
  */
 AlphabeticIndex & AlphabeticIndex::addRecord(const UnicodeString &name, const void *data, UErrorCode &status) {
     if (U_FAILURE(status)) {
         return *this;
     }
     Record *r = new Record(this, name, data);
     if (r == NULL) {
         status = U_MEMORY_ALLOCATION_ERROR;
         return *this;
     }
     inputRecords_->addElement(r, status);
     if (U_FAILURE(status)) {
         // The vector did not take the record, so it has to be released here.
         delete r;
         return *this;
     }
     // The set of records changed; the buckets must be rebuilt before the next use.
     indexBuildRequired_ = TRUE;
     return *this;
 }


 /**
  * Remove all records from the index and flag the index for rebuilding.
  *
  * @param status  ICU error code.  Nothing is done if it already indicates a failure.
  * @return        This object, for chaining.
  */
 AlphabeticIndex &AlphabeticIndex::clearRecords(UErrorCode &status) {
     if (U_SUCCESS(status)) {
         inputRecords_->removeAllElements();
         indexBuildRequired_ = TRUE;
     }
     return *this;
 }


 /**
  * Find the index of the bucket that a name would be placed in.
  *
  * @param name    The name to look up.
  * @param status  ICU error code; 0 is returned if it indicates a failure after the
  *                index has been (re)built.
  * @return        Index of the bucket whose range contains the name.
  */
 int32_t AlphabeticIndex::getBucketIndex(const UnicodeString &name, UErrorCode &status) {
     buildIndex(status);
     if (U_FAILURE(status)) {
         return 0;
     }

     // Simplified Chinese names get a Latin prefix letter; for non-Chinese locales
     //   or non-Chinese names the sort key is the unmodified name.
     UnicodeString sortKey;
     hackName(sortKey, name, collatorPrimaryOnly_);

     // TODO:  use a binary search.
     // Advance to the first bucket whose lower boundary sorts after the key; the key
     //  belongs in the bucket just before it.  If no such bucket exists the scan ends
     //  at the bucket count, which selects the last bucket.
     const int32_t bucketCount = bucketList_->size();
     int32_t following = 0;
     while (following < bucketCount) {
         const Bucket *candidate = static_cast<Bucket *>(bucketList_->elementAt(following));
         const Collator::EComparisonResult order =
             collatorPrimaryOnly_->compare(sortKey, candidate->lowerBoundary_);
         if (order < 0) {
             break;
         }
         ++following;
     }
     return following - 1;
 }


 // Returns the bucket iteration cursor, labelsIterIndex_.  resetBucketIterator() sets
 // it to -1, and nextBucket() leaves it equal to the bucket count once the iteration
 // has run past the last bucket.
 int32_t AlphabeticIndex::getBucketIndex() const {
     return labelsIterIndex_;
 }


 /**
  * Advance the bucket iteration to the next bucket.
  *
  * @param status  ICU error code.  Set to U_ENUM_OUT_OF_SYNC_ERROR if the index needs
  *                rebuilding while an iteration is already positioned on a bucket.
  * @return        TRUE if there is a current bucket after the call; FALSE on error or
  *                when the iteration has moved past the last bucket.
  */
 UBool AlphabeticIndex::nextBucket(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return FALSE;
     }
     // A rebuild would invalidate the bucket the iteration is currently positioned on.
     if (currentBucket_ != NULL && indexBuildRequired_) {
         status = U_ENUM_OUT_OF_SYNC_ERROR;
         return FALSE;
     }
     buildIndex(status);
     if (U_FAILURE(status)) {
         return FALSE;
     }

     const int32_t bucketCount = bucketList_->size();
     const int32_t nextIndex = labelsIterIndex_ + 1;
     if (nextIndex >= bucketCount) {
         // Park the cursor just past the end; currentBucket_ is left untouched.
         labelsIterIndex_ = bucketCount;
         return FALSE;
     }
     labelsIterIndex_ = nextIndex;
     currentBucket_ = static_cast<Bucket *>(bucketList_->elementAt(nextIndex));
     resetRecordIterator();
     return TRUE;
 }

 // Label of the bucket the iteration is positioned on, or the shared empty string
 // when there is no current bucket.
 const UnicodeString &AlphabeticIndex::getBucketLabel() const {
     if (currentBucket_ == NULL) {
         return *EMPTY_STRING;
     }
     return currentBucket_->label_;
 }


 // Label type of the bucket the iteration is positioned on, or U_ALPHAINDEX_NORMAL
 // when there is no current bucket.
 UAlphabeticIndexLabelType AlphabeticIndex::getBucketLabelType() const {
     if (currentBucket_ == NULL) {
         return U_ALPHAINDEX_NORMAL;
     }
     return currentBucket_->labelType_;
 }


 // Number of records in the bucket the iteration is positioned on, or 0 when there
 // is no current bucket.
 int32_t AlphabeticIndex::getBucketRecordCount() const {
     if (currentBucket_ == NULL) {
         return 0;
     }
     return currentBucket_->records_->size();
 }


 /**
  * Reset the bucket iteration so that the next call to nextBucket() lands on the
  * first bucket.  The index is (re)built first.
  *
  * @param status  ICU error code.  Nothing is done if it already indicates a failure.
  * @return        This object, for chaining.
  */
 AlphabeticIndex &AlphabeticIndex::resetBucketIterator(UErrorCode &status) {
     if (U_SUCCESS(status)) {
         buildIndex(status);
         // The cursor is rewound whether or not the build succeeded.
         currentBucket_ = NULL;
         labelsIterIndex_ = -1;
     }
     return *this;
 }


 /**
  * Advance the record iteration within the current bucket.
  *
  * @param status  ICU error code.  Set to U_INVALID_STATE_ERROR if there is no current
  *                bucket, or to U_ENUM_OUT_OF_SYNC_ERROR if the index needs rebuilding.
  * @return        TRUE if the iteration is positioned on a record after the call;
  *                FALSE on error or when it has moved past the last record.
  */
 UBool AlphabeticIndex::nextRecord(UErrorCode &status) {
     if (U_FAILURE(status)) {
         return FALSE;
     }
     if (currentBucket_ == NULL) {
         // Records can only be enumerated inside a bucket, and the bucket
         // enumeration has not produced one.
         status = U_INVALID_STATE_ERROR;
         return FALSE;
     }
     if (indexBuildRequired_) {
         status = U_ENUM_OUT_OF_SYNC_ERROR;
         return FALSE;
     }

     const int32_t recordCount = currentBucket_->records_->size();
     const int32_t nextIndex = itemsIterIndex_ + 1;
     if (nextIndex < recordCount) {
         itemsIterIndex_ = nextIndex;
         return TRUE;
     }
     // Park the cursor just past the last record.
     itemsIterIndex_ = recordCount;
     return FALSE;
 }


 // Name of the record the iteration is positioned on.  Returns the shared empty
 // string when there is no current bucket or the record cursor is not on a record.
 const UnicodeString &AlphabeticIndex::getRecordName() const {
     if (currentBucket_ == NULL) {
         return *EMPTY_STRING;
     }
     if (itemsIterIndex_ < 0 || itemsIterIndex_ >= currentBucket_->records_->size()) {
         return *EMPTY_STRING;
     }
     const Record *current =
         static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
     return current->name_;
 }

 const void *AlphabeticIndex::getRecordData() const {
     const void *retPtr = NULL;
     if (currentBucket_ != NULL &&
         itemsIterIndex_ >= 0 &&
         itemsIterIndex_ < currentBucket_->records_->size()) {
             Record *item = static_cast<Record *>(currentBucket_->records_->elementAt(itemsIterIndex_));
             retPtr = item->data_;
     }
     return retPtr;
 }


 // Rewinds the record cursor to -1, i.e. before the first record, so that the next
 // nextRecord() call moves it to index 0.  Returns this object, for chaining.
 AlphabeticIndex & AlphabeticIndex::resetRecordIterator() {
     itemsIterIndex_ = -1;
     return *this;
 }


 /**
  * Create a bucket with an empty record vector.
  *
  * @param label          Label of the bucket.
  * @param lowerBoundary  String marking the lower boundary of the bucket.
  * @param type           Label type of the bucket.
  * @param status         ICU error code.  If it already indicates a failure, records_
  *                       stays NULL.  Set to U_MEMORY_ALLOCATION_ERROR if the record
  *                       vector cannot be allocated.
  */
 AlphabeticIndex::Bucket::Bucket(const UnicodeString &label,
                                 const UnicodeString &lowerBoundary,
                                 UAlphabeticIndexLabelType type,
                                 UErrorCode &status):
          label_(label), lowerBoundary_(lowerBoundary), labelType_(type), records_(NULL) {
     if (U_SUCCESS(status)) {
         records_ = new UVector(status);
         if (U_SUCCESS(status) && records_ == NULL) {
             status = U_MEMORY_ALLOCATION_ERROR;
         }
     }
 }


 // Frees the record vector allocated by the constructor (records_ may be NULL if the
 // constructor failed).
 // NOTE(review): the constructor sets no deleter on records_, so the Record objects
 // themselves are presumably owned elsewhere -- confirm.
 AlphabeticIndex::Bucket::~Bucket() {
     delete records_;
 }

 U_NAMESPACE_END