/*
*******************************************************************************
*   Copyright (C) 1996-2014, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  ucol_res.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
* Description:
* This file contains dependencies that the collation run-time doesn't normally
* need. This mainly contains resource bundle usage and collation meta information.
*
* Modification history
* Date        Name      Comments
* 1996-1999   various members of ICU team maintained C API for collation framework
* 02/16/2001  synwee    Added internal method getPrevSpecialCE
* 03/01/2001  synwee    Added maxexpansion functionality.
* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
* 12/08/2004  grhoten   Split part of ucol.cpp into ucol_res.cpp
* 2012-2014   markus    Rewritten in C++ again.
*/
| |
| #include "unicode/utypes.h" |
| |
| #if !UCONFIG_NO_COLLATION |
| |
| #include "unicode/coll.h" |
| #include "unicode/localpointer.h" |
| #include "unicode/locid.h" |
| #include "unicode/tblcoll.h" |
| #include "unicode/ucol.h" |
| #include "unicode/uloc.h" |
| #include "unicode/unistr.h" |
| #include "unicode/ures.h" |
| #include "cmemory.h" |
| #include "cstring.h" |
| #include "collationdatareader.h" |
| #include "collationroot.h" |
| #include "collationtailoring.h" |
| #include "putilimp.h" |
| #include "uassert.h" |
| #include "ucln_in.h" |
| #include "ucol_imp.h" |
| #include "uenumimp.h" |
| #include "ulist.h" |
| #include "umutex.h" |
| #include "uresimp.h" |
| #include "ustrenum.h" |
| #include "utracimp.h" |
| |
// Number of elements in a fixed-size array, as an int32_t.
// The whole expansion is parenthesized so that the cast is applied before any
// operator the caller writes next to the macro (for example a postfix operator,
// which would otherwise bind to the inner parenthesized quotient first).
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
| |
| U_NAMESPACE_BEGIN |
| |
| namespace { |
| |
| static const UChar *rootRules = NULL; |
| static int32_t rootRulesLength = 0; |
| static UResourceBundle *rootBundle = NULL; |
| static UInitOnce gInitOnce = U_INITONCE_INITIALIZER; |
| |
| } // namespace |
| |
| U_CDECL_BEGIN |
| |
| static UBool U_CALLCONV |
| ucol_res_cleanup() { |
| rootRules = NULL; |
| rootRulesLength = 0; |
| ures_close(rootBundle); |
| rootBundle = NULL; |
| gInitOnce.reset(); |
| return TRUE; |
| } |
| |
| U_CDECL_END |
| |
| void |
| CollationLoader::loadRootRules(UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { return; } |
| rootBundle = ures_open(U_ICUDATA_COLL, kRootLocaleName, &errorCode); |
| if(U_FAILURE(errorCode)) { return; } |
| rootRules = ures_getStringByKey(rootBundle, "UCARules", &rootRulesLength, &errorCode); |
| if(U_FAILURE(errorCode)) { |
| ures_close(rootBundle); |
| rootBundle = NULL; |
| return; |
| } |
| ucln_i18n_registerCleanup(UCLN_I18N_UCOL_RES, ucol_res_cleanup); |
| } |
| |
| void |
| CollationLoader::appendRootRules(UnicodeString &s) { |
| UErrorCode errorCode = U_ZERO_ERROR; |
| umtx_initOnce(gInitOnce, CollationLoader::loadRootRules, errorCode); |
| if(U_SUCCESS(errorCode)) { |
| s.append(rootRules, rootRulesLength); |
| } |
| } |
| |
| UnicodeString * |
| CollationLoader::loadRules(const char *localeID, const char *collationType, UErrorCode &errorCode) { |
| if(U_FAILURE(errorCode)) { return NULL; } |
| U_ASSERT(collationType != NULL && *collationType != 0); |
| // Copy the type for lowercasing. |
| char type[16]; |
| int32_t typeLength = uprv_strlen(collationType); |
| if(typeLength >= LENGTHOF(type)) { |
| errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| uprv_memcpy(type, collationType, typeLength + 1); |
| T_CString_toLowerCase(type); |
| |
| LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, localeID, &errorCode)); |
| LocalUResourceBundlePointer collations( |
| ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode)); |
| LocalUResourceBundlePointer data( |
| ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); |
| int32_t length; |
| const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, &errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| |
| // No string pointer aliasing so that we need not hold onto the resource bundle. |
| UnicodeString *rules = new UnicodeString(s, length); |
| if(rules == NULL) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| return rules; |
| } |
| |
| const CollationTailoring * |
| CollationLoader::loadTailoring(const Locale &locale, Locale &validLocale, UErrorCode &errorCode) { |
| const CollationTailoring *root = CollationRoot::getRoot(errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| const char *name = locale.getName(); |
| if(*name == 0 || uprv_strcmp(name, "root") == 0) { |
| validLocale = Locale::getRoot(); |
| return root; |
| } |
| |
| LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_COLL, name, &errorCode)); |
| if(errorCode == U_MISSING_RESOURCE_ERROR) { |
| errorCode = U_USING_DEFAULT_WARNING; |
| validLocale = Locale::getRoot(); |
| return root; |
| } |
| const char *vLocale = ures_getLocaleByType(bundle.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| validLocale = Locale(vLocale); |
| |
| // There are zero or more tailorings in the collations table. |
| LocalUResourceBundlePointer collations( |
| ures_getByKey(bundle.getAlias(), "collations", NULL, &errorCode)); |
| if(errorCode == U_MISSING_RESOURCE_ERROR) { |
| errorCode = U_USING_DEFAULT_WARNING; |
| return root; |
| } |
| if(U_FAILURE(errorCode)) { return NULL; } |
| |
| // Fetch the collation type from the locale ID and the default type from the data. |
| char type[16]; |
| int32_t typeLength = locale.getKeywordValue("collation", type, LENGTHOF(type) - 1, errorCode); |
| if(U_FAILURE(errorCode)) { |
| errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| type[typeLength] = 0; // in case of U_NOT_TERMINATED_WARNING |
| char defaultType[16]; |
| { |
| UErrorCode internalErrorCode = U_ZERO_ERROR; |
| LocalUResourceBundlePointer def( |
| ures_getByKeyWithFallback(collations.getAlias(), "default", NULL, |
| &internalErrorCode)); |
| int32_t length; |
| const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); |
| if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) { |
| u_UCharsToChars(s, defaultType, length + 1); |
| } else { |
| uprv_strcpy(defaultType, "standard"); |
| } |
| } |
| if(typeLength == 0 || uprv_strcmp(type, "default") == 0) { |
| uprv_strcpy(type, defaultType); |
| } else { |
| T_CString_toLowerCase(type); |
| } |
| |
| // Load the collations/type tailoring, with type fallback. |
| UBool typeFallback = FALSE; |
| LocalUResourceBundlePointer data( |
| ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); |
| if(errorCode == U_MISSING_RESOURCE_ERROR && |
| typeLength > 6 && uprv_strncmp(type, "search", 6) == 0) { |
| // fall back from something like "searchjl" to "search" |
| typeFallback = TRUE; |
| type[6] = 0; |
| errorCode = U_ZERO_ERROR; |
| data.adoptInstead( |
| ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); |
| } |
| if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, defaultType) != 0) { |
| // fall back to the default type |
| typeFallback = TRUE; |
| uprv_strcpy(type, defaultType); |
| errorCode = U_ZERO_ERROR; |
| data.adoptInstead( |
| ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); |
| } |
| if(errorCode == U_MISSING_RESOURCE_ERROR && uprv_strcmp(type, "standard") != 0) { |
| // fall back to the "standard" type |
| typeFallback = TRUE; |
| uprv_strcpy(type, "standard"); |
| errorCode = U_ZERO_ERROR; |
| data.adoptInstead( |
| ures_getByKeyWithFallback(collations.getAlias(), type, NULL, &errorCode)); |
| } |
| if(errorCode == U_MISSING_RESOURCE_ERROR) { |
| errorCode = U_USING_DEFAULT_WARNING; |
| return root; |
| } |
| if(U_FAILURE(errorCode)) { return NULL; } |
| |
| LocalPointer<CollationTailoring> t(new CollationTailoring(root->settings)); |
| if(t.isNull() || t->isBogus()) { |
| errorCode = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| |
| // Is this the same as the root collator? If so, then use that instead. |
| const char *actualLocale = ures_getLocaleByType(data.getAlias(), ULOC_ACTUAL_LOCALE, &errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| if((*actualLocale == 0 || uprv_strcmp(actualLocale, "root") == 0) && |
| uprv_strcmp(type, "standard") == 0) { |
| if(typeFallback) { |
| errorCode = U_USING_DEFAULT_WARNING; |
| } |
| return root; |
| } |
| t->actualLocale = Locale(actualLocale); |
| |
| // deserialize |
| LocalUResourceBundlePointer binary( |
| ures_getByKey(data.getAlias(), "%%CollationBin", NULL, &errorCode)); |
| // Note: U_MISSING_RESOURCE_ERROR --> The old code built from rules if available |
| // but that created undesirable dependencies. |
| int32_t length; |
| const uint8_t *inBytes = ures_getBinary(binary.getAlias(), &length, &errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| CollationDataReader::read(root, inBytes, length, *t, errorCode); |
| // Note: U_COLLATOR_VERSION_MISMATCH --> The old code built from rules if available |
| // but that created undesirable dependencies. |
| if(U_FAILURE(errorCode)) { return NULL; } |
| |
| // Try to fetch the optional rules string. |
| { |
| UErrorCode internalErrorCode = U_ZERO_ERROR; |
| int32_t length; |
| const UChar *s = ures_getStringByKey(data.getAlias(), "Sequence", &length, |
| &internalErrorCode); |
| if(U_SUCCESS(errorCode)) { |
| t->rules.setTo(TRUE, s, length); |
| } |
| } |
| |
| // Set the collation types on the informational locales, |
| // except when they match the default types (for brevity and backwards compatibility). |
| // For the valid locale, suppress the default type. |
| if(uprv_strcmp(type, defaultType) != 0) { |
| validLocale.setKeywordValue("collation", type, errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| } |
| |
| // For the actual locale, suppress the default type *according to the actual locale*. |
| // For example, zh has default=pinyin and contains all of the Chinese tailorings. |
| // zh_Hant has default=stroke but has no other data. |
| // For the valid locale "zh_Hant" we need to suppress stroke. |
| // For the actual locale "zh" we need to suppress pinyin instead. |
| if(uprv_strcmp(actualLocale, vLocale) != 0) { |
| // Opening a bundle for the actual locale should always succeed. |
| LocalUResourceBundlePointer actualBundle( |
| ures_open(U_ICUDATA_COLL, actualLocale, &errorCode)); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| UErrorCode internalErrorCode = U_ZERO_ERROR; |
| LocalUResourceBundlePointer def( |
| ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL, |
| &internalErrorCode)); |
| int32_t length; |
| const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); |
| if(U_SUCCESS(internalErrorCode) && length < LENGTHOF(defaultType)) { |
| u_UCharsToChars(s, defaultType, length + 1); |
| } else { |
| uprv_strcpy(defaultType, "standard"); |
| } |
| } |
| if(uprv_strcmp(type, defaultType) != 0) { |
| t->actualLocale.setKeywordValue("collation", type, errorCode); |
| if(U_FAILURE(errorCode)) { return NULL; } |
| } |
| |
| if(typeFallback) { |
| errorCode = U_USING_DEFAULT_WARNING; |
| } |
| t->bundle = bundle.orphan(); |
| return t.orphan(); |
| } |
| |
| U_NAMESPACE_END |
| |
| U_NAMESPACE_USE |
| |
| U_CAPI UCollator* |
| ucol_open(const char *loc, |
| UErrorCode *status) |
| { |
| U_NAMESPACE_USE |
| |
| UTRACE_ENTRY_OC(UTRACE_UCOL_OPEN); |
| UTRACE_DATA1(UTRACE_INFO, "locale = \"%s\"", loc); |
| UCollator *result = NULL; |
| |
| Collator *coll = Collator::createInstance(loc, *status); |
| if(U_SUCCESS(*status)) { |
| result = coll->toUCollator(); |
| } |
| UTRACE_EXIT_PTR_STATUS(result, *status); |
| return result; |
| } |
| |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_getDisplayName( const char *objLoc, |
| const char *dispLoc, |
| UChar *result, |
| int32_t resultLength, |
| UErrorCode *status) |
| { |
| U_NAMESPACE_USE |
| |
| if(U_FAILURE(*status)) return -1; |
| UnicodeString dst; |
| if(!(result==NULL && resultLength==0)) { |
| // NULL destination for pure preflighting: empty dummy string |
| // otherwise, alias the destination buffer |
| dst.setTo(result, 0, resultLength); |
| } |
| Collator::getDisplayName(Locale(objLoc), Locale(dispLoc), dst); |
| return dst.extract(result, resultLength, *status); |
| } |
| |
| U_CAPI const char* U_EXPORT2 |
| ucol_getAvailable(int32_t index) |
| { |
| int32_t count = 0; |
| const Locale *loc = Collator::getAvailableLocales(count); |
| if (loc != NULL && index < count) { |
| return loc[index].getName(); |
| } |
| return NULL; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_countAvailable() |
| { |
| int32_t count = 0; |
| Collator::getAvailableLocales(count); |
| return count; |
| } |
| |
| #if !UCONFIG_NO_SERVICE |
| U_CAPI UEnumeration* U_EXPORT2 |
| ucol_openAvailableLocales(UErrorCode *status) { |
| U_NAMESPACE_USE |
| |
| // This is a wrapper over Collator::getAvailableLocales() |
| if (U_FAILURE(*status)) { |
| return NULL; |
| } |
| StringEnumeration *s = icu::Collator::getAvailableLocales(); |
| if (s == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| return NULL; |
| } |
| return uenum_openFromStringEnumeration(s, status); |
| } |
| #endif |
| |
// Note: KEYWORDS[0] != RESOURCE_NAME - alan
// The resource table is named "collations" (plural) while the locale keyword
// is "collation" (singular); the two strings are deliberately kept separate.

// Key of the collation table looked up in the U_ICUDATA_COLL bundles.
static const char RESOURCE_NAME[] = "collations";

// Locale keywords enumerated by ucol_getKeywords() and accepted by
// ucol_getKeywordValues().
static const char* const KEYWORDS[] = { "collation" };

// Number of entries in KEYWORDS.
#define KEYWORD_COUNT LENGTHOF(KEYWORDS)
| |
| U_CAPI UEnumeration* U_EXPORT2 |
| ucol_getKeywords(UErrorCode *status) { |
| UEnumeration *result = NULL; |
| if (U_SUCCESS(*status)) { |
| return uenum_openCharStringsEnumeration(KEYWORDS, KEYWORD_COUNT, status); |
| } |
| return result; |
| } |
| |
| U_CAPI UEnumeration* U_EXPORT2 |
| ucol_getKeywordValues(const char *keyword, UErrorCode *status) { |
| if (U_FAILURE(*status)) { |
| return NULL; |
| } |
| // hard-coded to accept exactly one collation keyword |
| // modify if additional collation keyword is added later |
| if (keyword==NULL || uprv_strcmp(keyword, KEYWORDS[0])!=0) |
| { |
| *status = U_ILLEGAL_ARGUMENT_ERROR; |
| return NULL; |
| } |
| return ures_getKeywordValues(U_ICUDATA_COLL, RESOURCE_NAME, status); |
| } |
| |
// Template UEnumeration that ucol_getKeywordValuesForLocale() copies into each
// enumeration it returns. The two leading NULL slots stay empty here; after
// the copy, the `context` member is pointed at the result list.
// NOTE(review): the initializer order must match the member order of
// UEnumeration as declared in uenumimp.h -- confirm if that struct changes.
static const UEnumeration defaultKeywordValues = {
    NULL,
    NULL,
    ulist_close_keyword_values_iterator,
    ulist_count_keyword_values,
    uenum_unextDefault,
    ulist_next_keyword_value,
    ulist_reset_keyword_values_iterator
};
| |
| #include <stdio.h> |
| |
| U_CAPI UEnumeration* U_EXPORT2 |
| ucol_getKeywordValuesForLocale(const char* /*key*/, const char* locale, |
| UBool /*commonlyUsed*/, UErrorCode* status) { |
| /* Get the locale base name. */ |
| char localeBuffer[ULOC_FULLNAME_CAPACITY] = ""; |
| uloc_getBaseName(locale, localeBuffer, sizeof(localeBuffer), status); |
| |
| /* Create the 2 lists |
| * -values is the temp location for the keyword values |
| * -results hold the actual list used by the UEnumeration object |
| */ |
| UList *values = ulist_createEmptyList(status); |
| UList *results = ulist_createEmptyList(status); |
| UEnumeration *en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration)); |
| if (U_FAILURE(*status) || en == NULL) { |
| if (en == NULL) { |
| *status = U_MEMORY_ALLOCATION_ERROR; |
| } else { |
| uprv_free(en); |
| } |
| ulist_deleteList(values); |
| ulist_deleteList(results); |
| return NULL; |
| } |
| |
| memcpy(en, &defaultKeywordValues, sizeof(UEnumeration)); |
| en->context = results; |
| |
| /* Open the resource bundle for collation with the given locale. */ |
| UResourceBundle bundle, collations, collres, defres; |
| ures_initStackObject(&bundle); |
| ures_initStackObject(&collations); |
| ures_initStackObject(&collres); |
| ures_initStackObject(&defres); |
| |
| ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); |
| |
| while (U_SUCCESS(*status)) { |
| ures_getByKey(&bundle, RESOURCE_NAME, &collations, status); |
| ures_resetIterator(&collations); |
| while (U_SUCCESS(*status) && ures_hasNext(&collations)) { |
| ures_getNextResource(&collations, &collres, status); |
| const char *key = ures_getKey(&collres); |
| /* If the key is default, get the string and store it in results list only |
| * if results list is empty. |
| */ |
| if (uprv_strcmp(key, "default") == 0) { |
| if (ulist_getListSize(results) == 0) { |
| char *defcoll = (char *)uprv_malloc(sizeof(char) * ULOC_KEYWORDS_CAPACITY); |
| int32_t defcollLength = ULOC_KEYWORDS_CAPACITY; |
| |
| ures_getNextResource(&collres, &defres, status); |
| #if U_CHARSET_FAMILY==U_ASCII_FAMILY |
| /* optimize - use the utf-8 string */ |
| ures_getUTF8String(&defres, defcoll, &defcollLength, TRUE, status); |
| #else |
| { |
| const UChar* defString = ures_getString(&defres, &defcollLength, status); |
| if(U_SUCCESS(*status)) { |
| if(defcollLength+1 > ULOC_KEYWORDS_CAPACITY) { |
| *status = U_BUFFER_OVERFLOW_ERROR; |
| } else { |
| u_UCharsToChars(defString, defcoll, defcollLength+1); |
| } |
| } |
| } |
| #endif |
| |
| ulist_addItemBeginList(results, defcoll, TRUE, status); |
| } |
| } else { |
| ulist_addItemEndList(values, key, FALSE, status); |
| } |
| } |
| |
| /* If the locale is "" this is root so exit. */ |
| if (uprv_strlen(localeBuffer) == 0) { |
| break; |
| } |
| /* Get the parent locale and open a new resource bundle. */ |
| uloc_getParent(localeBuffer, localeBuffer, sizeof(localeBuffer), status); |
| ures_openFillIn(&bundle, U_ICUDATA_COLL, localeBuffer, status); |
| } |
| |
| ures_close(&defres); |
| ures_close(&collres); |
| ures_close(&collations); |
| ures_close(&bundle); |
| |
| if (U_SUCCESS(*status)) { |
| char *value = NULL; |
| ulist_resetList(values); |
| while ((value = (char *)ulist_getNext(values)) != NULL) { |
| if (!ulist_containsString(results, value, (int32_t)uprv_strlen(value))) { |
| ulist_addItemEndList(results, value, FALSE, status); |
| if (U_FAILURE(*status)) { |
| break; |
| } |
| } |
| } |
| } |
| |
| ulist_deleteList(values); |
| |
| if (U_FAILURE(*status)){ |
| uenum_close(en); |
| en = NULL; |
| } else { |
| ulist_resetList(results); |
| } |
| |
| return en; |
| } |
| |
| U_CAPI int32_t U_EXPORT2 |
| ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, |
| const char* keyword, const char* locale, |
| UBool* isAvailable, UErrorCode* status) |
| { |
| // N.B.: Resource name is "collations" but keyword is "collation" |
| return ures_getFunctionalEquivalent(result, resultCapacity, U_ICUDATA_COLL, |
| "collations", keyword, locale, |
| isAvailable, TRUE, status); |
| } |
| |
| #endif /* #if !UCONFIG_NO_COLLATION */ |