Implementing traditional grouping of Hiragana characters in contacts lists
Bug: 2407129
Change-Id: I8d83f9012846f61484b76f718e8127090eba39ec
diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp
index f82c9d2..4d3da61 100644
--- a/android/PhonebookIndex.cpp
+++ b/android/PhonebookIndex.cpp
@@ -25,7 +25,7 @@
#include "PhonebookIndex.h"
#include "PhoneticStringUtils.h"
-#define SMALL_BUFFER_SIZE 10
+#define MIN_OUTPUT_SIZE 6 // Minimum output buffer size in bytes: room for the longest index emitted (three UChars)
namespace android {
@@ -118,45 +118,79 @@
|| (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS
}
-UChar GetPhonebookIndex(UCharIterator * iter, const char * locale) {
- UChar dest[SMALL_BUFFER_SIZE];
+int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
+ UBool * isError)
+{
+ if (size < MIN_OUTPUT_SIZE) {
+ *isError = TRUE;
+ return 0;
+ }
- // Normalize the first character to remove accents using the NFD normalization
- UErrorCode errorCode = U_ZERO_ERROR;
- int32_t len = unorm_next(iter, dest, SMALL_BUFFER_SIZE * sizeof(UChar), UNORM_NFD,
- 0 /* options */, TRUE /* normalize */, NULL, &errorCode);
- if (U_FAILURE(errorCode) || len == 0) {
+ *isError = FALSE;
+
+ // NFD-normalize the first character to split off accents; size is in bytes, unorm_next wants a UChar count
+ UErrorCode errorCode = U_ZERO_ERROR;
+ int32_t len = unorm_next(iter, out, static_cast<int32_t>(size / sizeof(UChar)), UNORM_NFD,
+ 0 /* options */, TRUE /* normalize */, NULL, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ *isError = TRUE;
+ return 0;
+ }
+
+ if (len == 0) { // Empty input string
+ return 0;
+ }
+
+ UChar c = out[0];
+
+ // We are only interested in letters
+ if (!u_isalpha(c)) {
+ return 0;
+ }
+
+ c = u_toupper(c);
+
+ // Check for explicitly mapped characters
+ UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
+ if (c_mapped != 0) {
+ out[0] = c_mapped;
+ return 1;
+ }
+
+ // Convert Kanas to Hiragana
+ UChar next = len > 2 ? out[1] : 0;
+ c = android::GetNormalizedCodePoint(c, next, NULL);
+
+ // Traditional grouping of Hiragana characters
+ if (0x3042 <= c && c <= 0x309F) {
+ if (c < 0x304B) c = 0x3042; // a
+ else if (c < 0x3055) c = 0x304B; // ka
+ else if (c < 0x305F) c = 0x3055; // sa
+ else if (c < 0x306A) c = 0x305F; // ta
+ else if (c < 0x306F) c = 0x306A; // na
+ else if (c < 0x307E) c = 0x306F; // ha
+ else if (c < 0x3084) c = 0x307E; // ma
+ else if (c < 0x3089) c = 0x3084; // ya
+ else if (c < 0x308F) c = 0x3089; // ra
+ else c = 0x308F; // wa
+ out[0] = c;
+ return 1;
+ }
+
+ if (is_CJK(c)) {
+ if (strncmp(locale, "ja", 2) == 0) {
+ // Japanese word meaning "misc" or "other"
+ out[0] = 0x305D;
+ out[1] = 0x306E;
+ out[2] = 0x4ED6;
+ return 3;
+ } else {
return 0;
}
+ }
- UChar c = dest[0];
-
- // We are only interested in letters
- if (!u_isalpha(c)) {
- return 0;
- }
-
- c = u_toupper(c);
-
- // Check for explicitly mapped characters
- UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
- if (c_mapped != 0) {
- return c_mapped;
- }
-
- // Convert Kanas to Hiragana
- UChar next = len > 2 ? dest[1] : 0;
- c = android::GetNormalizedCodePoint(c, next, NULL);
-
- if (is_CJK(c)) {
- if (strncmp(locale, "ja", 2) == 0) {
- return 0x8A18; // Kanji character used as a heading in letters, notices and other documents
- } else {
- return 0;
- }
- }
-
- return c;
+ out[0] = c;
+ return 1;
}
} // namespace android
diff --git a/android/PhonebookIndex.h b/android/PhonebookIndex.h
index f2bb289..5bf14f3 100644
--- a/android/PhonebookIndex.h
+++ b/android/PhonebookIndex.h
@@ -25,11 +25,21 @@
/**
* A character converter that takes a UNICODE character and produces the
- * phonebook index for it in the specified locale. For example, "a" becomes "A"
+ * phone book index for it in the specified locale. For example, "a" becomes "A"
* and so does A with accents. Conversion rules differ from locale
* locale, which is why this function takes locale as an argument.
+ *
+ * @param iter iterator of input characters
+ * @param locale the string representation of the current locale, e.g. "ja"
+ * @param out output buffer
+ * @param size size of the output buffer in bytes (at least 6). The buffer should be large enough
+ * to hold the longest phone book index (e.g. a three-char word in Japanese).
+ * @param isError will be set to TRUE if an error occurs (buffer too small or normalization failure), FALSE otherwise
+ *
+ * @return number of UChars written to out (0 if there is no index for the input or on error)
*/
-UChar GetPhonebookIndex(UCharIterator * iter, const char * locale);
+int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
+ UBool * isError);
} // namespace android
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 6b253c1..cdcc97b 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -95,17 +95,25 @@
UCharIterator iter;
uiter_setUTF8(&iter, src, -1);
- UChar index = android::GetPhonebookIndex(&iter, locale);
- if (index == 0) {
+ UBool isError = FALSE;
+ UChar index[SMALL_BUFFER_SIZE];
+ uint32_t len = android::GetPhonebookIndex(&iter, locale, index, sizeof(index), &isError);
+ if (isError) {
sqlite3_result_null(context);
return;
}
uint32_t outlen = 0;
uint8_t out[SMALL_BUFFER_SIZE];
- UBool isError = FALSE;
- U8_APPEND(out, outlen, SMALL_BUFFER_SIZE * sizeof(uint8_t), index, isError);
- if (isError || outlen == 0) {
+ for (uint32_t i = 0; i < len; i++) {
+ U8_APPEND(out, outlen, sizeof(out), index[i], isError);
+ if (isError) {
+ sqlite3_result_null(context);
+ return;
+ }
+ }
+
+ if (outlen == 0) {
sqlite3_result_null(context);
return;
}