Implementing traditional grouping of Hiragana characters in contacts lists

Bug: 2407129
Change-Id: I8d83f9012846f61484b76f718e8127090eba39ec
diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp
index f82c9d2..4d3da61 100644
--- a/android/PhonebookIndex.cpp
+++ b/android/PhonebookIndex.cpp
@@ -25,7 +25,7 @@
 #include "PhonebookIndex.h"
 #include "PhoneticStringUtils.h"
 
-#define SMALL_BUFFER_SIZE 10
+#define MIN_OUTPUT_SIZE 6       // Minimum required size for the output buffer (in bytes)
 
 namespace android {
 
@@ -118,45 +118,79 @@
     || (0xf900 <= c && c <= 0xfaff);    // CJK_COMPATIBILITY_IDEOGRAPHS
 }
 
-UChar GetPhonebookIndex(UCharIterator * iter, const char * locale) {
-    UChar dest[SMALL_BUFFER_SIZE];
+int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
+        UBool * isError)
+{
+  if (size < MIN_OUTPUT_SIZE) {
+    *isError = TRUE;
+    return 0;
+  }
 
-    // Normalize the first character to remove accents using the NFD normalization
-    UErrorCode errorCode = U_ZERO_ERROR;
-    int32_t len = unorm_next(iter, dest, SMALL_BUFFER_SIZE * sizeof(UChar), UNORM_NFD,
-            0 /* options */, TRUE /* normalize */, NULL, &errorCode);
-    if (U_FAILURE(errorCode) || len == 0) {
+  *isError = FALSE;
+
+  // Normalize the first character to remove accents using the NFD normalization
+  UErrorCode errorCode = U_ZERO_ERROR;
+  int32_t len = unorm_next(iter, out, size, UNORM_NFD,
+          0 /* options */, TRUE /* normalize */, NULL, &errorCode);
+  if (U_FAILURE(errorCode)) {
+    *isError = TRUE;
+    return 0;
+  }
+
+  if (len == 0) {   // Empty input string
+    return 0;
+  }
+
+  UChar c = out[0];
+
+  // We are only interested in letters
+  if (!u_isalpha(c)) {
+    return 0;
+  }
+
+  c = u_toupper(c);
+
+  // Check for explicitly mapped characters
+  UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
+  if (c_mapped != 0) {
+    out[0] = c_mapped;
+    return 1;
+  }
+
+  // Convert Kanas to Hiragana
+  UChar next = len > 2 ? out[1] : 0;
+  c = android::GetNormalizedCodePoint(c, next, NULL);
+
+  // Traditional grouping of Hiragana characters
+  if (0x3042 <= c && c <= 0x309F) {
+    if (c < 0x304B) c = 0x3042;         // a
+    else if (c < 0x3055) c = 0x304B;    // ka
+    else if (c < 0x305F) c = 0x3055;    // sa
+    else if (c < 0x306A) c = 0x305F;    // ta
+    else if (c < 0x306F) c = 0x306A;    // na
+    else if (c < 0x307E) c = 0x306F;    // ha
+    else if (c < 0x3084) c = 0x307E;    // ma
+    else if (c < 0x3089) c = 0x3084;    // ya
+    else if (c < 0x308F) c = 0x3089;    // ra
+    else c = 0x308F;                    // wa
+    out[0] = c;
+    return 1;
+  }
+
+  if (is_CJK(c)) {
+    if (strncmp(locale, "ja", 2) == 0) {
+      // Japanese word meaning "misc" or "other"
+      out[0] = 0x305D;
+      out[1] = 0x306E;
+      out[2] = 0x4ED6;
+      return 3;
+    } else {
       return 0;
     }
+  }
 
-    UChar c = dest[0];
-
-    // We are only interested in letters
-    if (!u_isalpha(c)) {
-      return 0;
-    }
-
-    c = u_toupper(c);
-
-    // Check for explicitly mapped characters
-    UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
-    if (c_mapped != 0) {
-      return c_mapped;
-    }
-
-    // Convert Kanas to Hiragana
-    UChar next = len > 2 ? dest[1] : 0;
-    c = android::GetNormalizedCodePoint(c, next, NULL);
-
-    if (is_CJK(c)) {
-      if (strncmp(locale, "ja", 2) == 0) {
-        return 0x8A18;  // Kanji character used as a heading in letters, notices and other documents
-      } else {
-        return 0;
-      }
-    }
-
-    return c;
+  out[0] = c;
+  return 1;
 }
 
 }  // namespace android
diff --git a/android/PhonebookIndex.h b/android/PhonebookIndex.h
index f2bb289..5bf14f3 100644
--- a/android/PhonebookIndex.h
+++ b/android/PhonebookIndex.h
@@ -25,11 +25,21 @@
 
 /**
  * A character converter that takes a UNICODE character and produces the
- * phonebook index for it in the specified locale. For example, "a" becomes "A"
+ * phone book index for it in the specified locale. For example, "a" becomes "A"
  * and so does A with accents. Conversion rules differ from locale
  * locale, which is why this function takes locale as an argument.
+ *
+ * @param iter iterator over the input characters
+ * @param locale the string representation of the current locale, e.g. "ja"
+ * @param out output buffer
+ * @param size size of the output buffer in bytes. The buffer should be large enough
+ *        to hold the longest phone book index (e.g. a three-character word in Japanese).
+ * @param isError will be set to TRUE if an error occurs
+ *
+ * @return number of UChars written to out; 0 if there is no index or an error occurred
  */
-UChar GetPhonebookIndex(UCharIterator * iter, const char * locale);
+int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
+        UBool * isError);
 
 }  // namespace android
 
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 6b253c1..cdcc97b 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -95,17 +95,25 @@
     UCharIterator iter;
     uiter_setUTF8(&iter, src, -1);
 
-    UChar index = android::GetPhonebookIndex(&iter, locale);
-    if (index == 0) {
+    UBool isError = FALSE;
+    UChar index[SMALL_BUFFER_SIZE];
+    uint32_t len = android::GetPhonebookIndex(&iter, locale, index, sizeof(index), &isError);
+    if (isError) {
       sqlite3_result_null(context);
       return;
     }
 
     uint32_t outlen = 0;
     uint8_t out[SMALL_BUFFER_SIZE];
-    UBool isError = FALSE;
-    U8_APPEND(out, outlen, SMALL_BUFFER_SIZE * sizeof(uint8_t), index, isError);
-    if (isError || outlen == 0) {
+    for (uint32_t i = 0; i < len; i++) {
+      U8_APPEND(out, outlen, sizeof(out), index[i], isError);
+      if (isError) {
+        sqlite3_result_null(context);
+        return;
+      }
+    }
+
+    if (outlen == 0) {
       sqlite3_result_null(context);
       return;
     }