AI 143381: am: CL 143231 am: CL 143056 Fix PhoneticStringUtils so that it ignores whitespace correctly.
  Original author: dmiyakawa
  Merged from: //branches/cupcake/...
  Original author: android-build
  Merged from: //branches/donutburger/...

Automated import of CL 143381
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
index 7c8d185..5f8781c 100644
--- a/android/PhoneticStringUtils.cpp
+++ b/android/PhoneticStringUtils.cpp
@@ -89,11 +89,10 @@
     }
 
     if (codepoint <= 0x0020 || codepoint == 0x3000) {
-        // Whitespace.
-        // Skip without increment of the variable "new_len".
+        // Whitespace should be ignored.
         // Note: Formally, more "whitespace" exist. This block only
         // handles part of them
-        return 0x0020;
+        return -1;
     } else if ((0x0021 <= codepoint && codepoint <= 0x007E) ||
                (0xFF01 <= codepoint && codepoint <= 0xFF5E)) {
         // Ascii and fullwidth ascii
@@ -369,7 +368,7 @@
         for (codepoint_index = 0, i = 0, next = 0;
              static_cast<size_t>(i) < src_len &&
                      codepoint_index < MAX_CODEPOINTS;
-             i = next, codepoint_index++) {
+             i = next) {
             int codepoint = GetCodePointFromUtf8(src, src_len, i, &next);
             if (codepoint <= 0) {
                 return false;
@@ -384,12 +383,16 @@
                     GetPhoneticallySortableCodePoint(codepoint,
                                                      next_codepoint,
                                                      &next_is_consumed);
-
             // dakuten (voiced mark) or han-dakuten (half-voiced mark) existed.
             if (next_is_consumed) {
                 next = tmp_next;
             }
 
+            if (codepoints[codepoint_index] < 0) {
+              // Do not increment codepoint_index.
+              continue;
+            }
+
             if (codepoints[codepoint_index] < 128) {  // 1 << 7
                 new_len++;
             } else if (codepoints[codepoint_index] < 2048) {
@@ -407,9 +410,19 @@
             } else {
                 new_len += 6;
             }
+
+            codepoint_index++;
         }
     }
 
+    if (codepoint_index == 0) {
+        // If no codepoint was kept (e.g. every codepoint was skipped as
+        // whitespace), we place the string at the end of the list.
+        codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR;
+        codepoint_index = 1;
+        new_len = 4;
+    }
+
     new_len += 1;  // For '\0'.
 
     *dst = static_cast<char *>(malloc(sizeof(char) * new_len));
diff --git a/android/PhoneticStringUtils.h b/android/PhoneticStringUtils.h
index 5649783..7ebf9e0 100644
--- a/android/PhoneticStringUtils.h
+++ b/android/PhoneticStringUtils.h
@@ -29,7 +29,8 @@
 // depends on each Locale. Note that currently this function considers only
 // Japanese. The variable "next_is_consumed" is set to true if "next_codepoint"
 // is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed
-// when previous "codepoint" is appropriate)
+// when previous "codepoint" is appropriate). If the codepoint should not be
+// considered when sorting (e.g. whitespace), -1 is returned.
 int GetPhoneticallySortableCodePoint(int codepoint,
                                      int next_codepoint,
                                      bool *next_is_consumed);
diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp
index ae9df2a..0541007 100644
--- a/android/PhoneticStringUtilsTest.cpp
+++ b/android/PhoneticStringUtilsTest.cpp
@@ -32,6 +32,7 @@
   void testGetCodePointFromUtf8();
   void testGetPhoneticallySortableCodePointAscii();
   void testGetPhoneticallySortableCodePointKana();
+  void testGetPhoneticallySortableCodePointWhitespaceOnly();
   void testGetPhoneticallySortableCodePointSimpleCompare();
   void testGetUtf8FromCodePoint();
   void testGetPhoneticallySortableString();
@@ -66,6 +67,7 @@
   DoOneTest(&TestExecutor::testGetCodePointFromUtf8);
   DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
   DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
+  DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
   DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
   DoOneTest(&TestExecutor::testGetUtf8FromCodePoint);
   DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
@@ -121,6 +123,8 @@
                                                     &next_is_consumed);
     if (halfwidth[i] < 0) {
       printf("returned value become negative at 0x%04X", codepoint);
+      m_success = false;
+      return;
     }
     if (next_is_consumed) {
       printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -133,6 +137,8 @@
                                                     &next_is_consumed);
     if (fullwidth[i] < 0) {
       printf("returned value become negative at 0x%04X", codepoint);
+      m_success = false;
+      return;
     }
     if (next_is_consumed) {
       printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -158,6 +164,8 @@
                                                    &next_is_consumed);
     if (hiragana[i] < 0) {
       printf("returned value become negative at 0x%04X", codepoint);
+      m_success = false;
+      return;
     }
     if (next_is_consumed) {
       printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -171,6 +179,8 @@
                                                    &next_is_consumed);
     if (fullwidth_katakana[i] < 0) {
       printf("returned value become negative at 0x%04X", codepoint);
+      m_success = false;
+      return;
     }
     if (next_is_consumed) {
       printf("next_is_consumed become true at 0x%04X", codepoint);
@@ -221,6 +231,19 @@
   }
 }
 
+void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
+  printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n");
+  // Halfwidth space (U+0020) must be ignored, i.e. mapped to -1.
+  int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
+  ASSERT_EQ_VALUE(result, -1);
+  // Fullwidth (ideographic) space (U+3000) must be ignored as well.
+  result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
+  ASSERT_EQ_VALUE(result, -1);
+  // Tab (U+0009): every codepoint <= 0x0020 is treated as whitespace.
+  result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
+  ASSERT_EQ_VALUE(result, -1);
+}
+
 void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
   printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
 
@@ -345,6 +368,9 @@
   EXPECT_EQ_UTF8_UTF8(
       "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
       "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
+
+  // Whitespace only -> the string which should be placed last
+  EXPECT_EQ_UTF8_UTF8("    \t", "\xF0\x9F\xBF\xBD");
 }
 
 int main() {
diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp
index 55dcd5a..27334ef 100644
--- a/android/sqlite3_android.cpp
+++ b/android/sqlite3_android.cpp
@@ -82,7 +82,9 @@
     size_t len;
 
     if (!android::GetPhoneticallySortableString(src, &ret, &len)) {
-        sqlite3_result_null(context);
+        // Put this text at the end of a list.
+        sqlite3_result_text(context, "\xF0\x9F\xBF\xBD", -1, SQLITE_STATIC);
+        // sqlite3_result_null(context);
     } else {
         sqlite3_result_text(context, ret, len, free);
     }