Fixes mismatch between Java and C++ indices.
am: 47c9e5861f

Change-Id: I418e76ea0041df6a65c6e635737fdbb0743c0915
diff --git a/tests/textclassifier_jni_test.cc b/tests/textclassifier_jni_test.cc
new file mode 100644
index 0000000..c441cf5
--- /dev/null
+++ b/tests/textclassifier_jni_test.cc
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "textclassifier_jni.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier {
+namespace {
+
+TEST(TextClassifier, ConvertIndicesBMPUTF8) {
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}),
+            ConvertIndicesUTF8ToBMP("hello", {0, 5}));
+
+  // Simple example where the longer character is before the selection.
+  // The character 😁 is U+1F601: one codepoint, but two Java BMP indices.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hello World.", {3, 8}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hello World.", {2, 7}),
+            std::make_pair(3, 8));
+
+  // Longer character is before and in selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hell😁 World.", {3, 9}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hell😁 World.", {2, 7}),
+            std::make_pair(3, 9));
+
+  // Longer character is before and after selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hello😁World.", {3, 8}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hello😁World.", {2, 7}),
+            std::make_pair(3, 8));
+
+  // Longer character is before, in, and after selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hell😁😁World.", {3, 9}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hell😁😁World.", {2, 7}),
+            std::make_pair(3, 9));
+}
+
+}  // namespace
+}  // namespace libtextclassifier
diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc
index 4cb4af5..84a1e32 100644
--- a/textclassifier_jni.cc
+++ b/textclassifier_jni.cc
@@ -16,67 +16,14 @@
 
 // Simple JNI wrapper for the SmartSelection library.
 
+#include "textclassifier_jni.h"
+
 #include <jni.h>
 #include <vector>
 
 #include "lang_id/lang-id.h"
 #include "smartselect/text-classification-model.h"
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// SmartSelection.
-JNIEXPORT jlong JNICALL
-Java_android_view_textclassifier_SmartSelection_nativeNew(JNIEnv* env,
-                                                          jobject thiz,
-                                                          jint fd);
-
-JNIEXPORT jintArray JNICALL
-Java_android_view_textclassifier_SmartSelection_nativeSuggest(
-    JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
-    jint selection_end);
-
-JNIEXPORT jobjectArray JNICALL
-Java_android_view_textclassifier_SmartSelection_nativeClassifyText(
-    JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
-    jint selection_end, jint input_flags);
-
-JNIEXPORT void JNICALL
-Java_android_view_textclassifier_SmartSelection_nativeClose(JNIEnv* env,
-                                                            jobject thiz,
-                                                            jlong ptr);
-
-JNIEXPORT jstring JNICALL
-Java_android_view_textclassifier_SmartSelection_nativeGetLanguage(JNIEnv* env,
-                                                                  jobject clazz,
-                                                                  jint fd);
-
-JNIEXPORT jint JNICALL
-Java_android_view_textclassifier_SmartSelection_nativeGetVersion(JNIEnv* env,
-                                                                 jobject clazz,
-                                                                 jint fd);
-
-// LangId.
-JNIEXPORT jlong JNICALL Java_android_view_textclassifier_LangId_nativeNew(
-    JNIEnv* env, jobject thiz, jint fd);
-
-JNIEXPORT jobjectArray JNICALL
-Java_android_view_textclassifier_LangId_nativeFindLanguages(JNIEnv* env,
-                                                            jobject thiz,
-                                                            jlong ptr,
-                                                            jstring text);
-
-JNIEXPORT void JNICALL Java_android_view_textclassifier_LangId_nativeClose(
-    JNIEnv* env, jobject thiz, jlong ptr);
-
-JNIEXPORT int JNICALL Java_android_view_textclassifier_LangId_nativeGetVersion(
-    JNIEnv* env, jobject clazz, jint fd);
-
-#ifdef __cplusplus
-}
-#endif
-
 using libtextclassifier::TextClassificationModel;
 using libtextclassifier::ModelOptions;
 using libtextclassifier::nlp_core::lang_id::LangId;
@@ -143,6 +90,68 @@
 
 }  // namespace
 
+namespace libtextclassifier {
+
+using libtextclassifier::CodepointSpan;
+
+namespace {
+
+// Maps orig_indices between Java BMP offsets and codepoint offsets of utf8_str
+// (direction chosen by from_utf8). Offsets that match no position stay -1.
+CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
+                                    CodepointSpan orig_indices,
+                                    bool from_utf8) {
+  const libtextclassifier::UnicodeText unicode_str =
+      libtextclassifier::UTF8ToUnicodeText(utf8_str, /*do_copy=*/false);
+
+  int unicode_index = 0;
+  int bmp_index = 0;
+  const int& source_index = from_utf8 ? unicode_index : bmp_index;
+  const int& target_index = from_utf8 ? bmp_index : unicode_index;
+
+  CodepointSpan result{-1, -1};
+  const auto assign_indices = [&]() {
+    if (orig_indices.first == source_index) {
+      result.first = target_index;
+    }
+    if (orig_indices.second == source_index) {
+      result.second = target_index;
+    }
+  };
+
+  for (auto it = unicode_str.begin(); it != unicode_str.end();
+       ++it, ++unicode_index, ++bmp_index) {
+    assign_indices();
+    // There is 1 extra character in the input for each UTF8 character > 0xFFFF.
+    if (*it > 0xFFFF) {
+      ++bmp_index;
+    }
+  }
+
+  // A span may end one past the last character, so the offsets reached after
+  // the loop must be checked too; otherwise such an end index would stay -1.
+  assign_indices();
+  return result;
+}
+
+}  // namespace
+
+CodepointSpan ConvertIndicesBMPToUTF8(const std::string& utf8_str,
+                                      CodepointSpan orig_indices) {  // BMP span
+  return ConvertIndicesBMPUTF8(utf8_str, orig_indices, /*from_utf8=*/false);
+}
+
+CodepointSpan ConvertIndicesUTF8ToBMP(const std::string& utf8_str,
+                                      CodepointSpan orig_indices) {  // UTF8 span
+  return ConvertIndicesBMPUTF8(utf8_str, orig_indices, /*from_utf8=*/true);
+}
+
+}  // namespace libtextclassifier
+
+using libtextclassifier::ConvertIndicesUTF8ToBMP;
+using libtextclassifier::ConvertIndicesBMPToUTF8;
+using libtextclassifier::CodepointSpan;
+
 JNIEXPORT jlong JNICALL
 Java_android_view_textclassifier_SmartSelection_nativeNew(JNIEnv* env,
                                                           jobject thiz,
@@ -158,8 +167,12 @@
   TextClassificationModel* model =
       reinterpret_cast<TextClassificationModel*>(ptr);
 
-  const libtextclassifier::CodepointSpan selection = model->SuggestSelection(
-      ToStlString(env, context), {selection_begin, selection_end});
+  const std::string context_utf8 = ToStlString(env, context);
+  CodepointSpan input_indices =
+      ConvertIndicesBMPToUTF8(context_utf8, {selection_begin, selection_end});
+  CodepointSpan selection =
+      model->SuggestSelection(context_utf8, input_indices);
+  selection = ConvertIndicesUTF8ToBMP(context_utf8, selection);
 
   jintArray result = env->NewIntArray(2);
   env->SetIntArrayRegion(result, 0, 1, &(std::get<0>(selection)));
diff --git a/textclassifier_jni.h b/textclassifier_jni.h
new file mode 100644
index 0000000..28bb444
--- /dev/null
+++ b/textclassifier_jni.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_TEXTCLASSIFIER_JNI_H_
+#define LIBTEXTCLASSIFIER_TEXTCLASSIFIER_JNI_H_
+
+#include <jni.h>
+#include <string>
+
+#include "smartselect/types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// SmartSelection.
+JNIEXPORT jlong JNICALL
+Java_android_view_textclassifier_SmartSelection_nativeNew(JNIEnv* env,
+                                                          jobject thiz,
+                                                          jint fd);
+
+JNIEXPORT jintArray JNICALL
+Java_android_view_textclassifier_SmartSelection_nativeSuggest(
+    JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
+    jint selection_end);
+
+JNIEXPORT jobjectArray JNICALL
+Java_android_view_textclassifier_SmartSelection_nativeClassifyText(
+    JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
+    jint selection_end, jint input_flags);
+
+JNIEXPORT void JNICALL
+Java_android_view_textclassifier_SmartSelection_nativeClose(JNIEnv* env,
+                                                            jobject thiz,
+                                                            jlong ptr);
+
+JNIEXPORT jstring JNICALL
+Java_android_view_textclassifier_SmartSelection_nativeGetLanguage(JNIEnv* env,
+                                                                  jobject clazz,
+                                                                  jint fd);
+
+JNIEXPORT jint JNICALL
+Java_android_view_textclassifier_SmartSelection_nativeGetVersion(JNIEnv* env,
+                                                                 jobject clazz,
+                                                                 jint fd);
+
+// LangId.
+JNIEXPORT jlong JNICALL Java_android_view_textclassifier_LangId_nativeNew(
+    JNIEnv* env, jobject thiz, jint fd);
+
+JNIEXPORT jobjectArray JNICALL
+Java_android_view_textclassifier_LangId_nativeFindLanguages(JNIEnv* env,
+                                                            jobject thiz,
+                                                            jlong ptr,
+                                                            jstring text);
+
+JNIEXPORT void JNICALL Java_android_view_textclassifier_LangId_nativeClose(
+    JNIEnv* env, jobject thiz, jlong ptr);
+
+JNIEXPORT int JNICALL Java_android_view_textclassifier_LangId_nativeGetVersion(
+    JNIEnv* env, jobject clazz, jint fd);
+
+#ifdef __cplusplus
+}
+#endif
+
+namespace libtextclassifier {
+
+// Given a utf8 string and a span expressed in Java BMP (basic multilingual
+// plane) codepoints, converts it to a span expressed in utf8 codepoints.
+libtextclassifier::CodepointSpan ConvertIndicesBMPToUTF8(
+    const std::string& utf8_str, libtextclassifier::CodepointSpan bmp_indices);
+
+// Given a utf8 string and a span expressed in utf8 codepoints, converts it to a
+// span expressed in Java BMP (basic multilingual plane) codepoints.
+libtextclassifier::CodepointSpan ConvertIndicesUTF8ToBMP(
+    const std::string& utf8_str, libtextclassifier::CodepointSpan utf8_indices);
+
+}  // namespace libtextclassifier
+
+#endif  // LIBTEXTCLASSIFIER_TEXTCLASSIFIER_JNI_H_