[automerger skipped] DO NOT MERGE am: 3b7fdbe6c3  -s ours am: 67d4226be6  -s ours
am: a8f81e51fc  -s ours

Change-Id: Ifae0c10bc079dbe5f353e9e0b37e677762414082
diff --git a/Android.bp b/Android.bp
index ca3521a..887ce14 100644
--- a/Android.bp
+++ b/Android.bp
@@ -21,7 +21,7 @@
 cc_defaults {
     name: "libtextclassifier_hash_defaults",
     srcs: [
-        "util/hash/farmhash.cc",
+        "utils/hash/farmhash.cc",
         "util/hash/hash.cc"
     ],
     cflags: [
@@ -44,3 +44,10 @@
     sdk_version: "current",
     stl: "libc++_static",
 }
+
+java_library_static {
+    name: "libtextclassifier-java",
+    sdk_version: "core_current",
+    no_framework_libs: true,
+    srcs: ["java/**/*.java"],
+}
diff --git a/Android.mk b/Android.mk
index 4e5bbf5..c89e0b9 100644
--- a/Android.mk
+++ b/Android.mk
@@ -33,12 +33,15 @@
     -Wno-undefined-var-template \
     -Wno-unused-function \
     -Wno-unused-parameter \
+    -Wno-extern-c-compat
 
 MY_LIBTEXTCLASSIFIER_CFLAGS := \
     $(MY_LIBTEXTCLASSIFIER_WARNING_CFLAGS) \
     -fvisibility=hidden \
     -DLIBTEXTCLASSIFIER_UNILIB_ICU \
-    -DZLIB_CONST
+    -DZLIB_CONST \
+    -DSAFTM_COMPACT_LOGGING \
+    -DTC3_WITH_ACTIONS_OPS
 
 # Only enable debug logging in userdebug/eng builds.
 ifneq (,$(filter userdebug eng, $(TARGET_BUILD_VARIANT)))
@@ -46,27 +49,16 @@
 endif
 
 # -----------------
-# flatbuffers
-# -----------------
-
-# Empty static library so that other projects can include just the basic
-# FlatBuffers headers as a module.
-include $(CLEAR_VARS)
-LOCAL_MODULE := flatbuffers
-LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
-LOCAL_EXPORT_CPPFLAGS := -std=c++11 -fexceptions -Wall \
-    -DFLATBUFFERS_TRACK_VERIFIER_BUFFER_SIZE
-
-include $(BUILD_STATIC_LIBRARY)
-
-# -----------------
 # libtextclassifier
 # -----------------
 
 include $(CLEAR_VARS)
 LOCAL_MODULE := libtextclassifier
-
+LOCAL_MODULE_CLASS := SHARED_LIBRARIES
 LOCAL_CPP_EXTENSION := .cc
+
+include $(LOCAL_PATH)/generate_flatbuffers.mk
+
 LOCAL_CFLAGS += $(MY_LIBTEXTCLASSIFIER_CFLAGS)
 LOCAL_STRIP_MODULE := $(LIBTEXTCLASSIFIER_STRIP_OPTS)
 
@@ -75,6 +67,8 @@
 LOCAL_C_INCLUDES := $(TOP)/external/zlib
 LOCAL_C_INCLUDES += $(TOP)/external/tensorflow
 LOCAL_C_INCLUDES += $(TOP)/external/flatbuffers/include
+LOCAL_C_INCLUDES += $(TOP)/external/libutf
+LOCAL_C_INCLUDES += $(intermediates)
 
 LOCAL_SHARED_LIBRARIES += liblog
 LOCAL_SHARED_LIBRARIES += libicuuc
@@ -82,15 +76,17 @@
 LOCAL_SHARED_LIBRARIES += libtflite
 LOCAL_SHARED_LIBRARIES += libz
 
-LOCAL_STATIC_LIBRARIES += flatbuffers
+LOCAL_STATIC_LIBRARIES += libutf
 
-LOCAL_REQUIRED_MODULES := textclassifier.en.model
-LOCAL_REQUIRED_MODULES += textclassifier.universal.model
+LOCAL_REQUIRED_MODULES := libtextclassifier_annotator_en_model
+LOCAL_REQUIRED_MODULES += libtextclassifier_annotator_universal_model
+LOCAL_REQUIRED_MODULES += libtextclassifier_actions_suggestions_model
+LOCAL_REQUIRED_MODULES += libtextclassifier_lang_id_model
 
 LOCAL_ADDITIONAL_DEPENDENCIES += $(LOCAL_PATH)/jni.lds
 LOCAL_LDFLAGS += -Wl,-version-script=$(LOCAL_PATH)/jni.lds
-LOCAL_CPPFLAGS_32 += -DLIBTEXTCLASSIFIER_TEST_DATA_DIR="\"/data/nativetest/libtextclassifier_tests/test_data/\""
-LOCAL_CPPFLAGS_64 += -DLIBTEXTCLASSIFIER_TEST_DATA_DIR="\"/data/nativetest64/libtextclassifier_tests/test_data/\""
+LOCAL_CPPFLAGS_32 += -DTC3_TEST_DATA_DIR="\"/data/nativetest/libtextclassifier_tests/test_data/\""
+LOCAL_CPPFLAGS_64 += -DTC3_TEST_DATA_DIR="\"/data/nativetest64/libtextclassifier_tests/test_data/\""
 
 include $(BUILD_SHARED_LIBRARY)
 
@@ -101,41 +97,48 @@
 include $(CLEAR_VARS)
 
 LOCAL_MODULE := libtextclassifier_tests
+LOCAL_MODULE_CLASS := NATIVE_TESTS
 LOCAL_COMPATIBILITY_SUITE := device-tests
 LOCAL_MODULE_TAGS := tests
-
 LOCAL_CPP_EXTENSION := .cc
+
+include $(LOCAL_PATH)/generate_flatbuffers.mk
+
 LOCAL_CFLAGS += $(MY_LIBTEXTCLASSIFIER_CFLAGS)
 LOCAL_STRIP_MODULE := $(LIBTEXTCLASSIFIER_STRIP_OPTS)
 
-LOCAL_TEST_DATA := $(call find-test-data-in-subdirs, $(LOCAL_PATH), *, test_data)
+LOCAL_TEST_DATA := $(call find-test-data-in-subdirs, $(LOCAL_PATH), *, annotator/test_data, actions/test_data)
 
-LOCAL_CPPFLAGS_32 += -DLIBTEXTCLASSIFIER_TEST_DATA_DIR="\"/data/nativetest/libtextclassifier_tests/test_data/\""
-LOCAL_CPPFLAGS_64 += -DLIBTEXTCLASSIFIER_TEST_DATA_DIR="\"/data/nativetest64/libtextclassifier_tests/test_data/\""
+LOCAL_CPPFLAGS_32 += -DTC3_TEST_DATA_DIR="\"/data/nativetest/libtextclassifier_tests/test_data/\""
+LOCAL_CPPFLAGS_64 += -DTC3_TEST_DATA_DIR="\"/data/nativetest64/libtextclassifier_tests/test_data/\""
 
-LOCAL_SRC_FILES := $(call all-subdir-cpp-files)
+# TODO: Do not filter out tflite test once the dependency issue is resolved.
+LOCAL_SRC_FILES := $(filter-out utils/tflite/%_test.cc,$(call all-subdir-cpp-files))
 
 LOCAL_C_INCLUDES := $(TOP)/external/zlib
 LOCAL_C_INCLUDES += $(TOP)/external/tensorflow
 LOCAL_C_INCLUDES += $(TOP)/external/flatbuffers/include
+LOCAL_C_INCLUDES += $(TOP)/external/libutf
+LOCAL_C_INCLUDES += $(intermediates)
 
-LOCAL_STATIC_LIBRARIES += libgmock
 LOCAL_SHARED_LIBRARIES += liblog
 LOCAL_SHARED_LIBRARIES += libicuuc
 LOCAL_SHARED_LIBRARIES += libicui18n
 LOCAL_SHARED_LIBRARIES += libtflite
 LOCAL_SHARED_LIBRARIES += libz
 
-LOCAL_STATIC_LIBRARIES += flatbuffers
+LOCAL_STATIC_LIBRARIES += libgmock
+LOCAL_STATIC_LIBRARIES += libutf
 
 include $(BUILD_NATIVE_TEST)
 
-# ----------------------
-# Smart Selection models
-# ----------------------
+# ----------------
+# Annotator models
+# ----------------
 
 include $(CLEAR_VARS)
-LOCAL_MODULE        := textclassifier.en.model
+LOCAL_MODULE        := libtextclassifier_annotator_en_model
+LOCAL_MODULE_STEM   := textclassifier.en.model
 LOCAL_MODULE_CLASS  := ETC
 LOCAL_MODULE_OWNER  := google
 LOCAL_SRC_FILES     := ./models/textclassifier.en.model
@@ -143,19 +146,37 @@
 include $(BUILD_PREBUILT)
 
 include $(CLEAR_VARS)
-LOCAL_MODULE        := textclassifier.universal.model
+LOCAL_MODULE        := libtextclassifier_annotator_universal_model
+LOCAL_MODULE_STEM   := textclassifier.universal.model
 LOCAL_MODULE_CLASS  := ETC
 LOCAL_MODULE_OWNER  := google
 LOCAL_SRC_FILES     := ./models/textclassifier.universal.model
 LOCAL_MODULE_PATH   := $(TARGET_OUT_ETC)/textclassifier
 include $(BUILD_PREBUILT)
 
-# -----------------------
-# Smart Selection bundles
-# -----------------------
+# ---------------------------
+# Actions Suggestions models
+# ---------------------------
+# STOPSHIP: The model size is currently around 7.5 MB; trim it down before shipping.
 
 include $(CLEAR_VARS)
-LOCAL_MODULE           := textclassifier.bundle1
-LOCAL_REQUIRED_MODULES := textclassifier.en.model
-LOCAL_CFLAGS := $(MY_LIBTEXTCLASSIFIER_WARNING_CFLAGS)
-include $(BUILD_STATIC_LIBRARY)
+LOCAL_MODULE        := libtextclassifier_actions_suggestions_model
+LOCAL_MODULE_STEM   := actions_suggestions.model
+LOCAL_MODULE_CLASS  := ETC
+LOCAL_MODULE_OWNER  := google
+LOCAL_SRC_FILES     := ./models/actions_suggestions.model
+LOCAL_MODULE_PATH   := $(TARGET_OUT_ETC)/textclassifier
+include $(BUILD_PREBUILT)
+
+# ------------
+# LangId model
+# ------------
+
+include $(CLEAR_VARS)
+LOCAL_MODULE        := libtextclassifier_lang_id_model
+LOCAL_MODULE_STEM   := lang_id.model
+LOCAL_MODULE_CLASS  := ETC
+LOCAL_MODULE_OWNER  := google
+LOCAL_SRC_FILES     := ./models/lang_id.model
+LOCAL_MODULE_PATH   := $(TARGET_OUT_ETC)/textclassifier
+include $(BUILD_PREBUILT)
diff --git a/actions/actions-suggestions.cc b/actions/actions-suggestions.cc
new file mode 100644
index 0000000..d7d261f
--- /dev/null
+++ b/actions/actions-suggestions.cc
@@ -0,0 +1,400 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/actions-suggestions.h"
+#include "utils/base/logging.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+namespace libtextclassifier3 {
+
+const std::string& ActionsSuggestions::kViewCalendarType =
+    *[]() { return new std::string("view_calendar"); }();
+const std::string& ActionsSuggestions::kViewMapType =
+    *[]() { return new std::string("view_map"); }();
+const std::string& ActionsSuggestions::kTrackFlightType =
+    *[]() { return new std::string("track_flight"); }();
+const std::string& ActionsSuggestions::kOpenUrlType =
+    *[]() { return new std::string("open_url"); }();
+const std::string& ActionsSuggestions::kSendSmsType =
+    *[]() { return new std::string("send_sms"); }();
+const std::string& ActionsSuggestions::kCallPhoneType =
+    *[]() { return new std::string("call_phone"); }();
+const std::string& ActionsSuggestions::kSendEmailType =
+    *[]() { return new std::string("send_email"); }();
+
+namespace {
+const ActionsModel* LoadAndVerifyModel(const uint8_t* addr, int size) {
+  flatbuffers::Verifier verifier(addr, size);
+  if (VerifyActionsModelBuffer(verifier)) {
+    return GetActionsModel(addr);
+  } else {
+    return nullptr;
+  }
+}
+
+}  // namespace
+
+std::unique_ptr<ActionsSuggestions> ActionsSuggestions::FromUnownedBuffer(
+    const uint8_t* buffer, const int size) {
+  auto actions = std::unique_ptr<ActionsSuggestions>(new ActionsSuggestions());
+  const ActionsModel* model = LoadAndVerifyModel(buffer, size);
+  if (model == nullptr) {
+    return nullptr;
+  }
+  actions->model_ = model;
+  if (!actions->ValidateAndInitialize()) {
+    return nullptr;
+  }
+  return actions;
+}
+
+std::unique_ptr<ActionsSuggestions> ActionsSuggestions::FromScopedMmap(
+    std::unique_ptr<libtextclassifier3::ScopedMmap> mmap) {
+  if (!mmap->handle().ok()) {
+    TC3_VLOG(1) << "Mmap failed.";
+    return nullptr;
+  }
+  const ActionsModel* model = LoadAndVerifyModel(
+      reinterpret_cast<const uint8_t*>(mmap->handle().start()),
+      mmap->handle().num_bytes());
+  if (!model) {
+    TC3_LOG(ERROR) << "Model verification failed.";
+    return nullptr;
+  }
+  auto actions = std::unique_ptr<ActionsSuggestions>(new ActionsSuggestions());
+  actions->model_ = model;
+  actions->mmap_ = std::move(mmap);
+
+  if (!actions->ValidateAndInitialize()) {
+    return nullptr;
+  }
+  return actions;
+}
+
+std::unique_ptr<ActionsSuggestions> ActionsSuggestions::FromFileDescriptor(
+    const int fd, const int offset, const int size) {
+  std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd, offset, size));
+  return FromScopedMmap(std::move(mmap));
+}
+
+std::unique_ptr<ActionsSuggestions> ActionsSuggestions::FromFileDescriptor(
+    const int fd) {
+  std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return FromScopedMmap(std::move(mmap));
+}
+
+std::unique_ptr<ActionsSuggestions> ActionsSuggestions::FromPath(
+    const std::string& path) {
+  std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(path));
+  return FromScopedMmap(std::move(mmap));
+}
+
+void ActionsSuggestions::SetAnnotator(const Annotator* annotator) {
+  annotator_ = annotator;
+}
+
+bool ActionsSuggestions::ValidateAndInitialize() {
+  if (model_ == nullptr) {
+    TC3_LOG(ERROR) << "No model specified.";
+    return false;
+  }
+
+  if (model_->tflite_model_spec()) {
+    model_executor_ = TfLiteModelExecutor::FromBuffer(
+        model_->tflite_model_spec()->tflite_model());
+    if (!model_executor_) {
+      TC3_LOG(ERROR) << "Could not initialize model executor.";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+void ActionsSuggestions::SetupModelInput(
+    const std::vector<std::string>& context, const std::vector<int>& user_ids,
+    const std::vector<float>& time_diffs, const int num_suggestions,
+    tflite::Interpreter* interpreter) const {
+  if (model_->tflite_model_spec()->input_context() >= 0) {
+    model_executor_->SetInput<std::string>(
+        model_->tflite_model_spec()->input_context(), context, interpreter);
+  }
+  if (model_->tflite_model_spec()->input_context_length() >= 0) {
+    *interpreter
+         ->tensor(interpreter->inputs()[model_->tflite_model_spec()
+                                            ->input_context_length()])
+         ->data.i64 = context.size();
+  }
+  if (model_->tflite_model_spec()->input_user_id() >= 0) {
+    model_executor_->SetInput<int>(model_->tflite_model_spec()->input_user_id(),
+                                   user_ids, interpreter);
+  }
+  if (model_->tflite_model_spec()->input_num_suggestions() >= 0) {
+    *interpreter
+         ->tensor(interpreter->inputs()[model_->tflite_model_spec()
+                                            ->input_num_suggestions()])
+         ->data.i64 = num_suggestions;
+  }
+  if (model_->tflite_model_spec()->input_time_diffs() >= 0) {
+    model_executor_->SetInput<float>(
+        model_->tflite_model_spec()->input_time_diffs(), time_diffs,
+        interpreter);
+  }
+}
+
+void ActionsSuggestions::ReadModelOutput(
+    tflite::Interpreter* interpreter,
+    ActionsSuggestionsResponse* response) const {
+  // Read sensitivity and triggering score predictions.
+  if (model_->tflite_model_spec()->output_triggering_score() >= 0) {
+    const TensorView<float>& triggering_score =
+        model_executor_->OutputView<float>(
+            model_->tflite_model_spec()->output_triggering_score(),
+            interpreter);
+    if (!triggering_score.is_valid() || triggering_score.size() == 0) {
+      TC3_LOG(ERROR) << "Could not compute triggering score.";
+      return;
+    }
+    response->triggering_score = triggering_score.data()[0];
+    response->output_filtered_min_triggering_score =
+        (response->triggering_score < model_->min_triggering_confidence());
+  }
+  if (model_->tflite_model_spec()->output_sensitive_topic_score() >= 0) {
+    const TensorView<float>& sensitive_topic_score =
+        model_executor_->OutputView<float>(
+            model_->tflite_model_spec()->output_sensitive_topic_score(),
+            interpreter);
+    if (!sensitive_topic_score.is_valid() ||
+        sensitive_topic_score.dim(0) != 1) {
+      TC3_LOG(ERROR) << "Could not compute sensitive topic score.";
+      return;
+    }
+    response->sensitivity_score = sensitive_topic_score.data()[0];
+    response->output_filtered_sensitivity =
+        (response->sensitivity_score > model_->max_sensitive_topic_score());
+  }
+
+  // Suppress model outputs.
+  if (response->output_filtered_sensitivity) {
+    return;
+  }
+
+  // Read smart reply predictions.
+  if (!response->output_filtered_min_triggering_score &&
+      model_->tflite_model_spec()->output_replies() >= 0) {
+    const std::vector<tflite::StringRef> replies =
+        model_executor_->Output<tflite::StringRef>(
+            model_->tflite_model_spec()->output_replies(), interpreter);
+    TensorView<float> scores = model_executor_->OutputView<float>(
+        model_->tflite_model_spec()->output_replies_scores(), interpreter);
+    std::vector<ActionSuggestion> text_replies;
+    for (int i = 0; i < replies.size(); i++) {
+      response->actions.push_back({std::string(replies[i].str, replies[i].len),
+                                   model_->smart_reply_action_type()->str(),
+                                   scores.data()[i]});
+    }
+  }
+
+  // Read actions suggestions.
+  if (model_->tflite_model_spec()->output_actions_scores() >= 0) {
+    const TensorView<float> actions_scores = model_executor_->OutputView<float>(
+        model_->tflite_model_spec()->output_actions_scores(), interpreter);
+    for (int i = 0; i < model_->action_type()->Length(); i++) {
+      // Skip disabled action classes, such as the default other category.
+      if (!(*model_->action_type())[i]->enabled()) {
+        continue;
+      }
+      const float score = actions_scores.data()[i];
+      if (score < (*model_->action_type())[i]->min_triggering_score()) {
+        continue;
+      }
+      const std::string& output_class =
+          (*model_->action_type())[i]->name()->str();
+      if (score >= model_->min_actions_confidence()) {
+        response->actions.push_back(
+            {/*response_text=*/"", output_class, score});
+      }
+    }
+  }
+}
+
+void ActionsSuggestions::SuggestActionsFromModel(
+    const Conversation& conversation, const int num_messages,
+    ActionsSuggestionsResponse* response) const {
+  TC3_CHECK_LE(num_messages, conversation.messages.size());
+
+  if (!model_executor_) {
+    return;
+  }
+  std::unique_ptr<tflite::Interpreter> interpreter =
+      model_executor_->CreateInterpreter();
+
+  if (!interpreter) {
+    TC3_LOG(ERROR) << "Could not build TensorFlow Lite interpreter for the "
+                      "actions suggestions model.";
+    return;
+  }
+
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    TC3_LOG(ERROR)
+        << "Failed to allocate TensorFlow Lite tensors for the actions "
+           "suggestions model.";
+    return;
+  }
+
+  std::vector<std::string> context;
+  std::vector<int> user_ids;
+  std::vector<float> time_diffs;
+
+  // Gather last `num_messages` messages from the conversation.
+  for (int i = conversation.messages.size() - num_messages;
+       i < conversation.messages.size(); i++) {
+    const ConversationMessage& message = conversation.messages[i];
+    context.push_back(message.text);
+    user_ids.push_back(message.user_id);
+    time_diffs.push_back(message.time_diff_secs);
+  }
+
+  SetupModelInput(context, user_ids, time_diffs,
+                  /*num_suggestions=*/model_->num_smart_replies(),
+                  interpreter.get());
+
+  if (interpreter->Invoke() != kTfLiteOk) {
+    TC3_LOG(ERROR) << "Failed to invoke TensorFlow Lite interpreter.";
+    return;
+  }
+
+  ReadModelOutput(interpreter.get(), response);
+}
+
+void ActionsSuggestions::SuggestActionsFromAnnotations(
+    const Conversation& conversation, const ActionSuggestionOptions& options,
+    ActionsSuggestionsResponse* response) const {
+  if (model_->annotation_actions_spec() == nullptr ||
+      model_->annotation_actions_spec()->annotation_mapping() == nullptr ||
+      model_->annotation_actions_spec()->annotation_mapping()->size() == 0) {
+    return;
+  }
+
+  // Create actions based on the annotations present in the last message.
+  // TODO(smillius): Make this configurable.
+  std::vector<AnnotatedSpan> annotations =
+      conversation.messages.back().annotations;
+  if (annotations.empty() && annotator_ != nullptr) {
+    annotations = annotator_->Annotate(conversation.messages.back().text,
+                                       options.annotation_options);
+  }
+  const int message_index = conversation.messages.size() - 1;
+  for (const AnnotatedSpan& annotation : annotations) {
+    if (annotation.classification.empty() ||
+        annotation.classification[0].collection.empty()) {
+      continue;
+    }
+    CreateActionsFromAnnotationResult(message_index, annotation, response);
+  }
+}
+
+void ActionsSuggestions::CreateActionsFromAnnotationResult(
+    const int message_index, const AnnotatedSpan& annotation,
+    ActionsSuggestionsResponse* suggestions) const {
+  const ClassificationResult& classification_result =
+      annotation.classification[0];
+  ActionSuggestionAnnotation suggestion_annotation;
+  suggestion_annotation.message_index = message_index;
+  suggestion_annotation.span = annotation.span;
+  suggestion_annotation.entity = classification_result;
+  const std::string collection = classification_result.collection;
+
+  for (const AnnotationActionsSpec_::AnnotationMapping* mapping :
+       *model_->annotation_actions_spec()->annotation_mapping()) {
+    if (collection == mapping->annotation_collection()->str()) {
+      if (classification_result.score < mapping->min_annotation_score()) {
+        continue;
+      }
+      const float score =
+          (mapping->use_annotation_score() ? classification_result.score
+                                           : mapping->default_score());
+      suggestions->actions.push_back({/*response_text=*/"",
+                                      /*type=*/mapping->action_name()->str(),
+                                      /*score=*/score,
+                                      /*annotations=*/{suggestion_annotation}});
+    }
+  }
+}
+
+ActionsSuggestionsResponse ActionsSuggestions::SuggestActions(
+    const Conversation& conversation,
+    const ActionSuggestionOptions& options) const {
+  ActionsSuggestionsResponse response;
+  if (conversation.messages.empty()) {
+    return response;
+  }
+
+  const int conversation_history_length = conversation.messages.size();
+  const int max_conversation_history_length =
+      model_->max_conversation_history_length();
+  const int num_messages =
+      ((max_conversation_history_length < 0 ||
+        conversation_history_length < max_conversation_history_length)
+           ? conversation_history_length
+           : max_conversation_history_length);
+
+  if (num_messages <= 0) {
+    TC3_LOG(INFO) << "No messages provided for actions suggestions.";
+    return response;
+  }
+
+  int input_text_length = 0;
+  for (int i = conversation.messages.size() - num_messages;
+       i < conversation.messages.size(); i++) {
+    input_text_length += conversation.messages[i].text.length();
+  }
+
+  // Bail out if we are provided with too little or too much input.
+  if (input_text_length < model_->min_input_length() ||
+      (model_->max_input_length() >= 0 &&
+       input_text_length > model_->max_input_length())) {
+    TC3_LOG(INFO) << "Too much or not enough input for inference.";
+    return response;
+  }
+
+  SuggestActionsFromModel(conversation, num_messages, &response);
+
+  // Suppress all predictions if the conversation was deemed sensitive.
+  if (model_->suppress_on_sensitive_topic() &&
+      response.output_filtered_sensitivity) {
+    return response;
+  }
+
+  SuggestActionsFromAnnotations(conversation, options, &response);
+
+  // TODO(smillius): Properly rank the actions.
+
+  return response;
+}
+
+const ActionsModel* ViewActionsModel(const void* buffer, int size) {
+  if (buffer == nullptr) {
+    return nullptr;
+  }
+
+  return LoadAndVerifyModel(reinterpret_cast<const uint8_t*>(buffer), size);
+}
+
+}  // namespace libtextclassifier3
diff --git a/actions/actions-suggestions.h b/actions/actions-suggestions.h
new file mode 100644
index 0000000..fa7807e
--- /dev/null
+++ b/actions/actions-suggestions.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ACTIONS_ACTIONS_SUGGESTIONS_H_
+#define LIBTEXTCLASSIFIER_ACTIONS_ACTIONS_SUGGESTIONS_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "actions/actions_model_generated.h"
+#include "annotator/annotator.h"
+#include "annotator/types.h"
+#include "utils/memory/mmap.h"
+#include "utils/tflite-model-executor.h"
+
+namespace libtextclassifier3 {
+
+// An entity associated with an action.
+struct ActionSuggestionAnnotation {
+  // The referenced message.
+  // -1 if not referencing a particular message in the provided input.
+  int message_index;
+
+  // The span within the reference message.
+  // (-1, -1) if not referencing a particular location.
+  CodepointSpan span;
+  ClassificationResult entity;
+
+  // Optional annotation name.
+  std::string name;
+
+  explicit ActionSuggestionAnnotation()
+      : message_index(kInvalidIndex), span({kInvalidIndex, kInvalidIndex}) {}
+};
+
+// Action suggestion that contains a response text and the type of the response.
+struct ActionSuggestion {
+  // Text of the action suggestion.
+  std::string response_text;
+
+  // Type of the action suggestion.
+  std::string type;
+
+  // Score.
+  float score;
+
+  // The associated annotations.
+  std::vector<ActionSuggestionAnnotation> annotations;
+};
+
+// Actions suggestions result containing meta-information and the suggested
+// actions.
+struct ActionsSuggestionsResponse {
+  ActionsSuggestionsResponse()
+      : sensitivity_score(-1),
+        triggering_score(-1),
+        output_filtered_sensitivity(false),
+        output_filtered_min_triggering_score(false) {}
+
+  // The sensitivity assessment.
+  float sensitivity_score;
+  float triggering_score;
+
+  // Whether the output was suppressed by the sensitivity threshold.
+  bool output_filtered_sensitivity;
+
+  // Whether the output was suppressed by the triggering score threshold.
+  bool output_filtered_min_triggering_score;
+
+  // The suggested actions.
+  std::vector<ActionSuggestion> actions;
+};
+
+// Represents a single message in the conversation.
+struct ConversationMessage {
+  // User ID distinguishing the user from other users in the conversation.
+  int user_id;
+
+  // Text of the message.
+  std::string text;
+
+  // Relative time to previous message.
+  float time_diff_secs;
+
+  // Annotations on the text.
+  std::vector<AnnotatedSpan> annotations;
+
+  // Comma-separated list of locale specification for the text in the
+  // conversation (BCP 47 tags).
+  std::string locales;
+};
+
+// Conversation between multiple users.
+struct Conversation {
+  // Sequence of messages that were exchanged in the conversation.
+  std::vector<ConversationMessage> messages;
+};
+
+// Options for suggesting actions.
+struct ActionSuggestionOptions {
+  // Options for annotation of the messages.
+  AnnotationOptions annotation_options = AnnotationOptions::Default();
+
+  static ActionSuggestionOptions Default() { return ActionSuggestionOptions(); }
+};
+
+// Class for predicting actions following a conversation.
+class ActionsSuggestions {
+ public:
+  static std::unique_ptr<ActionsSuggestions> FromUnownedBuffer(
+      const uint8_t* buffer, const int size);
+  // Takes ownership of the mmap.
+  static std::unique_ptr<ActionsSuggestions> FromScopedMmap(
+      std::unique_ptr<libtextclassifier3::ScopedMmap> mmap);
+  static std::unique_ptr<ActionsSuggestions> FromFileDescriptor(
+      const int fd, const int offset, const int size);
+  static std::unique_ptr<ActionsSuggestions> FromFileDescriptor(const int fd);
+  static std::unique_ptr<ActionsSuggestions> FromPath(const std::string& path);
+
+  ActionsSuggestionsResponse SuggestActions(
+      const Conversation& conversation,
+      const ActionSuggestionOptions& options =
+          ActionSuggestionOptions::Default()) const;
+
+  // Provide an annotator.
+  void SetAnnotator(const Annotator* annotator);
+
+  // Should be in sync with those defined in Android.
+  // android/frameworks/base/core/java/android/view/textclassifier/ConversationActions.java
+  static const std::string& kViewCalendarType;
+  static const std::string& kViewMapType;
+  static const std::string& kTrackFlightType;
+  static const std::string& kOpenUrlType;
+  static const std::string& kSendSmsType;
+  static const std::string& kCallPhoneType;
+  static const std::string& kSendEmailType;
+
+ private:
+  // Checks that the model contains all required fields and initializes
+  // internal data structures.
+  bool ValidateAndInitialize();
+
+  void SetupModelInput(const std::vector<std::string>& context,
+                       const std::vector<int>& user_ids,
+                       const std::vector<float>& time_diffs,
+                       const int num_suggestions,
+                       tflite::Interpreter* interpreter) const;
+  void ReadModelOutput(tflite::Interpreter* interpreter,
+                       ActionsSuggestionsResponse* response) const;
+
+  void SuggestActionsFromModel(const Conversation& conversation,
+                               const int num_messages,
+                               ActionsSuggestionsResponse* response) const;
+
+  void SuggestActionsFromAnnotations(
+      const Conversation& conversation, const ActionSuggestionOptions& options,
+      ActionsSuggestionsResponse* suggestions) const;
+
+  void CreateActionsFromAnnotationResult(
+      const int message_index, const AnnotatedSpan& annotation,
+      ActionsSuggestionsResponse* suggestions) const;
+
+  const ActionsModel* model_;
+  std::unique_ptr<libtextclassifier3::ScopedMmap> mmap_;
+
+  // Tensorflow Lite models.
+  std::unique_ptr<const TfLiteModelExecutor> model_executor_;
+
+  // Annotator.
+  const Annotator* annotator_ = nullptr;
+};
+
+// Interprets the buffer as a Model flatbuffer and returns it for reading.
+const ActionsModel* ViewActionsModel(const void* buffer, int size);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ACTIONS_ACTIONS_SUGGESTIONS_H_
diff --git a/actions/actions-suggestions_test.cc b/actions/actions-suggestions_test.cc
new file mode 100644
index 0000000..df8abcd
--- /dev/null
+++ b/actions/actions-suggestions_test.cc
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/actions-suggestions.h"
+
+#include <fstream>
+#include <iterator>
+#include <memory>
+
+#include "actions/actions_model_generated.h"
+#include "annotator/types.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "flatbuffers/flatbuffers.h"
+
+namespace libtextclassifier3 {
+namespace {
+constexpr char kModelFileName[] = "actions_suggestions_test.model";
+
+std::string ReadFile(const std::string& file_name) {
+  std::ifstream file_stream(file_name);
+  return std::string(std::istreambuf_iterator<char>(file_stream), {});
+}
+
+std::string GetModelPath() {
+  return "";
+}
+
+std::unique_ptr<ActionsSuggestions> LoadTestModel() {
+  return ActionsSuggestions::FromPath(GetModelPath() + kModelFileName);
+}
+
+TEST(ActionsSuggestionsTest, InstantiateActionSuggestions) {
+  EXPECT_THAT(LoadTestModel(), testing::NotNull());
+}
+
+TEST(ActionsSuggestionsTest, SuggestActions) {
+  std::unique_ptr<ActionsSuggestions> actions_suggestions = LoadTestModel();
+  const ActionsSuggestionsResponse& response =
+      actions_suggestions->SuggestActions(
+          {{{/*user_id=*/1, "Where are you?"}}});
+  EXPECT_EQ(response.actions.size(), 4);
+}
+
+TEST(ActionsSuggestionsTest, SuggestActionsFromAnnotations) {
+  std::unique_ptr<ActionsSuggestions> actions_suggestions = LoadTestModel();
+  AnnotatedSpan annotation;
+  annotation.span = {11, 15};
+  annotation.classification = {ClassificationResult("address", 1.0)};
+  const ActionsSuggestionsResponse& response =
+      actions_suggestions->SuggestActions({{{/*user_id=*/1, "are you at home?",
+                                             /*time_diff_secs=*/0,
+                                             /*annotations=*/{annotation}}}});
+  EXPECT_EQ(response.actions.size(), 4);
+  EXPECT_EQ(response.actions.back().type, "view_map");
+  EXPECT_EQ(response.actions.back().score, 1.0);
+}
+
+void TestSuggestActionsWithThreshold(
+    const std::function<void(ActionsModelT*)>& set_value_fn,
+    const int expected_size = 0) {
+  const std::string actions_model_string =
+      ReadFile(GetModelPath() + kModelFileName);
+  std::unique_ptr<ActionsModelT> actions_model =
+      UnPackActionsModel(actions_model_string.c_str());
+  set_value_fn(actions_model.get());
+  flatbuffers::FlatBufferBuilder builder;
+  FinishActionsModelBuffer(builder,
+                           ActionsModel::Pack(builder, actions_model.get()));
+  std::unique_ptr<ActionsSuggestions> actions_suggestions =
+      ActionsSuggestions::FromUnownedBuffer(
+          reinterpret_cast<const uint8_t*>(builder.GetBufferPointer()),
+          builder.GetSize());
+  ASSERT_TRUE(actions_suggestions);
+  const ActionsSuggestionsResponse& response =
+      actions_suggestions->SuggestActions(
+          {{{/*user_id=*/1, "Where are you?"}}});
+  EXPECT_EQ(response.actions.size(), expected_size);
+}
+
+TEST(ActionsSuggestionsTest, SuggestActionsWithTriggeringScore) {
+  TestSuggestActionsWithThreshold(
+      [](ActionsModelT* actions_model) {
+        actions_model->min_triggering_confidence = 1.0;
+      },
+      /*expected_size=*/1 /*no smart reply, only actions*/);
+}
+
+TEST(ActionsSuggestionsTest, SuggestActionsWithSensitiveTopicScore) {
+  TestSuggestActionsWithThreshold(
+      [](ActionsModelT* actions_model) {
+        actions_model->max_sensitive_topic_score = 0.0;
+      },
+      /*expected_size=*/4 /* no sensitive prediction in test model*/);
+}
+
+TEST(ActionsSuggestionsTest, SuggestActionsWithMaxInputLength) {
+  TestSuggestActionsWithThreshold([](ActionsModelT* actions_model) {
+    actions_model->max_input_length = 0;
+  });
+}
+
+TEST(ActionsSuggestionsTest, SuggestActionsWithMinInputLength) {
+  TestSuggestActionsWithThreshold([](ActionsModelT* actions_model) {
+    actions_model->min_input_length = 100;
+  });
+}
+
+TEST(ActionsSuggestionsTest, SuppressActionsFromAnnotationsOnSensitiveTopic) {
+  const std::string actions_model_string =
+      ReadFile(GetModelPath() + kModelFileName);
+  std::unique_ptr<ActionsModelT> actions_model =
+      UnPackActionsModel(actions_model_string.c_str());
+
+  // Don't test if no sensitivity score is produced
+  if (actions_model->tflite_model_spec->output_sensitive_topic_score < 0) {
+    return;
+  }
+
+  actions_model->max_sensitive_topic_score = 0.0;
+  actions_model->suppress_on_sensitive_topic = true;
+  flatbuffers::FlatBufferBuilder builder;
+  FinishActionsModelBuffer(builder,
+                           ActionsModel::Pack(builder, actions_model.get()));
+  std::unique_ptr<ActionsSuggestions> actions_suggestions =
+      ActionsSuggestions::FromUnownedBuffer(
+          reinterpret_cast<const uint8_t*>(builder.GetBufferPointer()),
+          builder.GetSize());
+  AnnotatedSpan annotation;
+  annotation.span = {11, 15};
+  annotation.classification = {
+      ClassificationResult(Annotator::kAddressCollection, 1.0)};
+  const ActionsSuggestionsResponse& response =
+      actions_suggestions->SuggestActions({{{/*user_id=*/1, "are you at home?",
+                                             /*time_diff_secs=*/0,
+                                             /*annotations=*/{annotation}}}});
+  EXPECT_THAT(response.actions, testing::IsEmpty());
+}
+
+TEST(ActionsSuggestionsTest, SuggestActionsWithLongerConversation) {
+  const std::string actions_model_string =
+      ReadFile(GetModelPath() + kModelFileName);
+  std::unique_ptr<ActionsModelT> actions_model =
+      UnPackActionsModel(actions_model_string.c_str());
+
+  // Allow a larger conversation context.
+  actions_model->max_conversation_history_length = 10;
+
+  flatbuffers::FlatBufferBuilder builder;
+  FinishActionsModelBuffer(builder,
+                           ActionsModel::Pack(builder, actions_model.get()));
+  std::unique_ptr<ActionsSuggestions> actions_suggestions =
+      ActionsSuggestions::FromUnownedBuffer(
+          reinterpret_cast<const uint8_t*>(builder.GetBufferPointer()),
+          builder.GetSize());
+  AnnotatedSpan annotation;
+  annotation.span = {11, 15};
+  annotation.classification = {
+      ClassificationResult(Annotator::kAddressCollection, 1.0)};
+  const ActionsSuggestionsResponse& response =
+      actions_suggestions->SuggestActions(
+          {{{/*user_id=*/0, "hi, how are you?", /*time_diff_secs=*/0},
+            {/*user_id=*/1, "good! are you at home?",
+             /*time_diff_secs=*/60,
+             /*annotations=*/{annotation}}}});
+  EXPECT_EQ(response.actions.size(), 1);
+  EXPECT_EQ(response.actions.back().type, "view_map");
+  EXPECT_EQ(response.actions.back().score, 1.0);
+}
+
+TEST(ActionsSuggestionsTest, CreateActionsFromClassificationResult) {
+  std::unique_ptr<ActionsSuggestions> actions_suggestions = LoadTestModel();
+  AnnotatedSpan annotation;
+  annotation.span = {13, 16};
+  annotation.classification = {
+      ClassificationResult(Annotator::kPhoneCollection, 1.0)};
+
+  const ActionsSuggestionsResponse& response =
+      actions_suggestions->SuggestActions({{{/*user_id=*/1, "can you call 911?",
+                                             /*time_diff_secs=*/0,
+                                             /*annotations=*/{annotation}}}});
+
+  EXPECT_EQ(response.actions.size(),
+            5 /* smart replies + actions from annotations*/);
+  EXPECT_EQ(response.actions.back().type, "send_sms");
+  EXPECT_EQ(response.actions.back().score, 1.0);
+  EXPECT_EQ(response.actions.back().annotations.size(), 1);
+  EXPECT_EQ(response.actions.back().annotations[0].message_index, 0);
+  EXPECT_EQ(response.actions.back().annotations[0].span, annotation.span);
+  EXPECT_EQ(response.actions.end()[-2].type, "call_phone");
+  EXPECT_EQ(response.actions.end()[-2].score, 1.0);
+  EXPECT_EQ(response.actions.end()[-2].annotations.size(), 1);
+  EXPECT_EQ(response.actions.end()[-2].annotations[0].message_index, 0);
+  EXPECT_EQ(response.actions.end()[-2].annotations[0].span, annotation.span);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/actions/actions_jni.cc b/actions/actions_jni.cc
new file mode 100644
index 0000000..17571c3
--- /dev/null
+++ b/actions/actions_jni.cc
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// JNI wrapper for actions.
+
+#include "actions/actions_jni.h"
+
+#include <jni.h>
+#include <type_traits>
+#include <vector>
+
+#include "actions/actions-suggestions.h"
+#include "annotator/annotator.h"
+#include "annotator/annotator_jni_common.h"
+#include "utils/base/integral_types.h"
+#include "utils/java/scoped_local_ref.h"
+#include "utils/memory/mmap.h"
+
+using libtextclassifier3::ActionsSuggestions;
+using libtextclassifier3::ActionsSuggestionsResponse;
+using libtextclassifier3::ActionSuggestion;
+using libtextclassifier3::ActionSuggestionOptions;
+using libtextclassifier3::Annotator;
+using libtextclassifier3::Conversation;
+using libtextclassifier3::ScopedLocalRef;
+using libtextclassifier3::ToStlString;
+
+namespace libtextclassifier3 {
+
+namespace {
+ActionSuggestionOptions FromJavaActionSuggestionOptions(JNIEnv* env,
+                                                        jobject joptions) {
+  ActionSuggestionOptions options = ActionSuggestionOptions::Default();
+
+  if (!joptions) {
+    return options;
+  }
+
+  const ScopedLocalRef<jclass> options_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ACTIONS_CLASS_NAME_STR
+                     "$ActionSuggestionOptions"),
+      env);
+
+  if (!options_class) {
+    return options;
+  }
+
+  const std::pair<bool, jobject> status_or_annotation_options =
+      CallJniMethod0<jobject>(env, joptions, options_class.get(),
+                              &JNIEnv::CallObjectMethod, "getAnnotationOptions",
+                              "L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                              "$AnnotationOptions;");
+
+  if (!status_or_annotation_options.first) {
+    return options;
+  }
+
+  // Create annotation options.
+  options.annotation_options =
+      FromJavaAnnotationOptions(env, status_or_annotation_options.second);
+
+  return options;
+}
+
+jobjectArray ActionSuggestionsToJObjectArray(
+    JNIEnv* env, const std::vector<ActionSuggestion>& action_result) {
+  const ScopedLocalRef<jclass> result_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ACTIONS_CLASS_NAME_STR
+                     "$ActionSuggestion"),
+      env);
+  if (!result_class) {
+    TC3_LOG(ERROR) << "Couldn't find ActionSuggestion class.";
+    return nullptr;
+  }
+
+  const jmethodID result_class_constructor = env->GetMethodID(
+      result_class.get(), "<init>", "(Ljava/lang/String;Ljava/lang/String;F)V");
+  const jobjectArray results =
+      env->NewObjectArray(action_result.size(), result_class.get(), nullptr);
+  for (int i = 0; i < action_result.size(); i++) {
+    ScopedLocalRef<jobject> result(env->NewObject(
+        result_class.get(), result_class_constructor,
+        env->NewStringUTF(action_result[i].response_text.c_str()),
+        env->NewStringUTF(action_result[i].type.c_str()),
+        static_cast<jfloat>(action_result[i].score)));
+    env->SetObjectArrayElement(results, i, result.get());
+  }
+  return results;
+}
+
+ConversationMessage FromJavaConversationMessage(JNIEnv* env, jobject jmessage) {
+  if (!jmessage) {
+    return {};
+  }
+
+  const ScopedLocalRef<jclass> message_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ACTIONS_CLASS_NAME_STR
+                     "$ConversationMessage"),
+      env);
+  const std::pair<bool, jobject> status_or_text = CallJniMethod0<jobject>(
+      env, jmessage, message_class.get(), &JNIEnv::CallObjectMethod, "getText",
+      "Ljava/lang/String;");
+  const std::pair<bool, int32> status_or_user_id =
+      CallJniMethod0<int32>(env, jmessage, message_class.get(),
+                            &JNIEnv::CallIntMethod, "getUserId", "I");
+  const std::pair<bool, int32> status_or_time_diff = CallJniMethod0<int32>(
+      env, jmessage, message_class.get(), &JNIEnv::CallIntMethod,
+      "getTimeDiffInSeconds", "I");
+  const std::pair<bool, jobject> status_or_locales = CallJniMethod0<jobject>(
+      env, jmessage, message_class.get(), &JNIEnv::CallObjectMethod,
+      "getLocales", "Ljava/lang/String;");
+  if (!status_or_text.first || !status_or_user_id.first ||
+      !status_or_locales.first || !status_or_time_diff.first) {
+    return {};
+  }
+
+  ConversationMessage message;
+  message.text =
+      ToStlString(env, reinterpret_cast<jstring>(status_or_text.second));
+  message.user_id = status_or_user_id.second;
+  message.time_diff_secs = status_or_time_diff.second;
+  message.locales =
+      ToStlString(env, reinterpret_cast<jstring>(status_or_locales.second));
+  return message;
+}
+
+Conversation FromJavaConversation(JNIEnv* env, jobject jconversation) {
+  if (!jconversation) {
+    return {};
+  }
+
+  const ScopedLocalRef<jclass> conversation_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ACTIONS_CLASS_NAME_STR
+                     "$Conversation"),
+      env);
+
+  const std::pair<bool, jobject> status_or_messages = CallJniMethod0<jobject>(
+      env, jconversation, conversation_class.get(), &JNIEnv::CallObjectMethod,
+      "getConversationMessages",
+      "[L" TC3_PACKAGE_PATH TC3_ACTIONS_CLASS_NAME_STR "$ConversationMessage;");
+
+  if (!status_or_messages.first) {
+    return {};
+  }
+
+  const jobjectArray jmessages =
+      reinterpret_cast<jobjectArray>(status_or_messages.second);
+
+  const int size = env->GetArrayLength(jmessages);
+
+  std::vector<ConversationMessage> messages;
+  for (int i = 0; i < size; i++) {
+    jobject jmessage = env->GetObjectArrayElement(jmessages, i);
+    ConversationMessage message = FromJavaConversationMessage(env, jmessage);
+    messages.push_back(message);
+  }
+  Conversation conversation;
+  conversation.messages = messages;
+  return conversation;
+}
+
+jstring GetLocalesFromMmap(JNIEnv* env, libtextclassifier3::ScopedMmap* mmap) {
+  if (!mmap->handle().ok()) {
+    return env->NewStringUTF("");
+  }
+  const ActionsModel* model = libtextclassifier3::ViewActionsModel(
+      mmap->handle().start(), mmap->handle().num_bytes());
+  if (!model || !model->locales()) {
+    return env->NewStringUTF("");
+  }
+  return env->NewStringUTF(model->locales()->c_str());
+}
+
+jint GetVersionFromMmap(JNIEnv* env, libtextclassifier3::ScopedMmap* mmap) {
+  if (!mmap->handle().ok()) {
+    return 0;
+  }
+  const ActionsModel* model = libtextclassifier3::ViewActionsModel(
+      mmap->handle().start(), mmap->handle().num_bytes());
+  if (!model) {
+    return 0;
+  }
+  return model->version();
+}
+
+jstring GetNameFromMmap(JNIEnv* env, libtextclassifier3::ScopedMmap* mmap) {
+  if (!mmap->handle().ok()) {
+    return env->NewStringUTF("");
+  }
+  const ActionsModel* model = libtextclassifier3::ViewActionsModel(
+      mmap->handle().start(), mmap->handle().num_bytes());
+  if (!model || !model->name()) {
+    return env->NewStringUTF("");
+  }
+  return env->NewStringUTF(model->name()->c_str());
+}
+}  // namespace
+}  // namespace libtextclassifier3
+
+using libtextclassifier3::ActionSuggestionsToJObjectArray;
+using libtextclassifier3::FromJavaActionSuggestionOptions;
+using libtextclassifier3::FromJavaConversation;
+
+TC3_JNI_METHOD(jlong, TC3_ACTIONS_CLASS_NAME, nativeNewActionsModel)
+(JNIEnv* env, jobject thiz, jint fd) {
+  return reinterpret_cast<jlong>(
+      ActionsSuggestions::FromFileDescriptor(fd).release());
+}
+
+TC3_JNI_METHOD(jlong, TC3_ACTIONS_CLASS_NAME, nativeNewActionsModelFromPath)
+(JNIEnv* env, jobject thiz, jstring path) {
+  const std::string path_str = ToStlString(env, path);
+  return reinterpret_cast<jlong>(
+      ActionsSuggestions::FromPath(path_str).release());
+}
+
+TC3_JNI_METHOD(jlong, TC3_ACTIONS_CLASS_NAME,
+               nativeNewActionModelsFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
+  const jint fd = libtextclassifier3::GetFdFromAssetFileDescriptor(env, afd);
+  return reinterpret_cast<jlong>(
+      ActionsSuggestions::FromFileDescriptor(fd, offset, size).release());
+}
+
+TC3_JNI_METHOD(jobjectArray, TC3_ACTIONS_CLASS_NAME, nativeSuggestActions)
+(JNIEnv* env, jobject clazz, jlong ptr, jobject jconversation,
+ jobject joptions) {
+  if (!ptr) {
+    return nullptr;
+  }
+  const Conversation conversation = FromJavaConversation(env, jconversation);
+  const ActionSuggestionOptions actionSuggestionOptions =
+      FromJavaActionSuggestionOptions(env, joptions);
+  ActionsSuggestions* action_model = reinterpret_cast<ActionsSuggestions*>(ptr);
+
+  const ActionsSuggestionsResponse response =
+      action_model->SuggestActions(conversation, actionSuggestionOptions);
+  return ActionSuggestionsToJObjectArray(env, response.actions);
+}
+
+TC3_JNI_METHOD(void, TC3_ACTIONS_CLASS_NAME, nativeCloseActionsModel)
+(JNIEnv* env, jobject clazz, jlong ptr) {
+  ActionsSuggestions* model = reinterpret_cast<ActionsSuggestions*>(ptr);
+  delete model;
+}
+
+TC3_JNI_METHOD(jstring, TC3_ACTIONS_CLASS_NAME, nativeGetLocales)
+(JNIEnv* env, jobject clazz, jint fd) {
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return libtextclassifier3::GetLocalesFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jstring, TC3_ACTIONS_CLASS_NAME, nativeGetName)
+(JNIEnv* env, jobject clazz, jint fd) {
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return libtextclassifier3::GetNameFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jint, TC3_ACTIONS_CLASS_NAME, nativeGetVersion)
+(JNIEnv* env, jobject clazz, jint fd) {
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return libtextclassifier3::GetVersionFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(void, TC3_ACTIONS_CLASS_NAME, nativeSetAnnotator)
+(JNIEnv* env, jobject clazz, jlong ptr, jlong annotatorPtr) {
+  if (!ptr) {
+    return;
+  }
+  ActionsSuggestions* action_model = reinterpret_cast<ActionsSuggestions*>(ptr);
+  Annotator* annotator = reinterpret_cast<Annotator*>(annotatorPtr);
+  action_model->SetAnnotator(annotator);
+}
diff --git a/actions/actions_jni.h b/actions/actions_jni.h
new file mode 100644
index 0000000..48d50db
--- /dev/null
+++ b/actions/actions_jni.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ACTIONS_ACTIONS_JNI_H_
+#define LIBTEXTCLASSIFIER_ACTIONS_ACTIONS_JNI_H_
+
+#include <jni.h>
+#include <string>
+#include "utils/java/jni-base.h"
+
+#ifndef TC3_ACTIONS_CLASS_NAME
+#define TC3_ACTIONS_CLASS_NAME ActionsSuggestionsModel
+#endif
+
+#define TC3_ACTIONS_CLASS_NAME_STR TC3_ADD_QUOTES(TC3_ACTIONS_CLASS_NAME)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+TC3_JNI_METHOD(jlong, TC3_ACTIONS_CLASS_NAME, nativeNewActionsModel)
+(JNIEnv* env, jobject thiz, jint fd);
+
+TC3_JNI_METHOD(jlong, TC3_ACTIONS_CLASS_NAME, nativeNewActionsModelFromPath)
+(JNIEnv* env, jobject thiz, jstring path);
+
+TC3_JNI_METHOD(jlong, TC3_ACTIONS_CLASS_NAME,
+               nativeNewActionModelsFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
+
+TC3_JNI_METHOD(jobjectArray, TC3_ACTIONS_CLASS_NAME, nativeSuggestActions)
+(JNIEnv* env, jobject thiz, jlong ptr, jobject jconversation, jobject joptions);
+
+TC3_JNI_METHOD(void, TC3_ACTIONS_CLASS_NAME, nativeCloseActionsModel)
+(JNIEnv* env, jobject thiz, jlong ptr);
+
+TC3_JNI_METHOD(jstring, TC3_ACTIONS_CLASS_NAME, nativeGetLocales)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jstring, TC3_ACTIONS_CLASS_NAME, nativeGetName)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jint, TC3_ACTIONS_CLASS_NAME, nativeGetVersion)
+(JNIEnv* env, jobject clazz, jint fd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // LIBTEXTCLASSIFIER_ACTIONS_ACTIONS_JNI_H_
diff --git a/actions/actions_model.fbs b/actions/actions_model.fbs
new file mode 100755
index 0000000..8d1faba
--- /dev/null
+++ b/actions/actions_model.fbs
@@ -0,0 +1,128 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+file_identifier "TC3A";
+
+// Options to specify triggering behaviour per action class.
+namespace libtextclassifier3;
+table ActionTypeOptions {
+  // The name of the predicted action.
+  name:string;
+
+  // Triggering behaviour.
+  // Whether the action class is considered in the model output or not.
+  enabled:bool = true;
+
+  // Minimal output score threshold.
+  min_triggering_score:float = 0;
+}
+
+// TensorFlow Lite model for suggesting actions.
+namespace libtextclassifier3;
+table TensorflowLiteModelSpec {
+  // TensorFlow Lite model for suggesting actions.
+  tflite_model:[ubyte] (force_align: 16);
+
+  // Input specification.
+  input_user_id:int = 0;
+
+  input_context:int = 1;
+  input_context_length:int = 2;
+  input_time_diffs:int = 3;
+  input_num_suggestions:int = 4;
+
+  // Output specification.
+  output_replies:int = 0;
+
+  output_replies_scores:int = 1;
+  output_sensitive_topic_score:int = 3;
+  output_triggering_score:int = 4;
+  output_actions_scores:int = 5;
+}
+
+namespace libtextclassifier3.AnnotationActionsSpec_;
+table AnnotationMapping {
+  // The annotation collection.
+  annotation_collection:string;
+
+  // The action name to use.
+  action_name:string;
+
+  // Default score in case we do not use the annotation score.
+  default_score:float;
+
+  // Whether to use the score of the annotation as the action score.
+  use_annotation_score:bool = true;
+
+  // Minimum threshold for the annotation score for filtering.
+  min_annotation_score:float;
+}
+
+// Configuration for actions based on annotations.
+namespace libtextclassifier3;
+table AnnotationActionsSpec {
+  annotation_mapping:[libtextclassifier3.AnnotationActionsSpec_.AnnotationMapping];
+}
+
+namespace libtextclassifier3;
+table ActionsModel {
+  // Comma-separated list of locales supported by the model as BCP 47 tags.
+  locales:string;
+
+  // Version of the actions model.
+  version:int;
+
+  // A name for the model that can be used e.g. for logging.
+  name:string;
+
+  tflite_model_spec:libtextclassifier3.TensorflowLiteModelSpec;
+
+  // Output classes.
+  smart_reply_action_type:string;
+
+  action_type:[libtextclassifier3.ActionTypeOptions];
+
+  // Lower bound thresholds for model prediction output.
+  min_actions_confidence:float;
+
+  min_triggering_confidence:float;
+
+  // Maximum sensitive score for which actions and smart replies are shown.
+  max_sensitive_topic_score:float = 1;
+
+  // Default number of smart reply predictions.
+  num_smart_replies:int = 3;
+
+  // Length of message history to consider, -1 if unbounded.
+  max_conversation_history_length:int = 1;
+
+  // Filtering behaviour.
+  // Whether to suppress all model output when a conversation is classified as
+  // sensitive.
+  suppress_on_sensitive_topic:bool = true;
+
+  // Thresholds on the model prediction input.
+  // The minimal length of input to consider for prediction.
+  min_input_length:int = 0;
+
+  // The maximal length of input to consider for prediction, -1 if unbounded.
+  max_input_length:int = -1;
+
+  // Configuration for mapping annotations to action suggestions.
+  annotation_actions_spec:libtextclassifier3.AnnotationActionsSpec;
+}
+
+root_type libtextclassifier3.ActionsModel;
diff --git a/actions/test_data/actions_suggestions_test.model b/actions/test_data/actions_suggestions_test.model
new file mode 100644
index 0000000..956eced
--- /dev/null
+++ b/actions/test_data/actions_suggestions_test.model
Binary files differ
diff --git a/text-classifier.cc b/annotator/annotator.cc
similarity index 79%
rename from text-classifier.cc
rename to annotator/annotator.cc
index e20813a..2be9d3c 100644
--- a/text-classifier.cc
+++ b/annotator/annotator.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "text-classifier.h"
+#include "annotator/annotator.h"
 
 #include <algorithm>
 #include <cctype>
@@ -22,39 +22,76 @@
 #include <iterator>
 #include <numeric>
 
-#include "util/base/logging.h"
-#include "util/math/softmax.h"
-#include "util/utf8/unicodetext.h"
+#include "utils/base/logging.h"
+#include "utils/checksum.h"
+#include "utils/math/softmax.h"
+#include "utils/utf8/unicodetext.h"
 
-namespace libtextclassifier2 {
-const std::string& TextClassifier::kOtherCollection =
+namespace libtextclassifier3 {
+const std::string& Annotator::kOtherCollection =
     *[]() { return new std::string("other"); }();
-const std::string& TextClassifier::kPhoneCollection =
+const std::string& Annotator::kPhoneCollection =
     *[]() { return new std::string("phone"); }();
-const std::string& TextClassifier::kAddressCollection =
+const std::string& Annotator::kAddressCollection =
     *[]() { return new std::string("address"); }();
-const std::string& TextClassifier::kDateCollection =
+const std::string& Annotator::kDateCollection =
     *[]() { return new std::string("date"); }();
+const std::string& Annotator::kUrlCollection =
+    *[]() { return new std::string("url"); }();
+const std::string& Annotator::kFlightCollection =
+    *[]() { return new std::string("flight"); }();
+const std::string& Annotator::kEmailCollection =
+    *[]() { return new std::string("email"); }();
+const std::string& Annotator::kIbanCollection =
+    *[]() { return new std::string("iban"); }();
+const std::string& Annotator::kPaymentCardCollection =
+    *[]() { return new std::string("payment_card"); }();
+const std::string& Annotator::kIsbnCollection =
+    *[]() { return new std::string("isbn"); }();
+const std::string& Annotator::kTrackingNumberCollection =
+    *[]() { return new std::string("tracking_number"); }();
 
 namespace {
 const Model* LoadAndVerifyModel(const void* addr, int size) {
-  const Model* model = GetModel(addr);
-
   flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t*>(addr), size);
-  if (model->Verify(verifier)) {
-    return model;
+  if (VerifyModelBuffer(verifier)) {
+    return GetModel(addr);
   } else {
     return nullptr;
   }
 }
+
+// If lib is not nullptr, just returns lib. Otherwise, if lib is nullptr, will
+// create a new instance, assign ownership to owned_lib, and return it.
+const UniLib* MaybeCreateUnilib(const UniLib* lib,
+                                std::unique_ptr<UniLib>* owned_lib) {
+  if (lib) {
+    return lib;
+  } else {
+    owned_lib->reset(new UniLib);
+    return owned_lib->get();
+  }
+}
+
+// As above, but for CalendarLib.
+const CalendarLib* MaybeCreateCalendarlib(
+    const CalendarLib* lib, std::unique_ptr<CalendarLib>* owned_lib) {
+  if (lib) {
+    return lib;
+  } else {
+    owned_lib->reset(new CalendarLib);
+    return owned_lib->get();
+  }
+}
+
 }  // namespace
 
 tflite::Interpreter* InterpreterManager::SelectionInterpreter() {
   if (!selection_interpreter_) {
-    TC_CHECK(selection_executor_);
+    TC3_CHECK(selection_executor_);
     selection_interpreter_ = selection_executor_->CreateInterpreter();
     if (!selection_interpreter_) {
-      TC_LOG(ERROR) << "Could not build TFLite interpreter.";
+      TC3_LOG(ERROR) << "Could not build TFLite interpreter.";
     }
   }
   return selection_interpreter_.get();
@@ -62,24 +99,25 @@
 
 tflite::Interpreter* InterpreterManager::ClassificationInterpreter() {
   if (!classification_interpreter_) {
-    TC_CHECK(classification_executor_);
+    TC3_CHECK(classification_executor_);
     classification_interpreter_ = classification_executor_->CreateInterpreter();
     if (!classification_interpreter_) {
-      TC_LOG(ERROR) << "Could not build TFLite interpreter.";
+      TC3_LOG(ERROR) << "Could not build TFLite interpreter.";
     }
   }
   return classification_interpreter_.get();
 }
 
-std::unique_ptr<TextClassifier> TextClassifier::FromUnownedBuffer(
-    const char* buffer, int size, const UniLib* unilib) {
+std::unique_ptr<Annotator> Annotator::FromUnownedBuffer(
+    const char* buffer, int size, const UniLib* unilib,
+    const CalendarLib* calendarlib) {
   const Model* model = LoadAndVerifyModel(buffer, size);
   if (model == nullptr) {
     return nullptr;
   }
 
   auto classifier =
-      std::unique_ptr<TextClassifier>(new TextClassifier(model, unilib));
+      std::unique_ptr<Annotator>(new Annotator(model, unilib, calendarlib));
   if (!classifier->IsInitialized()) {
     return nullptr;
   }
@@ -87,22 +125,24 @@
   return classifier;
 }
 
-std::unique_ptr<TextClassifier> TextClassifier::FromScopedMmap(
-    std::unique_ptr<ScopedMmap>* mmap, const UniLib* unilib) {
+
+std::unique_ptr<Annotator> Annotator::FromScopedMmap(
+    std::unique_ptr<ScopedMmap>* mmap, const UniLib* unilib,
+    const CalendarLib* calendarlib) {
   if (!(*mmap)->handle().ok()) {
-    TC_VLOG(1) << "Mmap failed.";
+    TC3_VLOG(1) << "Mmap failed.";
     return nullptr;
   }
 
   const Model* model = LoadAndVerifyModel((*mmap)->handle().start(),
                                           (*mmap)->handle().num_bytes());
   if (!model) {
-    TC_LOG(ERROR) << "Model verification failed.";
+    TC3_LOG(ERROR) << "Model verification failed.";
     return nullptr;
   }
 
-  auto classifier =
-      std::unique_ptr<TextClassifier>(new TextClassifier(mmap, model, unilib));
+  auto classifier = std::unique_ptr<Annotator>(
+      new Annotator(mmap, model, unilib, calendarlib));
   if (!classifier->IsInitialized()) {
     return nullptr;
   }
@@ -110,29 +150,52 @@
   return classifier;
 }
 
-std::unique_ptr<TextClassifier> TextClassifier::FromFileDescriptor(
-    int fd, int offset, int size, const UniLib* unilib) {
+std::unique_ptr<Annotator> Annotator::FromFileDescriptor(
+    int fd, int offset, int size, const UniLib* unilib,
+    const CalendarLib* calendarlib) {
   std::unique_ptr<ScopedMmap> mmap(new ScopedMmap(fd, offset, size));
-  return FromScopedMmap(&mmap, unilib);
+  return FromScopedMmap(&mmap, unilib, calendarlib);
 }
 
-std::unique_ptr<TextClassifier> TextClassifier::FromFileDescriptor(
-    int fd, const UniLib* unilib) {
+std::unique_ptr<Annotator> Annotator::FromFileDescriptor(
+    int fd, const UniLib* unilib, const CalendarLib* calendarlib) {
   std::unique_ptr<ScopedMmap> mmap(new ScopedMmap(fd));
-  return FromScopedMmap(&mmap, unilib);
+  return FromScopedMmap(&mmap, unilib, calendarlib);
 }
 
-std::unique_ptr<TextClassifier> TextClassifier::FromPath(
-    const std::string& path, const UniLib* unilib) {
+std::unique_ptr<Annotator> Annotator::FromPath(const std::string& path,
+                                               const UniLib* unilib,
+                                               const CalendarLib* calendarlib) {
   std::unique_ptr<ScopedMmap> mmap(new ScopedMmap(path));
-  return FromScopedMmap(&mmap, unilib);
+  return FromScopedMmap(&mmap, unilib, calendarlib);
 }
 
-void TextClassifier::ValidateAndInitialize() {
+Annotator::Annotator(std::unique_ptr<ScopedMmap>* mmap, const Model* model,
+                     const UniLib* unilib, const CalendarLib* calendarlib)
+    : model_(model),
+      mmap_(std::move(*mmap)),
+      owned_unilib_(nullptr),
+      unilib_(MaybeCreateUnilib(unilib, &owned_unilib_)),
+      owned_calendarlib_(nullptr),
+      calendarlib_(MaybeCreateCalendarlib(calendarlib, &owned_calendarlib_)) {
+  ValidateAndInitialize();
+}
+
+Annotator::Annotator(const Model* model, const UniLib* unilib,
+                     const CalendarLib* calendarlib)
+    : model_(model),
+      owned_unilib_(nullptr),
+      unilib_(MaybeCreateUnilib(unilib, &owned_unilib_)),
+      owned_calendarlib_(nullptr),
+      calendarlib_(MaybeCreateCalendarlib(calendarlib, &owned_calendarlib_)) {
+  ValidateAndInitialize();
+}
+
+void Annotator::ValidateAndInitialize() {
   initialized_ = false;
 
   if (model_ == nullptr) {
-    TC_LOG(ERROR) << "No model specified.";
+    TC3_LOG(ERROR) << "No model specified.";
     return;
   }
 
@@ -150,24 +213,24 @@
   // Annotation requires the selection model.
   if (model_enabled_for_annotation || model_enabled_for_selection) {
     if (!model_->selection_options()) {
-      TC_LOG(ERROR) << "No selection options.";
+      TC3_LOG(ERROR) << "No selection options.";
       return;
     }
     if (!model_->selection_feature_options()) {
-      TC_LOG(ERROR) << "No selection feature options.";
+      TC3_LOG(ERROR) << "No selection feature options.";
       return;
     }
     if (!model_->selection_feature_options()->bounds_sensitive_features()) {
-      TC_LOG(ERROR) << "No selection bounds sensitive feature options.";
+      TC3_LOG(ERROR) << "No selection bounds sensitive feature options.";
       return;
     }
     if (!model_->selection_model()) {
-      TC_LOG(ERROR) << "No selection model.";
+      TC3_LOG(ERROR) << "No selection model.";
       return;
     }
-    selection_executor_ = ModelExecutor::Instance(model_->selection_model());
+    selection_executor_ = ModelExecutor::FromBuffer(model_->selection_model());
     if (!selection_executor_) {
-      TC_LOG(ERROR) << "Could not initialize selection executor.";
+      TC3_LOG(ERROR) << "Could not initialize selection executor.";
       return;
     }
     selection_feature_processor_.reset(
@@ -180,29 +243,29 @@
   if (model_enabled_for_annotation || model_enabled_for_classification ||
       model_enabled_for_selection) {
     if (!model_->classification_options()) {
-      TC_LOG(ERROR) << "No classification options.";
+      TC3_LOG(ERROR) << "No classification options.";
       return;
     }
 
     if (!model_->classification_feature_options()) {
-      TC_LOG(ERROR) << "No classification feature options.";
+      TC3_LOG(ERROR) << "No classification feature options.";
       return;
     }
 
     if (!model_->classification_feature_options()
              ->bounds_sensitive_features()) {
-      TC_LOG(ERROR) << "No classification bounds sensitive feature options.";
+      TC3_LOG(ERROR) << "No classification bounds sensitive feature options.";
       return;
     }
     if (!model_->classification_model()) {
-      TC_LOG(ERROR) << "No clf model.";
+      TC3_LOG(ERROR) << "No clf model.";
       return;
     }
 
     classification_executor_ =
-        ModelExecutor::Instance(model_->classification_model());
+        ModelExecutor::FromBuffer(model_->classification_model());
     if (!classification_executor_) {
-      TC_LOG(ERROR) << "Could not initialize classification executor.";
+      TC3_LOG(ERROR) << "Could not initialize classification executor.";
       return;
     }
 
@@ -215,7 +278,7 @@
   if (model_enabled_for_annotation || model_enabled_for_classification ||
       model_enabled_for_selection) {
     if (!model_->embedding_model()) {
-      TC_LOG(ERROR) << "No embedding model.";
+      TC3_LOG(ERROR) << "No embedding model.";
       return;
     }
 
@@ -227,17 +290,17 @@
          model_->selection_feature_options()->embedding_quantization_bits() !=
              model_->classification_feature_options()
                  ->embedding_quantization_bits())) {
-      TC_LOG(ERROR) << "Mismatching embedding size/quantization.";
+      TC3_LOG(ERROR) << "Mismatching embedding size/quantization.";
       return;
     }
 
-    embedding_executor_ = TFLiteEmbeddingExecutor::Instance(
+    embedding_executor_ = TFLiteEmbeddingExecutor::FromBuffer(
         model_->embedding_model(),
         model_->classification_feature_options()->embedding_size(),
         model_->classification_feature_options()
             ->embedding_quantization_bits());
     if (!embedding_executor_) {
-      TC_LOG(ERROR) << "Could not initialize embedding executor.";
+      TC3_LOG(ERROR) << "Could not initialize embedding executor.";
       return;
     }
   }
@@ -245,16 +308,16 @@
   std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
   if (model_->regex_model()) {
     if (!InitializeRegexModel(decompressor.get())) {
-      TC_LOG(ERROR) << "Could not initialize regex model.";
+      TC3_LOG(ERROR) << "Could not initialize regex model.";
       return;
     }
   }
 
   if (model_->datetime_model()) {
-    datetime_parser_ = DatetimeParser::Instance(model_->datetime_model(),
-                                                *unilib_, decompressor.get());
+    datetime_parser_ = DatetimeParser::Instance(
+        model_->datetime_model(), *unilib_, *calendarlib_, decompressor.get());
     if (!datetime_parser_) {
-      TC_LOG(ERROR) << "Could not initialize datetime parser.";
+      TC3_LOG(ERROR) << "Could not initialize datetime parser.";
       return;
     }
   }
@@ -283,7 +346,7 @@
   initialized_ = true;
 }
 
-bool TextClassifier::InitializeRegexModel(ZlibDecompressor* decompressor) {
+bool Annotator::InitializeRegexModel(ZlibDecompressor* decompressor) {
   if (!model_->regex_model()->patterns()) {
     return true;
   }
@@ -296,7 +359,7 @@
                                    regex_pattern->compressed_pattern(),
                                    decompressor);
     if (!compiled_pattern) {
-      TC_LOG(INFO) << "Failed to load regex pattern";
+      TC3_LOG(INFO) << "Failed to load regex pattern";
       return false;
     }
 
@@ -309,10 +372,13 @@
     if (regex_pattern->enabled_modes() & ModeFlag_SELECTION) {
       selection_regex_patterns_.push_back(regex_pattern_id);
     }
-    regex_patterns_.push_back({regex_pattern->collection_name()->str(),
-                               regex_pattern->target_classification_score(),
-                               regex_pattern->priority_score(),
-                               std::move(compiled_pattern)});
+    regex_patterns_.push_back({
+        regex_pattern->collection_name()->str(),
+        regex_pattern->target_classification_score(),
+        regex_pattern->priority_score(),
+        std::move(compiled_pattern),
+        regex_pattern->verification_options(),
+    });
     if (regex_pattern->use_approximate_matching()) {
       regex_approximate_match_pattern_ids_.insert(regex_pattern_id);
     }
@@ -322,6 +388,18 @@
   return true;
 }
 
+bool Annotator::InitializeKnowledgeEngine(
+    const std::string& serialized_config) {
+  std::unique_ptr<KnowledgeEngine> knowledge_engine(
+      new KnowledgeEngine(unilib_));
+  if (!knowledge_engine->Initialize(serialized_config)) {
+    TC3_LOG(ERROR) << "Failed to initialize the knowledge engine.";
+    return false;
+  }
+  knowledge_engine_ = std::move(knowledge_engine);
+  return true;
+}
+
 namespace {
 
 int CountDigits(const std::string& str, CodepointSpan selection_indices) {
@@ -347,6 +425,19 @@
   std::advance(selection_end, selection_indices.second);
   return UnicodeText::UTF8Substring(selection_begin, selection_end);
 }
+
+bool VerifyCandidate(const VerificationOptions* verification_options,
+                     const std::string& match) {
+  if (!verification_options) {
+    return true;
+  }
+  if (verification_options->verify_luhn_checksum() &&
+      !VerifyLuhnChecksum(match)) {
+    return false;
+  }
+  return true;
+}
+
 }  // namespace
 
 namespace internal {
@@ -356,7 +447,7 @@
 CodepointSpan SnapLeftIfWhitespaceSelection(CodepointSpan span,
                                             const UnicodeText& context_unicode,
                                             const UniLib& unilib) {
-  TC_CHECK(ValidNonEmptySpan(span));
+  TC3_CHECK(ValidNonEmptySpan(span));
 
   UnicodeText::const_iterator it;
 
@@ -390,32 +481,32 @@
 }
 }  // namespace internal
 
-bool TextClassifier::FilteredForAnnotation(const AnnotatedSpan& span) const {
+bool Annotator::FilteredForAnnotation(const AnnotatedSpan& span) const {
   return !span.classification.empty() &&
          filtered_collections_annotation_.find(
              span.classification[0].collection) !=
              filtered_collections_annotation_.end();
 }
 
-bool TextClassifier::FilteredForClassification(
+bool Annotator::FilteredForClassification(
     const ClassificationResult& classification) const {
   return filtered_collections_classification_.find(classification.collection) !=
          filtered_collections_classification_.end();
 }
 
-bool TextClassifier::FilteredForSelection(const AnnotatedSpan& span) const {
+bool Annotator::FilteredForSelection(const AnnotatedSpan& span) const {
   return !span.classification.empty() &&
          filtered_collections_selection_.find(
              span.classification[0].collection) !=
              filtered_collections_selection_.end();
 }
 
-CodepointSpan TextClassifier::SuggestSelection(
+CodepointSpan Annotator::SuggestSelection(
     const std::string& context, CodepointSpan click_indices,
     const SelectionOptions& options) const {
   CodepointSpan original_click_indices = click_indices;
   if (!initialized_) {
-    TC_LOG(ERROR) << "Not initialized";
+    TC3_LOG(ERROR) << "Not initialized";
     return original_click_indices;
   }
   if (!(model_->enabled_modes() & ModeFlag_SELECTION)) {
@@ -435,8 +526,8 @@
       click_indices.first >= context_codepoint_size ||
       click_indices.second > context_codepoint_size ||
       click_indices.first >= click_indices.second) {
-    TC_VLOG(1) << "Trying to run SuggestSelection with invalid indices: "
-               << click_indices.first << " " << click_indices.second;
+    TC3_VLOG(1) << "Trying to run SuggestSelection with invalid indices: "
+                << click_indices.first << " " << click_indices.second;
     return original_click_indices;
   }
 
@@ -448,7 +539,7 @@
     // finding logic finds the clicked token correctly. This modification is
     // done by the following function. Note, that it's enough to check the left
     // side of the current selection, because if the white-space is a part of a
-    // multi-selection, neccessarily both tokens - on the left and the right
+    // multi-selection, necessarily both tokens - on the left and the right
     // sides need to be selected. Thus snapping only to the left is sufficient
     // (there's a check at the bottom that makes sure that if we snap to the
     // left token but the result does not contain the initial white-space,
@@ -463,17 +554,21 @@
   std::vector<Token> tokens;
   if (!ModelSuggestSelection(context_unicode, click_indices,
                              &interpreter_manager, &tokens, &candidates)) {
-    TC_LOG(ERROR) << "Model suggest selection failed.";
+    TC3_LOG(ERROR) << "Model suggest selection failed.";
     return original_click_indices;
   }
   if (!RegexChunk(context_unicode, selection_regex_patterns_, &candidates)) {
-    TC_LOG(ERROR) << "Regex suggest selection failed.";
+    TC3_LOG(ERROR) << "Regex suggest selection failed.";
     return original_click_indices;
   }
   if (!DatetimeChunk(UTF8ToUnicodeText(context, /*do_copy=*/false),
                      /*reference_time_ms_utc=*/0, /*reference_timezone=*/"",
                      options.locales, ModeFlag_SELECTION, &candidates)) {
-    TC_LOG(ERROR) << "Datetime suggest selection failed.";
+    TC3_LOG(ERROR) << "Datetime suggest selection failed.";
+    return original_click_indices;
+  }
+  if (knowledge_engine_ && !knowledge_engine_->Chunk(context, &candidates)) {
+    TC3_LOG(ERROR) << "Knowledge suggest selection failed.";
     return original_click_indices;
   }
 
@@ -488,7 +583,7 @@
   std::vector<int> candidate_indices;
   if (!ResolveConflicts(candidates, context, tokens, &interpreter_manager,
                         &candidate_indices)) {
-    TC_LOG(ERROR) << "Couldn't resolve conflicts.";
+    TC3_LOG(ERROR) << "Couldn't resolve conflicts.";
     return original_click_indices;
   }
 
@@ -541,10 +636,11 @@
 }
 }  // namespace
 
-bool TextClassifier::ResolveConflicts(
-    const std::vector<AnnotatedSpan>& candidates, const std::string& context,
-    const std::vector<Token>& cached_tokens,
-    InterpreterManager* interpreter_manager, std::vector<int>* result) const {
+bool Annotator::ResolveConflicts(const std::vector<AnnotatedSpan>& candidates,
+                                 const std::string& context,
+                                 const std::vector<Token>& cached_tokens,
+                                 InterpreterManager* interpreter_manager,
+                                 std::vector<int>* result) const {
   result->clear();
   result->reserve(candidates.size());
   for (int i = 0; i < candidates.size();) {
@@ -575,7 +671,7 @@
 inline bool ClassifiedAsOther(
     const std::vector<ClassificationResult>& classification) {
   return !classification.empty() &&
-         classification[0].collection == TextClassifier::kOtherCollection;
+         classification[0].collection == Annotator::kOtherCollection;
 }
 
 float GetPriorityScore(
@@ -588,11 +684,12 @@
 }
 }  // namespace
 
-bool TextClassifier::ResolveConflict(
-    const std::string& context, const std::vector<Token>& cached_tokens,
-    const std::vector<AnnotatedSpan>& candidates, int start_index,
-    int end_index, InterpreterManager* interpreter_manager,
-    std::vector<int>* chosen_indices) const {
+bool Annotator::ResolveConflict(const std::string& context,
+                                const std::vector<Token>& cached_tokens,
+                                const std::vector<AnnotatedSpan>& candidates,
+                                int start_index, int end_index,
+                                InterpreterManager* interpreter_manager,
+                                std::vector<int>* chosen_indices) const {
   std::vector<int> conflicting_indices;
   std::unordered_map<int, float> scores;
   for (int i = start_index; i < end_index; ++i) {
@@ -645,7 +742,7 @@
   return true;
 }
 
-bool TextClassifier::ModelSuggestSelection(
+bool Annotator::ModelSuggestSelection(
     const UnicodeText& context_unicode, CodepointSpan click_indices,
     InterpreterManager* interpreter_manager, std::vector<Token>* tokens,
     std::vector<AnnotatedSpan>* result) const {
@@ -661,7 +758,7 @@
       selection_feature_processor_->GetOptions()->only_use_line_with_click(),
       tokens, &click_pos);
   if (click_pos == kInvalidIndex) {
-    TC_VLOG(1) << "Could not calculate the click position.";
+    TC3_VLOG(1) << "Could not calculate the click position.";
     return false;
   }
 
@@ -719,7 +816,7 @@
           selection_feature_processor_->EmbeddingSize() +
               selection_feature_processor_->DenseFeaturesCount(),
           &cached_features)) {
-    TC_LOG(ERROR) << "Could not extract features.";
+    TC3_LOG(ERROR) << "Could not extract features.";
     return false;
   }
 
@@ -728,7 +825,7 @@
   if (!ModelChunk(tokens->size(), /*span_of_interest=*/symmetry_context_span,
                   interpreter_manager->SelectionInterpreter(), *cached_features,
                   &chunks)) {
-    TC_LOG(ERROR) << "Could not chunk.";
+    TC3_LOG(ERROR) << "Could not chunk.";
     return false;
   }
 
@@ -749,7 +846,7 @@
   return true;
 }
 
-bool TextClassifier::ModelClassifyText(
+bool Annotator::ModelClassifyText(
     const std::string& context, CodepointSpan selection_indices,
     InterpreterManager* interpreter_manager,
     FeatureProcessor::EmbeddingCache* embedding_cache,
@@ -796,7 +893,7 @@
 }
 }  // namespace internal
 
-TokenSpan TextClassifier::ClassifyTextUpperBoundNeededTokens() const {
+TokenSpan Annotator::ClassifyTextUpperBoundNeededTokens() const {
   const FeatureProcessorOptions_::BoundsSensitiveFeatures*
       bounds_sensitive_features =
           classification_feature_processor_->GetOptions()
@@ -815,7 +912,7 @@
   }
 }
 
-bool TextClassifier::ModelClassifyText(
+bool Annotator::ModelClassifyText(
     const std::string& context, const std::vector<Token>& cached_tokens,
     CodepointSpan selection_indices, InterpreterManager* interpreter_manager,
     FeatureProcessor::EmbeddingCache* embedding_cache,
@@ -850,7 +947,7 @@
               ->bounds_sensitive_features();
   if (selection_token_span.first == kInvalidIndex ||
       selection_token_span.second == kInvalidIndex) {
-    TC_LOG(ERROR) << "Could not determine span.";
+    TC3_LOG(ERROR) << "Could not determine span.";
     return false;
   }
 
@@ -865,7 +962,7 @@
         /*num_tokens_right=*/bounds_sensitive_features->num_tokens_after());
   } else {
     if (click_pos == kInvalidIndex) {
-      TC_LOG(ERROR) << "Couldn't choose a click position.";
+      TC3_LOG(ERROR) << "Couldn't choose a click position.";
       return false;
     }
     // The extraction span is the clicked token with context_size tokens on
@@ -891,7 +988,7 @@
           classification_feature_processor_->EmbeddingSize() +
               classification_feature_processor_->DenseFeaturesCount(),
           &cached_features)) {
-    TC_LOG(ERROR) << "Could not extract features.";
+    TC3_LOG(ERROR) << "Could not extract features.";
     return false;
   }
 
@@ -909,13 +1006,13 @@
                         {1, static_cast<int>(features.size())}),
       interpreter_manager->ClassificationInterpreter());
   if (!logits.is_valid()) {
-    TC_LOG(ERROR) << "Couldn't compute logits.";
+    TC3_LOG(ERROR) << "Couldn't compute logits.";
     return false;
   }
 
   if (logits.dims() != 2 || logits.dim(0) != 1 ||
       logits.dim(1) != classification_feature_processor_->NumCollections()) {
-    TC_LOG(ERROR) << "Mismatching output";
+    TC3_LOG(ERROR) << "Mismatching output";
     return false;
   }
 
@@ -956,7 +1053,7 @@
   return true;
 }
 
-bool TextClassifier::RegexClassifyText(
+bool Annotator::RegexClassifyText(
     const std::string& context, CodepointSpan selection_indices,
     ClassificationResult* classification_result) const {
   const std::string selection_text =
@@ -980,21 +1077,22 @@
     if (status != UniLib::RegexMatcher::kNoError) {
       return false;
     }
-    if (matches) {
+    if (matches &&
+        VerifyCandidate(regex_pattern.verification_options, selection_text)) {
       *classification_result = {regex_pattern.collection_name,
                                 regex_pattern.target_classification_score,
                                 regex_pattern.priority_score};
       return true;
     }
     if (status != UniLib::RegexMatcher::kNoError) {
-      TC_LOG(ERROR) << "Cound't match regex: " << pattern_id;
+      TC3_LOG(ERROR) << "Cound't match regex: " << pattern_id;
     }
   }
 
   return false;
 }
 
-bool TextClassifier::DatetimeClassifyText(
+bool Annotator::DatetimeClassifyText(
     const std::string& context, CodepointSpan selection_indices,
     const ClassificationOptions& options,
     ClassificationResult* classification_result) const {
@@ -1010,7 +1108,7 @@
                                options.reference_timezone, options.locales,
                                ModeFlag_CLASSIFICATION,
                                /*anchor_start_end=*/true, &datetime_spans)) {
-    TC_LOG(ERROR) << "Error during parsing datetime.";
+    TC3_LOG(ERROR) << "Error during parsing datetime.";
     return false;
   }
   for (const DatetimeParseResultSpan& datetime_span : datetime_spans) {
@@ -1028,11 +1126,11 @@
   return false;
 }
 
-std::vector<ClassificationResult> TextClassifier::ClassifyText(
+std::vector<ClassificationResult> Annotator::ClassifyText(
     const std::string& context, CodepointSpan selection_indices,
     const ClassificationOptions& options) const {
   if (!initialized_) {
-    TC_LOG(ERROR) << "Not initialized";
+    TC3_LOG(ERROR) << "Not initialized";
     return {};
   }
 
@@ -1045,12 +1143,23 @@
   }
 
   if (std::get<0>(selection_indices) >= std::get<1>(selection_indices)) {
-    TC_VLOG(1) << "Trying to run ClassifyText with invalid indices: "
-               << std::get<0>(selection_indices) << " "
-               << std::get<1>(selection_indices);
+    TC3_VLOG(1) << "Trying to run ClassifyText with invalid indices: "
+                << std::get<0>(selection_indices) << " "
+                << std::get<1>(selection_indices);
     return {};
   }
 
+  // Try the knowledge engine.
+  ClassificationResult knowledge_result;
+  if (knowledge_engine_ && knowledge_engine_->ClassifyText(
+                               context, selection_indices, &knowledge_result)) {
+    if (!FilteredForClassification(knowledge_result)) {
+      return {knowledge_result};
+    } else {
+      return {{kOtherCollection, 1.0}};
+    }
+  }
+
   // Try the regular expression models.
   ClassificationResult regex_result;
   if (RegexClassifyText(context, selection_indices, &regex_result)) {
@@ -1091,10 +1200,10 @@
   return {};
 }
 
-bool TextClassifier::ModelAnnotate(const std::string& context,
-                                   InterpreterManager* interpreter_manager,
-                                   std::vector<Token>* tokens,
-                                   std::vector<AnnotatedSpan>* result) const {
+bool Annotator::ModelAnnotate(const std::string& context,
+                              InterpreterManager* interpreter_manager,
+                              std::vector<Token>* tokens,
+                              std::vector<AnnotatedSpan>* result) const {
   if (model_->triggering_options() == nullptr ||
       !(model_->triggering_options()->enabled_modes() & ModeFlag_ANNOTATION)) {
     return true;
@@ -1142,7 +1251,7 @@
             selection_feature_processor_->EmbeddingSize() +
                 selection_feature_processor_->DenseFeaturesCount(),
             &cached_features)) {
-      TC_LOG(ERROR) << "Could not extract features.";
+      TC3_LOG(ERROR) << "Could not extract features.";
       return false;
     }
 
@@ -1150,7 +1259,7 @@
     if (!ModelChunk(tokens->size(), /*span_of_interest=*/full_line_span,
                     interpreter_manager->SelectionInterpreter(),
                     *cached_features, &local_chunks)) {
-      TC_LOG(ERROR) << "Could not chunk.";
+      TC3_LOG(ERROR) << "Could not chunk.";
       return false;
     }
 
@@ -1166,9 +1275,9 @@
         if (!ModelClassifyText(line_str, *tokens, codepoint_span,
                                interpreter_manager, &embedding_cache,
                                &classification)) {
-          TC_LOG(ERROR) << "Could not classify text: "
-                        << (codepoint_span.first + offset) << " "
-                        << (codepoint_span.second + offset);
+          TC3_LOG(ERROR) << "Could not classify text: "
+                         << (codepoint_span.first + offset) << " "
+                         << (codepoint_span.second + offset);
           return false;
         }
 
@@ -1187,21 +1296,20 @@
   return true;
 }
 
-const FeatureProcessor* TextClassifier::SelectionFeatureProcessorForTests()
-    const {
+const FeatureProcessor* Annotator::SelectionFeatureProcessorForTests() const {
   return selection_feature_processor_.get();
 }
 
-const FeatureProcessor* TextClassifier::ClassificationFeatureProcessorForTests()
+const FeatureProcessor* Annotator::ClassificationFeatureProcessorForTests()
     const {
   return classification_feature_processor_.get();
 }
 
-const DatetimeParser* TextClassifier::DatetimeParserForTests() const {
+const DatetimeParser* Annotator::DatetimeParserForTests() const {
   return datetime_parser_.get();
 }
 
-std::vector<AnnotatedSpan> TextClassifier::Annotate(
+std::vector<AnnotatedSpan> Annotator::Annotate(
     const std::string& context, const AnnotationOptions& options) const {
   std::vector<AnnotatedSpan> candidates;
 
@@ -1218,14 +1326,14 @@
   // Annotate with the selection model.
   std::vector<Token> tokens;
   if (!ModelAnnotate(context, &interpreter_manager, &tokens, &candidates)) {
-    TC_LOG(ERROR) << "Couldn't run ModelAnnotate.";
+    TC3_LOG(ERROR) << "Couldn't run ModelAnnotate.";
     return {};
   }
 
   // Annotate with the regular expression models.
   if (!RegexChunk(UTF8ToUnicodeText(context, /*do_copy=*/false),
                   annotation_regex_patterns_, &candidates)) {
-    TC_LOG(ERROR) << "Couldn't run RegexChunk.";
+    TC3_LOG(ERROR) << "Couldn't run RegexChunk.";
     return {};
   }
 
@@ -1233,7 +1341,13 @@
   if (!DatetimeChunk(UTF8ToUnicodeText(context, /*do_copy=*/false),
                      options.reference_time_ms_utc, options.reference_timezone,
                      options.locales, ModeFlag_ANNOTATION, &candidates)) {
-    TC_LOG(ERROR) << "Couldn't run RegexChunk.";
+    TC3_LOG(ERROR) << "Couldn't run RegexChunk.";
+    return {};
+  }
+
+  // Annotate with the knowledge engine.
+  if (knowledge_engine_ && !knowledge_engine_->Chunk(context, &candidates)) {
+    TC3_LOG(ERROR) << "Couldn't run knowledge engine Chunk.";
     return {};
   }
 
@@ -1248,7 +1362,7 @@
   std::vector<int> candidate_indices;
   if (!ResolveConflicts(candidates, context, tokens, &interpreter_manager,
                         &candidate_indices)) {
-    TC_LOG(ERROR) << "Couldn't resolve conflicts.";
+    TC3_LOG(ERROR) << "Couldn't resolve conflicts.";
     return {};
   }
 
@@ -1265,20 +1379,26 @@
   return result;
 }
 
-bool TextClassifier::RegexChunk(const UnicodeText& context_unicode,
-                                const std::vector<int>& rules,
-                                std::vector<AnnotatedSpan>* result) const {
+bool Annotator::RegexChunk(const UnicodeText& context_unicode,
+                           const std::vector<int>& rules,
+                           std::vector<AnnotatedSpan>* result) const {
   for (int pattern_id : rules) {
     const CompiledRegexPattern& regex_pattern = regex_patterns_[pattern_id];
     const auto matcher = regex_pattern.pattern->Matcher(context_unicode);
     if (!matcher) {
-      TC_LOG(ERROR) << "Could not get regex matcher for pattern: "
-                    << pattern_id;
+      TC3_LOG(ERROR) << "Could not get regex matcher for pattern: "
+                     << pattern_id;
       return false;
     }
 
     int status = UniLib::RegexMatcher::kNoError;
     while (matcher->Find(&status) && status == UniLib::RegexMatcher::kNoError) {
+      if (regex_pattern.verification_options) {
+        if (!VerifyCandidate(regex_pattern.verification_options,
+                             matcher->Group(1, &status).ToUTF8String())) {
+          continue;
+        }
+      }
       result->emplace_back();
       // Selection/annotation regular expressions need to specify a capturing
       // group specifying the selection.
@@ -1293,11 +1413,10 @@
   return true;
 }
 
-bool TextClassifier::ModelChunk(int num_tokens,
-                                const TokenSpan& span_of_interest,
-                                tflite::Interpreter* selection_interpreter,
-                                const CachedFeatures& cached_features,
-                                std::vector<TokenSpan>* chunks) const {
+bool Annotator::ModelChunk(int num_tokens, const TokenSpan& span_of_interest,
+                           tflite::Interpreter* selection_interpreter,
+                           const CachedFeatures& cached_features,
+                           std::vector<TokenSpan>* chunks) const {
   const int max_selection_span =
       selection_feature_processor_->GetOptions()->max_selection_span();
   // The inference span is the span of interest expanded to include
@@ -1378,7 +1497,7 @@
 }
 }  // namespace
 
-bool TextClassifier::ModelClickContextScoreChunks(
+bool Annotator::ModelClickContextScoreChunks(
     int num_tokens, const TokenSpan& span_of_interest,
     const CachedFeatures& cached_features,
     tflite::Interpreter* selection_interpreter,
@@ -1407,13 +1526,13 @@
         TensorView<float>(all_features.data(), {batch_size, features_size}),
         selection_interpreter);
     if (!logits.is_valid()) {
-      TC_LOG(ERROR) << "Couldn't compute logits.";
+      TC3_LOG(ERROR) << "Couldn't compute logits.";
       return false;
     }
     if (logits.dims() != 2 || logits.dim(0) != batch_size ||
         logits.dim(1) !=
             selection_feature_processor_->GetSelectionLabelCount()) {
-      TC_LOG(ERROR) << "Mismatching output.";
+      TC3_LOG(ERROR) << "Mismatching output.";
       return false;
     }
 
@@ -1427,7 +1546,7 @@
         TokenSpan relative_token_span;
         if (!selection_feature_processor_->LabelToTokenSpan(
                 j, &relative_token_span)) {
-          TC_LOG(ERROR) << "Couldn't map the label to a token span.";
+          TC3_LOG(ERROR) << "Couldn't map the label to a token span.";
           return false;
         }
         const TokenSpan candidate_span = ExpandTokenSpan(
@@ -1449,7 +1568,7 @@
   return true;
 }
 
-bool TextClassifier::ModelBoundsSensitiveScoreChunks(
+bool Annotator::ModelBoundsSensitiveScoreChunks(
     int num_tokens, const TokenSpan& span_of_interest,
     const TokenSpan& inference_span, const CachedFeatures& cached_features,
     tflite::Interpreter* selection_interpreter,
@@ -1518,12 +1637,12 @@
         TensorView<float>(all_features.data(), {batch_size, features_size}),
         selection_interpreter);
     if (!logits.is_valid()) {
-      TC_LOG(ERROR) << "Couldn't compute logits.";
+      TC3_LOG(ERROR) << "Couldn't compute logits.";
       return false;
     }
     if (logits.dims() != 2 || logits.dim(0) != batch_size ||
         logits.dim(1) != 1) {
-      TC_LOG(ERROR) << "Mismatching output.";
+      TC3_LOG(ERROR) << "Mismatching output.";
       return false;
     }
 
@@ -1537,11 +1656,11 @@
   return true;
 }
 
-bool TextClassifier::DatetimeChunk(const UnicodeText& context_unicode,
-                                   int64 reference_time_ms_utc,
-                                   const std::string& reference_timezone,
-                                   const std::string& locales, ModeFlag mode,
-                                   std::vector<AnnotatedSpan>* result) const {
+bool Annotator::DatetimeChunk(const UnicodeText& context_unicode,
+                              int64 reference_time_ms_utc,
+                              const std::string& reference_timezone,
+                              const std::string& locales, ModeFlag mode,
+                              std::vector<AnnotatedSpan>* result) const {
   if (!datetime_parser_) {
     return true;
   }
@@ -1573,4 +1692,4 @@
   return LoadAndVerifyModel(buffer, size);
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/text-classifier.h b/annotator/annotator.h
similarity index 86%
rename from text-classifier.h
rename to annotator/annotator.h
index 0692ecd..a4b22e8 100644
--- a/text-classifier.h
+++ b/annotator/annotator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,25 +16,26 @@
 
 // Inference code for the text classification model.
 
-#ifndef LIBTEXTCLASSIFIER_TEXT_CLASSIFIER_H_
-#define LIBTEXTCLASSIFIER_TEXT_CLASSIFIER_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_H_
 
 #include <memory>
 #include <set>
 #include <string>
 #include <vector>
 
-#include "datetime/parser.h"
-#include "feature-processor.h"
-#include "model-executor.h"
-#include "model_generated.h"
-#include "strip-unpaired-brackets.h"
-#include "types.h"
-#include "util/memory/mmap.h"
-#include "util/utf8/unilib.h"
-#include "zlib-utils.h"
+#include "annotator/datetime/parser.h"
+#include "annotator/feature-processor.h"
+#include "annotator/knowledge/knowledge-engine.h"
+#include "annotator/model-executor.h"
+#include "annotator/model_generated.h"
+#include "annotator/strip-unpaired-brackets.h"
+#include "annotator/types.h"
+#include "annotator/zlib-utils.h"
+#include "utils/memory/mmap.h"
+#include "utils/utf8/unilib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 struct SelectionOptions {
   // Comma-separated list of locale specification for the input text (BCP 47
@@ -106,23 +107,31 @@
 // A text processing model that provides text classification, annotation,
 // selection suggestion for various types.
 // NOTE: This class is not thread-safe.
-class TextClassifier {
+class Annotator {
  public:
-  static std::unique_ptr<TextClassifier> FromUnownedBuffer(
-      const char* buffer, int size, const UniLib* unilib = nullptr);
+  static std::unique_ptr<Annotator> FromUnownedBuffer(
+      const char* buffer, int size, const UniLib* unilib = nullptr,
+      const CalendarLib* calendarlib = nullptr);
   // Takes ownership of the mmap.
-  static std::unique_ptr<TextClassifier> FromScopedMmap(
-      std::unique_ptr<ScopedMmap>* mmap, const UniLib* unilib = nullptr);
-  static std::unique_ptr<TextClassifier> FromFileDescriptor(
-      int fd, int offset, int size, const UniLib* unilib = nullptr);
-  static std::unique_ptr<TextClassifier> FromFileDescriptor(
-      int fd, const UniLib* unilib = nullptr);
-  static std::unique_ptr<TextClassifier> FromPath(
-      const std::string& path, const UniLib* unilib = nullptr);
+  static std::unique_ptr<Annotator> FromScopedMmap(
+      std::unique_ptr<ScopedMmap>* mmap, const UniLib* unilib = nullptr,
+      const CalendarLib* calendarlib = nullptr);
+  static std::unique_ptr<Annotator> FromFileDescriptor(
+      int fd, int offset, int size, const UniLib* unilib = nullptr,
+      const CalendarLib* calendarlib = nullptr);
+  static std::unique_ptr<Annotator> FromFileDescriptor(
+      int fd, const UniLib* unilib = nullptr,
+      const CalendarLib* calendarlib = nullptr);
+  static std::unique_ptr<Annotator> FromPath(
+      const std::string& path, const UniLib* unilib = nullptr,
+      const CalendarLib* calendarlib = nullptr);
 
   // Returns true if the model is ready for use.
   bool IsInitialized() { return initialized_; }
 
+  // Initializes the knowledge engine with the given config.
+  bool InitializeKnowledgeEngine(const std::string& serialized_config);
+
   // Runs inference for given a context and current selection (i.e. index
   // of the first and one past last selected characters (utf8 codepoint
   // offsets)). Returns the indices (utf8 codepoint offsets) of the selection
@@ -160,6 +169,13 @@
   static const std::string& kPhoneCollection;
   static const std::string& kAddressCollection;
   static const std::string& kDateCollection;
+  static const std::string& kUrlCollection;
+  static const std::string& kFlightCollection;
+  static const std::string& kEmailCollection;
+  static const std::string& kIbanCollection;
+  static const std::string& kPaymentCardCollection;
+  static const std::string& kIsbnCollection;
+  static const std::string& kTrackingNumberCollection;
 
  protected:
   struct ScoredChunk {
@@ -169,23 +185,13 @@
 
   // Constructs and initializes text classifier from given model.
   // Takes ownership of 'mmap', and thus owns the buffer that backs 'model'.
-  TextClassifier(std::unique_ptr<ScopedMmap>* mmap, const Model* model,
-                 const UniLib* unilib)
-      : model_(model),
-        mmap_(std::move(*mmap)),
-        owned_unilib_(nullptr),
-        unilib_(internal::MaybeCreateUnilib(unilib, &owned_unilib_)) {
-    ValidateAndInitialize();
-  }
+  Annotator(std::unique_ptr<ScopedMmap>* mmap, const Model* model,
+            const UniLib* unilib, const CalendarLib* calendarlib);
 
   // Constructs, validates and initializes text classifier from given model.
   // Does not own the buffer that backs 'model'.
-  explicit TextClassifier(const Model* model, const UniLib* unilib)
-      : model_(model),
-        owned_unilib_(nullptr),
-        unilib_(internal::MaybeCreateUnilib(unilib, &owned_unilib_)) {
-    ValidateAndInitialize();
-  }
+  explicit Annotator(const Model* model, const UniLib* unilib,
+                     const CalendarLib* calendarlib);
 
   // Checks that model contains all required fields, and initializes internal
   // datastructures.
@@ -334,6 +340,7 @@
     float target_classification_score;
     float priority_score;
     std::unique_ptr<UniLib::RegexPattern> pattern;
+    const VerificationOptions* verification_options;
   };
 
   std::unique_ptr<ScopedMmap> mmap_;
@@ -354,6 +361,10 @@
 
   std::unique_ptr<UniLib> owned_unilib_;
   const UniLib* unilib_;
+  std::unique_ptr<CalendarLib> owned_calendarlib_;
+  const CalendarLib* calendarlib_;
+
+  std::unique_ptr<const KnowledgeEngine> knowledge_engine_;
 };
 
 namespace internal {
@@ -376,6 +387,6 @@
 // Interprets the buffer as a Model flatbuffer and returns it for reading.
 const Model* ViewModel(const void* buffer, int size);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_TEXT_CLASSIFIER_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_H_
diff --git a/annotator/annotator_jni.cc b/annotator/annotator_jni.cc
new file mode 100644
index 0000000..6d58d96
--- /dev/null
+++ b/annotator/annotator_jni.cc
@@ -0,0 +1,437 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// JNI wrapper for the Annotator.
+
+#include "annotator/annotator_jni.h"
+
+#include <jni.h>
+#include <type_traits>
+#include <vector>
+
+#include "annotator/annotator.h"
+#include "annotator/annotator_jni_common.h"
+#include "utils/base/integral_types.h"
+#include "utils/calendar/calendar.h"
+#include "utils/java/scoped_local_ref.h"
+#include "utils/java/string_utils.h"
+#include "utils/memory/mmap.h"
+#include "utils/utf8/unilib.h"
+
+#ifdef TC3_UNILIB_JAVAICU
+#ifndef TC3_CALENDAR_JAVAICU
+#error Inconsistent usage of Java ICU components
+#else
+#define TC3_USE_JAVAICU
+#endif
+#endif
+
+using libtextclassifier3::AnnotatedSpan;
+using libtextclassifier3::AnnotationOptions;
+using libtextclassifier3::Annotator;
+using libtextclassifier3::ClassificationOptions;
+using libtextclassifier3::ClassificationResult;
+using libtextclassifier3::CodepointSpan;
+using libtextclassifier3::JStringToUtf8String;
+using libtextclassifier3::Model;
+using libtextclassifier3::ScopedLocalRef;
+using libtextclassifier3::SelectionOptions;
+// When using the Java's ICU, CalendarLib and UniLib need to be instantiated
+// with a JavaVM pointer from JNI. When using a standard ICU the pointer is
+// not needed and the objects are instantiated implicitly.
+#ifdef TC3_USE_JAVAICU
+using libtextclassifier3::CalendarLib;
+using libtextclassifier3::UniLib;
+#endif
+
+namespace libtextclassifier3 {
+
+using libtextclassifier3::CodepointSpan;
+
+namespace {
+
+jobjectArray ClassificationResultsToJObjectArray(
+    JNIEnv* env,
+    const std::vector<ClassificationResult>& classification_result) {
+  const ScopedLocalRef<jclass> result_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                     "$ClassificationResult"),
+      env);
+  if (!result_class) {
+    TC3_LOG(ERROR) << "Couldn't find ClassificationResult class.";
+    return nullptr;
+  }
+  const ScopedLocalRef<jclass> datetime_parse_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                     "$DatetimeResult"),
+      env);
+  if (!datetime_parse_class) {
+    TC3_LOG(ERROR) << "Couldn't find DatetimeResult class.";
+    return nullptr;
+  }
+
+  const jmethodID result_class_constructor = env->GetMethodID(
+      result_class.get(), "<init>",
+      "(Ljava/lang/String;FL" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+      "$DatetimeResult;[B)V");
+  const jmethodID datetime_parse_class_constructor =
+      env->GetMethodID(datetime_parse_class.get(), "<init>", "(JI)V");
+
+  const jobjectArray results = env->NewObjectArray(classification_result.size(),
+                                                   result_class.get(), nullptr);
+  for (int i = 0; i < classification_result.size(); i++) {
+    jstring row_string =
+        env->NewStringUTF(classification_result[i].collection.c_str());
+
+    jobject row_datetime_parse = nullptr;
+    if (classification_result[i].datetime_parse_result.IsSet()) {
+      row_datetime_parse = env->NewObject(
+          datetime_parse_class.get(), datetime_parse_class_constructor,
+          classification_result[i].datetime_parse_result.time_ms_utc,
+          classification_result[i].datetime_parse_result.granularity);
+    }
+
+    jbyteArray serialized_knowledge_result = nullptr;
+    const std::string& serialized_knowledge_result_string =
+        classification_result[i].serialized_knowledge_result;
+    if (!serialized_knowledge_result_string.empty()) {
+      serialized_knowledge_result =
+          env->NewByteArray(serialized_knowledge_result_string.size());
+      env->SetByteArrayRegion(serialized_knowledge_result, 0,
+                              serialized_knowledge_result_string.size(),
+                              reinterpret_cast<const jbyte*>(
+                                  serialized_knowledge_result_string.data()));
+    }
+
+    jobject result =
+        env->NewObject(result_class.get(), result_class_constructor, row_string,
+                       static_cast<jfloat>(classification_result[i].score),
+                       row_datetime_parse, serialized_knowledge_result);
+    env->SetObjectArrayElement(results, i, result);
+    env->DeleteLocalRef(result);
+  }
+  return results;
+}
+
+CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
+                                    CodepointSpan orig_indices,
+                                    bool from_utf8) {
+  const libtextclassifier3::UnicodeText unicode_str =
+      libtextclassifier3::UTF8ToUnicodeText(utf8_str, /*do_copy=*/false);
+
+  int unicode_index = 0;
+  int bmp_index = 0;
+
+  const int* source_index;
+  const int* target_index;
+  if (from_utf8) {
+    source_index = &unicode_index;
+    target_index = &bmp_index;
+  } else {
+    source_index = &bmp_index;
+    target_index = &unicode_index;
+  }
+
+  CodepointSpan result{-1, -1};
+  std::function<void()> assign_indices_fn = [&result, &orig_indices,
+                                             &source_index, &target_index]() {
+    if (orig_indices.first == *source_index) {
+      result.first = *target_index;
+    }
+
+    if (orig_indices.second == *source_index) {
+      result.second = *target_index;
+    }
+  };
+
+  for (auto it = unicode_str.begin(); it != unicode_str.end();
+       ++it, ++unicode_index, ++bmp_index) {
+    assign_indices_fn();
+
+    // There is 1 extra character in the input for each UTF8 character > 0xFFFF.
+    if (*it > 0xFFFF) {
+      ++bmp_index;
+    }
+  }
+  assign_indices_fn();
+
+  return result;
+}
+
+}  // namespace
+
+CodepointSpan ConvertIndicesBMPToUTF8(const std::string& utf8_str,
+                                      CodepointSpan bmp_indices) {
+  return ConvertIndicesBMPUTF8(utf8_str, bmp_indices, /*from_utf8=*/false);
+}
+
+CodepointSpan ConvertIndicesUTF8ToBMP(const std::string& utf8_str,
+                                      CodepointSpan utf8_indices) {
+  return ConvertIndicesBMPUTF8(utf8_str, utf8_indices, /*from_utf8=*/true);
+}
+
+jstring GetLocalesFromMmap(JNIEnv* env, libtextclassifier3::ScopedMmap* mmap) {
+  if (!mmap->handle().ok()) {
+    return env->NewStringUTF("");
+  }
+  const Model* model = libtextclassifier3::ViewModel(
+      mmap->handle().start(), mmap->handle().num_bytes());
+  if (!model || !model->locales()) {
+    return env->NewStringUTF("");
+  }
+  return env->NewStringUTF(model->locales()->c_str());
+}
+
+jint GetVersionFromMmap(JNIEnv* env, libtextclassifier3::ScopedMmap* mmap) {
+  if (!mmap->handle().ok()) {
+    return 0;
+  }
+  const Model* model = libtextclassifier3::ViewModel(
+      mmap->handle().start(), mmap->handle().num_bytes());
+  if (!model) {
+    return 0;
+  }
+  return model->version();
+}
+
+jstring GetNameFromMmap(JNIEnv* env, libtextclassifier3::ScopedMmap* mmap) {
+  if (!mmap->handle().ok()) {
+    return env->NewStringUTF("");
+  }
+  const Model* model = libtextclassifier3::ViewModel(
+      mmap->handle().start(), mmap->handle().num_bytes());
+  if (!model || !model->name()) {
+    return env->NewStringUTF("");
+  }
+  return env->NewStringUTF(model->name()->c_str());
+}
+
+}  // namespace libtextclassifier3
+
+using libtextclassifier3::ClassificationResultsToJObjectArray;
+using libtextclassifier3::ConvertIndicesBMPToUTF8;
+using libtextclassifier3::ConvertIndicesUTF8ToBMP;
+using libtextclassifier3::FromJavaAnnotationOptions;
+using libtextclassifier3::FromJavaClassificationOptions;
+using libtextclassifier3::FromJavaSelectionOptions;
+using libtextclassifier3::ToStlString;
+
+TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME, nativeNewAnnotator)
+(JNIEnv* env, jobject thiz, jint fd) {
+#ifdef TC3_USE_JAVAICU
+  std::shared_ptr<libtextclassifier3::JniCache> jni_cache(
+      libtextclassifier3::JniCache::Create(env));
+  return reinterpret_cast<jlong>(
+      Annotator::FromFileDescriptor(fd, new UniLib(jni_cache),
+                                    new CalendarLib(jni_cache)).release());
+#else
+  return reinterpret_cast<jlong>(Annotator::FromFileDescriptor(fd).release());
+#endif
+}
+
+TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME, nativeNewAnnotatorFromPath)
+(JNIEnv* env, jobject thiz, jstring path) {
+  const std::string path_str = ToStlString(env, path);
+#ifdef TC3_USE_JAVAICU
+  std::shared_ptr<libtextclassifier3::JniCache> jni_cache(
+      libtextclassifier3::JniCache::Create(env));
+  return reinterpret_cast<jlong>(Annotator::FromPath(path_str,
+                                                     new UniLib(jni_cache),
+                                                     new CalendarLib(jni_cache))
+                                     .release());
+#else
+  return reinterpret_cast<jlong>(Annotator::FromPath(path_str).release());
+#endif
+}
+
+TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME,
+               nativeNewAnnotatorFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
+  const jint fd = libtextclassifier3::GetFdFromAssetFileDescriptor(env, afd);
+#ifdef TC3_USE_JAVAICU
+  std::shared_ptr<libtextclassifier3::JniCache> jni_cache(
+      libtextclassifier3::JniCache::Create(env));
+  return reinterpret_cast<jlong>(
+      Annotator::FromFileDescriptor(fd, offset, size, new UniLib(jni_cache),
+                                    new CalendarLib(jni_cache))
+          .release());
+#else
+  return reinterpret_cast<jlong>(
+      Annotator::FromFileDescriptor(fd, offset, size).release());
+#endif
+}
+
+TC3_JNI_METHOD(jboolean, TC3_ANNOTATOR_CLASS_NAME,
+               nativeInitializeKnowledgeEngine)
+(JNIEnv* env, jobject thiz, jlong ptr, jbyteArray serialized_config) {
+  if (!ptr) {
+    return false;
+  }
+
+  Annotator* model = reinterpret_cast<Annotator*>(ptr);
+
+  std::string serialized_config_string;
+  const int length = env->GetArrayLength(serialized_config);
+  serialized_config_string.resize(length);
+  env->GetByteArrayRegion(serialized_config, 0, length,
+                          reinterpret_cast<jbyte*>(const_cast<char*>(
+                              serialized_config_string.data())));
+
+  return model->InitializeKnowledgeEngine(serialized_config_string);
+}
+
+TC3_JNI_METHOD(jintArray, TC3_ANNOTATOR_CLASS_NAME, nativeSuggestSelection)
+(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
+ jint selection_end, jobject options) {
+  if (!ptr) {
+    return nullptr;
+  }
+
+  Annotator* model = reinterpret_cast<Annotator*>(ptr);
+
+  const std::string context_utf8 = ToStlString(env, context);
+  CodepointSpan input_indices =
+      ConvertIndicesBMPToUTF8(context_utf8, {selection_begin, selection_end});
+  CodepointSpan selection = model->SuggestSelection(
+      context_utf8, input_indices, FromJavaSelectionOptions(env, options));
+  selection = ConvertIndicesUTF8ToBMP(context_utf8, selection);
+
+  jintArray result = env->NewIntArray(2);
+  env->SetIntArrayRegion(result, 0, 1, &(std::get<0>(selection)));
+  env->SetIntArrayRegion(result, 1, 1, &(std::get<1>(selection)));
+  return result;
+}
+
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME, nativeClassifyText)
+(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
+ jint selection_end, jobject options) {
+  if (!ptr) {
+    return nullptr;
+  }
+  Annotator* ff_model = reinterpret_cast<Annotator*>(ptr);
+
+  const std::string context_utf8 = ToStlString(env, context);
+  const CodepointSpan input_indices =
+      ConvertIndicesBMPToUTF8(context_utf8, {selection_begin, selection_end});
+  const std::vector<ClassificationResult> classification_result =
+      ff_model->ClassifyText(context_utf8, input_indices,
+                             FromJavaClassificationOptions(env, options));
+
+  return ClassificationResultsToJObjectArray(env, classification_result);
+}
+
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME, nativeAnnotate)
+(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jobject options) {
+  if (!ptr) {
+    return nullptr;
+  }
+  Annotator* model = reinterpret_cast<Annotator*>(ptr);
+  std::string context_utf8 = ToStlString(env, context);
+  std::vector<AnnotatedSpan> annotations =
+      model->Annotate(context_utf8, FromJavaAnnotationOptions(env, options));
+
+  jclass result_class = env->FindClass(
+      TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$AnnotatedSpan");
+  if (!result_class) {
+    TC3_LOG(ERROR) << "Couldn't find result class: "
+                   << TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+        "$AnnotatedSpan";
+    return nullptr;
+  }
+
+  jmethodID result_class_constructor =
+      env->GetMethodID(result_class, "<init>",
+                       "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                       "$ClassificationResult;)V");
+
+  jobjectArray results =
+      env->NewObjectArray(annotations.size(), result_class, nullptr);
+
+  for (int i = 0; i < annotations.size(); ++i) {
+    CodepointSpan span_bmp =
+        ConvertIndicesUTF8ToBMP(context_utf8, annotations[i].span);
+    jobject result = env->NewObject(result_class, result_class_constructor,
+                                    static_cast<jint>(span_bmp.first),
+                                    static_cast<jint>(span_bmp.second),
+                                    ClassificationResultsToJObjectArray(
+                                        env, annotations[i].classification));
+    env->SetObjectArrayElement(results, i, result);
+    env->DeleteLocalRef(result);
+  }
+  env->DeleteLocalRef(result_class);
+  return results;
+}
+
+TC3_JNI_METHOD(void, TC3_ANNOTATOR_CLASS_NAME, nativeCloseAnnotator)
+(JNIEnv* env, jobject thiz, jlong ptr) {
+  Annotator* model = reinterpret_cast<Annotator*>(ptr);
+  delete model;
+}
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME, nativeGetLanguage)
+(JNIEnv* env, jobject clazz, jint fd) {
+  TC3_LOG(WARNING) << "Using deprecated getLanguage().";
+  return TC3_JNI_METHOD_NAME(TC3_ANNOTATOR_CLASS_NAME, nativeGetLocales)(
+      env, clazz, fd);
+}
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME, nativeGetLocales)
+(JNIEnv* env, jobject clazz, jint fd) {
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return GetLocalesFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME,
+               nativeGetLocalesFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
+  const jint fd = libtextclassifier3::GetFdFromAssetFileDescriptor(env, afd);
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd, offset, size));
+  return GetLocalesFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jint, TC3_ANNOTATOR_CLASS_NAME, nativeGetVersion)
+(JNIEnv* env, jobject clazz, jint fd) {
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return GetVersionFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jint, TC3_ANNOTATOR_CLASS_NAME,
+               nativeGetVersionFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
+  const jint fd = libtextclassifier3::GetFdFromAssetFileDescriptor(env, afd);
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd, offset, size));
+  return GetVersionFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME, nativeGetName)
+(JNIEnv* env, jobject clazz, jint fd) {
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd));
+  return GetNameFromMmap(env, mmap.get());
+}
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME,
+               nativeGetNameFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
+  const jint fd = libtextclassifier3::GetFdFromAssetFileDescriptor(env, afd);
+  const std::unique_ptr<libtextclassifier3::ScopedMmap> mmap(
+      new libtextclassifier3::ScopedMmap(fd, offset, size));
+  return GetNameFromMmap(env, mmap.get());
+}
diff --git a/annotator/annotator_jni.h b/annotator/annotator_jni.h
new file mode 100644
index 0000000..47715b4
--- /dev/null
+++ b/annotator/annotator_jni.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_H_
+
+#include <jni.h>
+#include <string>
+#include "annotator/annotator_jni_common.h"
+#include "annotator/types.h"
+#include "utils/java/jni-base.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// SmartSelection.
+TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME, nativeNewAnnotator)
+(JNIEnv* env, jobject thiz, jint fd);
+
+TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME, nativeNewAnnotatorFromPath)
+(JNIEnv* env, jobject thiz, jstring path);
+
+TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME,
+               nativeNewAnnotatorFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
+
+TC3_JNI_METHOD(jboolean, TC3_ANNOTATOR_CLASS_NAME,
+               nativeInitializeKnowledgeEngine)
+(JNIEnv* env, jobject thiz, jlong ptr, jbyteArray serialized_config);
+
+TC3_JNI_METHOD(jintArray, TC3_ANNOTATOR_CLASS_NAME, nativeSuggestSelection)
+(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
+ jint selection_end, jobject options);
+
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME, nativeClassifyText)
+(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
+ jint selection_end, jobject options);
+
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME, nativeAnnotate)
+(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jobject options);
+
+TC3_JNI_METHOD(void, TC3_ANNOTATOR_CLASS_NAME, nativeCloseAnnotator)
+(JNIEnv* env, jobject thiz, jlong ptr);
+
+// DEPRECATED. Use nativeGetLocales instead.
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME, nativeGetLanguage)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME, nativeGetLocales)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME,
+               nativeGetLocalesFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
+
+TC3_JNI_METHOD(jint, TC3_ANNOTATOR_CLASS_NAME, nativeGetVersion)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jint, TC3_ANNOTATOR_CLASS_NAME,
+               nativeGetVersionFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME, nativeGetName)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jstring, TC3_ANNOTATOR_CLASS_NAME,
+               nativeGetNameFromAssetFileDescriptor)
+(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
+
+#ifdef __cplusplus
+}
+#endif
+
+namespace libtextclassifier3 {
+
+// Given a utf8 string and a span expressed in Java BMP (basic multilingual
+// plane) codepoints, converts it to a span expressed in utf8 codepoints.
+libtextclassifier3::CodepointSpan ConvertIndicesBMPToUTF8(
+    const std::string& utf8_str, libtextclassifier3::CodepointSpan bmp_indices);
+
+// Given a utf8 string and a span expressed in utf8 codepoints, converts it to a
+// span expressed in Java BMP (basic multilingual plane) codepoints.
+libtextclassifier3::CodepointSpan ConvertIndicesUTF8ToBMP(
+    const std::string& utf8_str,
+    libtextclassifier3::CodepointSpan utf8_indices);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_H_
diff --git a/annotator/annotator_jni_common.cc b/annotator/annotator_jni_common.cc
new file mode 100644
index 0000000..0fdb87b
--- /dev/null
+++ b/annotator/annotator_jni_common.cc
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/annotator_jni_common.h"
+
+#include "utils/java/jni-base.h"
+#include "utils/java/scoped_local_ref.h"
+
+namespace libtextclassifier3 {
+namespace {
+template <typename T>
+T FromJavaOptionsInternal(JNIEnv* env, jobject joptions,
+                          const std::string& class_name) {
+  if (!joptions) {
+    return {};
+  }
+
+  const ScopedLocalRef<jclass> options_class(env->FindClass(class_name.c_str()),
+                                             env);
+  if (!options_class) {
+    return {};
+  }
+
+  const std::pair<bool, jobject> status_or_locales = CallJniMethod0<jobject>(
+      env, joptions, options_class.get(), &JNIEnv::CallObjectMethod,
+      "getLocale", "Ljava/lang/String;");
+  const std::pair<bool, jobject> status_or_reference_timezone =
+      CallJniMethod0<jobject>(env, joptions, options_class.get(),
+                              &JNIEnv::CallObjectMethod, "getReferenceTimezone",
+                              "Ljava/lang/String;");
+  const std::pair<bool, int64> status_or_reference_time_ms_utc =
+      CallJniMethod0<int64>(env, joptions, options_class.get(),
+                            &JNIEnv::CallLongMethod, "getReferenceTimeMsUtc",
+                            "J");
+
+  if (!status_or_locales.first || !status_or_reference_timezone.first ||
+      !status_or_reference_time_ms_utc.first) {
+    return {};
+  }
+
+  T options;
+  options.locales =
+      ToStlString(env, reinterpret_cast<jstring>(status_or_locales.second));
+  options.reference_timezone = ToStlString(
+      env, reinterpret_cast<jstring>(status_or_reference_timezone.second));
+  options.reference_time_ms_utc = status_or_reference_time_ms_utc.second;
+  return options;
+}
+}  // namespace
+
+SelectionOptions FromJavaSelectionOptions(JNIEnv* env, jobject joptions) {
+  if (!joptions) {
+    return {};
+  }
+
+  const ScopedLocalRef<jclass> options_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                     "$SelectionOptions"),
+      env);
+  const std::pair<bool, jobject> status_or_locales = CallJniMethod0<jobject>(
+      env, joptions, options_class.get(), &JNIEnv::CallObjectMethod,
+      "getLocales", "Ljava/lang/String;");
+  if (!status_or_locales.first) {
+    return {};
+  }
+
+  SelectionOptions options;
+  options.locales =
+      ToStlString(env, reinterpret_cast<jstring>(status_or_locales.second));
+
+  return options;
+}
+
+ClassificationOptions FromJavaClassificationOptions(JNIEnv* env,
+                                                    jobject joptions) {
+  return FromJavaOptionsInternal<ClassificationOptions>(
+      env, joptions,
+      TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$ClassificationOptions");
+}
+
+AnnotationOptions FromJavaAnnotationOptions(JNIEnv* env, jobject joptions) {
+  return FromJavaOptionsInternal<AnnotationOptions>(
+      env, joptions,
+      TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$AnnotationOptions");
+}
+
+}  // namespace libtextclassifier3
diff --git a/annotator/annotator_jni_common.h b/annotator/annotator_jni_common.h
new file mode 100644
index 0000000..b62bb21
--- /dev/null
+++ b/annotator/annotator_jni_common.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_COMMON_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_COMMON_H_
+
+#include <jni.h>
+
+#include "annotator/annotator.h"
+
+#ifndef TC3_ANNOTATOR_CLASS_NAME
+#define TC3_ANNOTATOR_CLASS_NAME AnnotatorModel
+#endif
+
+#define TC3_ANNOTATOR_CLASS_NAME_STR TC3_ADD_QUOTES(TC3_ANNOTATOR_CLASS_NAME)
+
+namespace libtextclassifier3 {
+
+SelectionOptions FromJavaSelectionOptions(JNIEnv* env, jobject joptions);
+
+ClassificationOptions FromJavaClassificationOptions(JNIEnv* env,
+                                                    jobject joptions);
+
+AnnotationOptions FromJavaAnnotationOptions(JNIEnv* env, jobject joptions);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_COMMON_H_
diff --git a/textclassifier_jni_test.cc b/annotator/annotator_jni_test.cc
similarity index 92%
rename from textclassifier_jni_test.cc
rename to annotator/annotator_jni_test.cc
index 87b96fa..929fb59 100644
--- a/textclassifier_jni_test.cc
+++ b/annotator/annotator_jni_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#include "textclassifier_jni.h"
+#include "annotator/annotator_jni.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
-TEST(TextClassifier, ConvertIndicesBMPUTF8) {
+TEST(Annotator, ConvertIndicesBMPUTF8) {
   // Test boundary cases.
   EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5));
   EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5));
@@ -76,4 +76,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/text-classifier_test.cc b/annotator/annotator_test.cc
similarity index 72%
rename from text-classifier_test.cc
rename to annotator/annotator_test.cc
index c8ced76..fbaf039 100644
--- a/text-classifier_test.cc
+++ b/annotator/annotator_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,19 +14,19 @@
  * limitations under the License.
  */
 
-#include "text-classifier.h"
+#include "annotator/annotator.h"
 
 #include <fstream>
 #include <iostream>
 #include <memory>
 #include <string>
 
-#include "model_generated.h"
-#include "types-test-util.h"
+#include "annotator/model_generated.h"
+#include "annotator/types-test-util.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 using testing::ElementsAreArray;
@@ -52,27 +52,32 @@
 }
 
 std::string GetModelPath() {
-  return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
+  return TC3_TEST_DATA_DIR;
 }
 
-TEST(TextClassifierTest, EmbeddingExecutorLoadingFails) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + "wrong_embeddings.fb", &unilib);
+class AnnotatorTest : public ::testing::TestWithParam<const char*> {
+ protected:
+  AnnotatorTest()
+      : INIT_UNILIB_FOR_TESTING(unilib_),
+        INIT_CALENDARLIB_FOR_TESTING(calendarlib_) {}
+  UniLib unilib_;
+  CalendarLib calendarlib_;
+};
+
+TEST_F(AnnotatorTest, EmbeddingExecutorLoadingFails) {
+  std::unique_ptr<Annotator> classifier = Annotator::FromPath(
+      GetModelPath() + "wrong_embeddings.fb", &unilib_, &calendarlib_);
   EXPECT_FALSE(classifier);
 }
 
-class TextClassifierTest : public ::testing::TestWithParam<const char*> {};
-
-INSTANTIATE_TEST_CASE_P(ClickContext, TextClassifierTest,
+INSTANTIATE_TEST_CASE_P(ClickContext, AnnotatorTest,
                         Values("test_model_cc.fb"));
-INSTANTIATE_TEST_CASE_P(BoundsSensitive, TextClassifierTest,
+INSTANTIATE_TEST_CASE_P(BoundsSensitive, AnnotatorTest,
                         Values("test_model.fb"));
 
-TEST_P(TextClassifierTest, ClassifyText) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, ClassifyText) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ("other",
@@ -109,8 +114,7 @@
                                      "\xf0\x9f\x98\x8b\x8b", {0, 0})));
 }
 
-TEST_P(TextClassifierTest, ClassifyTextDisabledFail) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, ClassifyTextDisabledFail) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -119,19 +123,17 @@
   unpacked_model->triggering_options->enabled_modes = ModeFlag_SELECTION;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
 
   // The classification model is still needed for selection scores.
   ASSERT_FALSE(classifier);
 }
 
-TEST_P(TextClassifierTest, ClassifyTextDisabled) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, ClassifyTextDisabled) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -140,12 +142,11 @@
       ModeFlag_ANNOTATION_AND_SELECTION;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_THAT(
@@ -153,13 +154,11 @@
       IsEmpty());
 }
 
-TEST_P(TextClassifierTest, ClassifyTextFilteredCollections) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, ClassifyTextFilteredCollections) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(test_model.c_str(), test_model.size(),
-                                        &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      test_model.c_str(), test_model.size(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
@@ -173,11 +172,11 @@
       "phone");
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  classifier = TextClassifier::FromUnownedBuffer(
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder.GetBufferPointer()),
-      builder.GetSize(), &unilib);
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ("other", FirstResult(classifier->ClassifyText(
@@ -206,9 +205,8 @@
   return result;
 }
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, ClassifyTextRegularExpression) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, ClassifyTextRegularExpression) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -219,14 +217,21 @@
   unpacked_model->regex_model->patterns.push_back(MakePattern(
       "flight", "[a-zA-Z]{2}\\d{2,4}", /*enabled_for_classification=*/true,
       /*enabled_for_selection=*/false, /*enabled_for_annotation=*/false, 0.5));
+  std::unique_ptr<RegexModel_::PatternT> verified_pattern =
+      MakePattern("payment_card", "\\d{4}(?: \\d{4}){3}",
+                  /*enabled_for_classification=*/true,
+                  /*enabled_for_selection=*/false,
+                  /*enabled_for_annotation=*/false, 1.0);
+  verified_pattern->verification_options.reset(new VerificationOptionsT);
+  verified_pattern->verification_options->verify_luhn_checksum = true;
+  unpacked_model->regex_model->patterns.push_back(std::move(verified_pattern));
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ("flight",
@@ -246,6 +251,13 @@
   EXPECT_EQ("flight", FirstResult(classifier->ClassifyText("LX 37", {0, 5})));
   EXPECT_EQ("flight", FirstResult(classifier->ClassifyText("flight LX 37 abcd",
                                                            {7, 12})));
+  EXPECT_EQ("payment_card", FirstResult(classifier->ClassifyText(
+                                "cc: 4012 8888 8888 1881", {4, 23})));
+  EXPECT_EQ("payment_card", FirstResult(classifier->ClassifyText(
+                                "2221 0067 4735 6281", {0, 19})));
+  // Luhn check fails.
+  EXPECT_EQ("other", FirstResult(classifier->ClassifyText("2221 0067 4735 6282",
+                                                          {0, 19})));
 
   // More lines.
   EXPECT_EQ("url",
@@ -254,11 +266,10 @@
                 "www.google.com every today!|Call me at (800) 123-456 today.",
                 {51, 65})));
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, SuggestSelectionRegularExpression) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, SuggestSelectionRegularExpression) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -270,14 +281,21 @@
       "flight", "([a-zA-Z]{2} ?\\d{2,4})", /*enabled_for_classification=*/false,
       /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
   unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
+  std::unique_ptr<RegexModel_::PatternT> verified_pattern =
+      MakePattern("payment_card", "(\\d{4}(?: \\d{4}){3})",
+                  /*enabled_for_classification=*/false,
+                  /*enabled_for_selection=*/true,
+                  /*enabled_for_annotation=*/false, 1.0);
+  verified_pattern->verification_options.reset(new VerificationOptionsT);
+  verified_pattern->verification_options->verify_luhn_checksum = true;
+  unpacked_model->regex_model->patterns.push_back(std::move(verified_pattern));
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   // Check regular expression selection.
@@ -287,12 +305,13 @@
   EXPECT_EQ(classifier->SuggestSelection(
                 "this afternoon Barack Obama gave a speech at", {15, 21}),
             std::make_pair(15, 27));
+  EXPECT_EQ(classifier->SuggestSelection("cc: 4012 8888 8888 1881", {9, 14}),
+            std::make_pair(4, 23));
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest,
-       SuggestSelectionRegularExpressionConflictsModelWins) {
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, SuggestSelectionRegularExpressionConflictsModelWins) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -306,12 +325,11 @@
   unpacked_model->regex_model->patterns.back()->priority_score = 0.5;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize());
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize());
   ASSERT_TRUE(classifier);
 
   // Check conflict resolution.
@@ -321,11 +339,10 @@
           {55, 57}),
       std::make_pair(26, 62));
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest,
-       SuggestSelectionRegularExpressionConflictsRegexWins) {
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, SuggestSelectionRegularExpressionConflictsRegexWins) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -339,12 +356,11 @@
   unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize());
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize());
   ASSERT_TRUE(classifier);
 
   // Check conflict resolution.
@@ -354,11 +370,10 @@
           {55, 57}),
       std::make_pair(55, 62));
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, AnnotateRegex) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, AnnotateRegex) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -369,32 +384,36 @@
   unpacked_model->regex_model->patterns.push_back(MakePattern(
       "flight", "([a-zA-Z]{2} ?\\d{2,4})", /*enabled_for_classification=*/false,
       /*enabled_for_selection=*/false, /*enabled_for_annotation=*/true, 0.5));
+  std::unique_ptr<RegexModel_::PatternT> verified_pattern =
+      MakePattern("payment_card", "(\\d{4}(?: \\d{4}){3})",
+                  /*enabled_for_classification=*/false,
+                  /*enabled_for_selection=*/false,
+                  /*enabled_for_annotation=*/true, 1.0);
+  verified_pattern->verification_options.reset(new VerificationOptionsT);
+  verified_pattern->verification_options->verify_luhn_checksum = true;
+  unpacked_model->regex_model->patterns.push_back(std::move(verified_pattern));
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
       "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
-      "number is 853 225 3556";
+      "number is 853 225 3556\nand my card is 4012 8888 8888 1881.\n";
   EXPECT_THAT(classifier->Annotate(test_string),
-              ElementsAreArray({
-                  IsAnnotatedSpan(6, 18, "person"),
-                  IsAnnotatedSpan(19, 24, "date"),
-                  IsAnnotatedSpan(28, 55, "address"),
-                  IsAnnotatedSpan(79, 91, "phone"),
-              }));
+              ElementsAreArray({IsAnnotatedSpan(6, 18, "person"),
+                                IsAnnotatedSpan(28, 55, "address"),
+                                IsAnnotatedSpan(79, 91, "phone"),
+                                IsAnnotatedSpan(107, 126, "payment_card")}));
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-TEST_P(TextClassifierTest, PhoneFiltering) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, PhoneFiltering) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
@@ -405,10 +424,9 @@
                          "phone: (123) 456 789,0001112", {7, 28})));
 }
 
-TEST_P(TextClassifierTest, SuggestSelection) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, SuggestSelection) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(classifier->SuggestSelection(
@@ -451,8 +469,7 @@
             std::make_pair(11, 12));
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionDisabledFail) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, SuggestSelectionDisabledFail) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -462,18 +479,16 @@
   unpacked_model->triggering_options->enabled_modes = ModeFlag_ANNOTATION;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   // Selection model needs to be present for annotation.
   ASSERT_FALSE(classifier);
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionDisabled) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, SuggestSelectionDisabled) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -484,12 +499,11 @@
   unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(
@@ -503,13 +517,11 @@
               IsEmpty());
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionFilteredCollections) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, SuggestSelectionFilteredCollections) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(test_model.c_str(), test_model.size(),
-                                        &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      test_model.c_str(), test_model.size(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(
@@ -526,11 +538,11 @@
   unpacked_model->selection_options->always_classify_suggested_selection = true;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  classifier = TextClassifier::FromUnownedBuffer(
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder.GetBufferPointer()),
-      builder.GetSize(), &unilib);
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(
@@ -542,10 +554,9 @@
             std::make_pair(0, 27));
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionsAreSymmetric) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, SuggestSelectionsAreSymmetric) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {0, 3}),
@@ -560,10 +571,9 @@
             std::make_pair(6, 33));
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionWithNewLine) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, SuggestSelectionWithNewLine) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(classifier->SuggestSelection("abc\n857 225 3556", {4, 7}),
@@ -576,10 +586,9 @@
             std::make_pair(0, 7));
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionWithPunctuation) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, SuggestSelectionWithPunctuation) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   // From the right.
@@ -603,10 +612,9 @@
             std::make_pair(16, 27));
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionNoCrashWithJunk) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, SuggestSelectionNoCrashWithJunk) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   // Try passing in bunch of invalid selections.
@@ -627,10 +635,9 @@
             std::make_pair(-1, -1));
 }
 
-TEST_P(TextClassifierTest, SuggestSelectionSelectSpace) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, SuggestSelectionSelectSpace) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(
@@ -677,41 +684,39 @@
             std::make_pair(5, 6));
 }
 
-TEST(TextClassifierTest, SnapLeftIfWhitespaceSelection) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(AnnotatorTest, SnapLeftIfWhitespaceSelection) {
   UnicodeText text;
 
   text = UTF8ToUnicodeText("abcd efgh", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib_),
             std::make_pair(3, 4));
   text = UTF8ToUnicodeText("abcd     ", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib_),
             std::make_pair(3, 4));
 
   // Nothing on the left.
   text = UTF8ToUnicodeText("     efgh", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib_),
             std::make_pair(4, 5));
   text = UTF8ToUnicodeText("     efgh", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({0, 1}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({0, 1}, text, unilib_),
             std::make_pair(0, 1));
 
   // Whitespace only.
   text = UTF8ToUnicodeText("     ", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({2, 3}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({2, 3}, text, unilib_),
             std::make_pair(2, 3));
   text = UTF8ToUnicodeText("     ", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({4, 5}, text, unilib_),
             std::make_pair(4, 5));
   text = UTF8ToUnicodeText("     ", /*do_copy=*/false);
-  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({0, 1}, text, unilib),
+  EXPECT_EQ(internal::SnapLeftIfWhitespaceSelection({0, 1}, text, unilib_),
             std::make_pair(0, 1));
 }
 
-TEST_P(TextClassifierTest, Annotate) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, Annotate) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
@@ -719,9 +724,6 @@
       "number is 853 225 3556";
   EXPECT_THAT(classifier->Annotate(test_string),
               ElementsAreArray({
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-                  IsAnnotatedSpan(19, 24, "date"),
-#endif
                   IsAnnotatedSpan(28, 55, "address"),
                   IsAnnotatedSpan(79, 91, "phone"),
               }));
@@ -737,20 +739,19 @@
           .empty());
 }
 
-TEST_P(TextClassifierTest, AnnotateSmallBatches) {
-  CREATE_UNILIB_FOR_TESTING;
+
+TEST_P(AnnotatorTest, AnnotateSmallBatches) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
   // Set the batch size.
   unpacked_model->selection_options->batch_size = 4;
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
@@ -758,9 +759,6 @@
       "number is 853 225 3556";
   EXPECT_THAT(classifier->Annotate(test_string),
               ElementsAreArray({
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-                  IsAnnotatedSpan(19, 24, "date"),
-#endif
                   IsAnnotatedSpan(28, 55, "address"),
                   IsAnnotatedSpan(79, 91, "phone"),
               }));
@@ -771,9 +769,8 @@
   EXPECT_TRUE(classifier->Annotate("853 225\n3556", options).empty());
 }
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, AnnotateFilteringDiscardAll) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, AnnotateFilteringDiscardAll) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -782,24 +779,22 @@
   unpacked_model->triggering_options->min_annotate_confidence =
       2.f;  // Discards all results.
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
       "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
       "number is 853 225 3556";
 
-  EXPECT_EQ(classifier->Annotate(test_string).size(), 1);
+  EXPECT_EQ(classifier->Annotate(test_string).size(), 0);
 }
-#endif
+#endif  // TC3_UNILIB_ICU
 
-TEST_P(TextClassifierTest, AnnotateFilteringKeepAll) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, AnnotateFilteringKeepAll) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -809,39 +804,31 @@
       0.f;  // Keeps all results.
   unpacked_model->triggering_options->enabled_modes = ModeFlag_ALL;
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
       "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
       "number is 853 225 3556";
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-  EXPECT_EQ(classifier->Annotate(test_string).size(), 3);
-#else
-  // In non-ICU mode there is no "date" result.
   EXPECT_EQ(classifier->Annotate(test_string).size(), 2);
-#endif
 }
 
-TEST_P(TextClassifierTest, AnnotateDisabled) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, AnnotateDisabled) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
   // Disable the model for annotation.
   unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION_AND_SELECTION;
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
   const std::string test_string =
       "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
@@ -849,13 +836,11 @@
   EXPECT_THAT(classifier->Annotate(test_string), IsEmpty());
 }
 
-TEST_P(TextClassifierTest, AnnotateFilteredCollections) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_P(AnnotatorTest, AnnotateFilteredCollections) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(test_model.c_str(), test_model.size(),
-                                        &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      test_model.c_str(), test_model.size(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
@@ -864,9 +849,6 @@
 
   EXPECT_THAT(classifier->Annotate(test_string),
               ElementsAreArray({
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-                  IsAnnotatedSpan(19, 24, "date"),
-#endif
                   IsAnnotatedSpan(28, 55, "address"),
                   IsAnnotatedSpan(79, 91, "phone"),
               }));
@@ -879,30 +861,25 @@
       "phone");
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  classifier = TextClassifier::FromUnownedBuffer(
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder.GetBufferPointer()),
-      builder.GetSize(), &unilib);
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_THAT(classifier->Annotate(test_string),
               ElementsAreArray({
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-                  IsAnnotatedSpan(19, 24, "date"),
-#endif
                   IsAnnotatedSpan(28, 55, "address"),
               }));
 }
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, AnnotateFilteredCollectionsSuppress) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, AnnotateFilteredCollectionsSuppress) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(test_model.c_str(), test_model.size(),
-                                        &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      test_model.c_str(), test_model.size(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   const std::string test_string =
@@ -911,9 +888,6 @@
 
   EXPECT_THAT(classifier->Annotate(test_string),
               ElementsAreArray({
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-                  IsAnnotatedSpan(19, 24, "date"),
-#endif
                   IsAnnotatedSpan(28, 55, "address"),
                   IsAnnotatedSpan(79, 91, "phone"),
               }));
@@ -932,25 +906,24 @@
       /*enabled_for_selection=*/false, /*enabled_for_annotation=*/true, 2.0));
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  classifier = TextClassifier::FromUnownedBuffer(
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder.GetBufferPointer()),
-      builder.GetSize(), &unilib);
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_THAT(classifier->Annotate(test_string),
               ElementsAreArray({
-                  IsAnnotatedSpan(19, 24, "date"),
                   IsAnnotatedSpan(28, 55, "address"),
               }));
 }
-#endif
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_CALENDAR_ICU
-TEST_P(TextClassifierTest, ClassifyTextDate) {
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam());
+#ifdef TC3_CALENDAR_ICU
+TEST_P(AnnotatorTest, ClassifyTextDate) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam());
   EXPECT_TRUE(classifier);
 
   std::vector<ClassificationResult> result;
@@ -996,12 +969,12 @@
   EXPECT_EQ(result[0].datetime_parse_result.granularity,
             DatetimeGranularity::GRANULARITY_DAY);
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_CALENDAR_ICU
-TEST_P(TextClassifierTest, ClassifyTextDatePriorities) {
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam());
+#ifdef TC3_CALENDAR_ICU
+TEST_P(AnnotatorTest, ClassifyTextDatePriorities) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam());
   EXPECT_TRUE(classifier);
 
   std::vector<ClassificationResult> result;
@@ -1029,11 +1002,10 @@
   EXPECT_EQ(result[0].datetime_parse_result.granularity,
             DatetimeGranularity::GRANULARITY_DAY);
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_CALENDAR_ICU
-TEST_P(TextClassifierTest, SuggestTextDateDisabled) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_CALENDAR_ICU
+TEST_P(AnnotatorTest, SuggestTextDateDisabled) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
@@ -1043,12 +1015,11 @@
         ModeFlag_ANNOTATION_AND_CLASSIFICATION;
   }
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
 
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromUnownedBuffer(
-          reinterpret_cast<const char*>(builder.GetBufferPointer()),
-          builder.GetSize(), &unilib);
+  std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
   EXPECT_EQ("date",
             FirstResult(classifier->ClassifyText("january 1, 2017", {0, 15})));
@@ -1057,14 +1028,15 @@
   EXPECT_THAT(classifier->Annotate("january 1, 2017"),
               ElementsAreArray({IsAnnotatedSpan(0, 15, "date")}));
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-class TestingTextClassifier : public TextClassifier {
+class TestingAnnotator : public Annotator {
  public:
-  TestingTextClassifier(const std::string& model, const UniLib* unilib)
-      : TextClassifier(ViewModel(model.data(), model.size()), unilib) {}
+  TestingAnnotator(const std::string& model, const UniLib* unilib,
+                   const CalendarLib* calendarlib)
+      : Annotator(ViewModel(model.data(), model.size()), unilib, calendarlib) {}
 
-  using TextClassifier::ResolveConflicts;
+  using Annotator::ResolveConflicts;
 };
 
 AnnotatedSpan MakeAnnotatedSpan(CodepointSpan span,
@@ -1076,9 +1048,8 @@
   return result;
 }
 
-TEST(TextClassifierTest, ResolveConflictsTrivial) {
-  CREATE_UNILIB_FOR_TESTING;
-  TestingTextClassifier classifier("", &unilib);
+TEST_F(AnnotatorTest, ResolveConflictsTrivial) {
+  TestingAnnotator classifier("", &unilib_, &calendarlib_);
 
   std::vector<AnnotatedSpan> candidates{
       {MakeAnnotatedSpan({0, 1}, "phone", 1.0)}};
@@ -1089,9 +1060,8 @@
   EXPECT_THAT(chosen, ElementsAreArray({0}));
 }
 
-TEST(TextClassifierTest, ResolveConflictsSequence) {
-  CREATE_UNILIB_FOR_TESTING;
-  TestingTextClassifier classifier("", &unilib);
+TEST_F(AnnotatorTest, ResolveConflictsSequence) {
+  TestingAnnotator classifier("", &unilib_, &calendarlib_);
 
   std::vector<AnnotatedSpan> candidates{{
       MakeAnnotatedSpan({0, 1}, "phone", 1.0),
@@ -1107,9 +1077,8 @@
   EXPECT_THAT(chosen, ElementsAreArray({0, 1, 2, 3, 4}));
 }
 
-TEST(TextClassifierTest, ResolveConflictsThreeSpans) {
-  CREATE_UNILIB_FOR_TESTING;
-  TestingTextClassifier classifier("", &unilib);
+TEST_F(AnnotatorTest, ResolveConflictsThreeSpans) {
+  TestingAnnotator classifier("", &unilib_, &calendarlib_);
 
   std::vector<AnnotatedSpan> candidates{{
       MakeAnnotatedSpan({0, 3}, "phone", 1.0),
@@ -1123,9 +1092,8 @@
   EXPECT_THAT(chosen, ElementsAreArray({0, 2}));
 }
 
-TEST(TextClassifierTest, ResolveConflictsThreeSpansReversed) {
-  CREATE_UNILIB_FOR_TESTING;
-  TestingTextClassifier classifier("", &unilib);
+TEST_F(AnnotatorTest, ResolveConflictsThreeSpansReversed) {
+  TestingAnnotator classifier("", &unilib_, &calendarlib_);
 
   std::vector<AnnotatedSpan> candidates{{
       MakeAnnotatedSpan({0, 3}, "phone", 0.5),  // Looser!
@@ -1139,9 +1107,8 @@
   EXPECT_THAT(chosen, ElementsAreArray({1}));
 }
 
-TEST(TextClassifierTest, ResolveConflictsFiveSpans) {
-  CREATE_UNILIB_FOR_TESTING;
-  TestingTextClassifier classifier("", &unilib);
+TEST_F(AnnotatorTest, ResolveConflictsFiveSpans) {
+  TestingAnnotator classifier("", &unilib_, &calendarlib_);
 
   std::vector<AnnotatedSpan> candidates{{
       MakeAnnotatedSpan({0, 3}, "phone", 0.5),
@@ -1157,11 +1124,10 @@
   EXPECT_THAT(chosen, ElementsAreArray({0, 2, 4}));
 }
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, LongInput) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, LongInput) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   for (const auto& type_value_pair :
@@ -1187,15 +1153,14 @@
                   input_100k, {50000, 50000 + value_length})));
   }
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
 // These coarse tests are there only to make sure the execution happens in
 // reasonable amount of time.
-TEST_P(TextClassifierTest, LongInputNoResultCheck) {
-  CREATE_UNILIB_FOR_TESTING;
-  std::unique_ptr<TextClassifier> classifier =
-      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
+TEST_P(AnnotatorTest, LongInputNoResultCheck) {
+  std::unique_ptr<Annotator> classifier =
+      Annotator::FromPath(GetModelPath() + GetParam(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   for (const std::string& value :
@@ -1209,24 +1174,23 @@
     classifier->ClassifyText(input_100k, {50000, 50000 + value_length});
   }
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, MaxTokenLength) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, MaxTokenLength) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
-  std::unique_ptr<TextClassifier> classifier;
+  std::unique_ptr<Annotator> classifier;
 
   // With unrestricted number of tokens should behave normally.
   unpacked_model->classification_options->max_num_tokens = -1;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
-  classifier = TextClassifier::FromUnownedBuffer(
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder.GetBufferPointer()),
-      builder.GetSize(), &unilib);
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(FirstResult(classifier->ClassifyText(
@@ -1237,34 +1201,33 @@
   unpacked_model->classification_options->max_num_tokens = 3;
 
   flatbuffers::FlatBufferBuilder builder2;
-  builder2.Finish(Model::Pack(builder2, unpacked_model.get()));
-  classifier = TextClassifier::FromUnownedBuffer(
+  FinishModelBuffer(builder2, Model::Pack(builder2, unpacked_model.get()));
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder2.GetBufferPointer()),
-      builder2.GetSize(), &unilib);
+      builder2.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(FirstResult(classifier->ClassifyText(
                 "I live at 350 Third Street, Cambridge.", {10, 37})),
             "other");
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST_P(TextClassifierTest, MinAddressTokenLength) {
-  CREATE_UNILIB_FOR_TESTING;
+#ifdef TC3_UNILIB_ICU
+TEST_P(AnnotatorTest, MinAddressTokenLength) {
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
 
-  std::unique_ptr<TextClassifier> classifier;
+  std::unique_ptr<Annotator> classifier;
 
   // With unrestricted number of address tokens should behave normally.
   unpacked_model->classification_options->address_min_num_tokens = 0;
 
   flatbuffers::FlatBufferBuilder builder;
-  builder.Finish(Model::Pack(builder, unpacked_model.get()));
-  classifier = TextClassifier::FromUnownedBuffer(
+  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder.GetBufferPointer()),
-      builder.GetSize(), &unilib);
+      builder.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(FirstResult(classifier->ClassifyText(
@@ -1275,17 +1238,17 @@
   unpacked_model->classification_options->address_min_num_tokens = 5;
 
   flatbuffers::FlatBufferBuilder builder2;
-  builder2.Finish(Model::Pack(builder2, unpacked_model.get()));
-  classifier = TextClassifier::FromUnownedBuffer(
+  FinishModelBuffer(builder2, Model::Pack(builder2, unpacked_model.get()));
+  classifier = Annotator::FromUnownedBuffer(
       reinterpret_cast<const char*>(builder2.GetBufferPointer()),
-      builder2.GetSize(), &unilib);
+      builder2.GetSize(), &unilib_, &calendarlib_);
   ASSERT_TRUE(classifier);
 
   EXPECT_EQ(FirstResult(classifier->ClassifyText(
                 "I live at 350 Third Street, Cambridge.", {10, 37})),
             "other");
 }
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif  // TC3_UNILIB_ICU
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/cached-features.cc b/annotator/cached-features.cc
similarity index 95%
rename from cached-features.cc
rename to annotator/cached-features.cc
index 2a46780..480c044 100644
--- a/cached-features.cc
+++ b/annotator/cached-features.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include "cached-features.h"
+#include "annotator/cached-features.h"
 
-#include "tensor-view.h"
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
+#include "utils/tensor-view.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 namespace {
 
@@ -67,7 +67,7 @@
           ? 2
           : 1;
   if (options->feature_version() < min_feature_version) {
-    TC_LOG(ERROR) << "Unsupported feature version.";
+    TC3_LOG(ERROR) << "Unsupported feature version.";
     return nullptr;
   }
 
@@ -170,4 +170,4 @@
   return padding_features_->size();
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/cached-features.h b/annotator/cached-features.h
similarity index 87%
rename from cached-features.h
rename to annotator/cached-features.h
index 0224d86..e03f79c 100644
--- a/cached-features.h
+++ b/annotator/cached-features.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_CACHED_FEATURES_H_
-#define LIBTEXTCLASSIFIER_CACHED_FEATURES_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_CACHED_FEATURES_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_CACHED_FEATURES_H_
 
 #include <memory>
 #include <vector>
 
-#include "model-executor.h"
-#include "model_generated.h"
-#include "types.h"
+#include "annotator/model-executor.h"
+#include "annotator/model_generated.h"
+#include "annotator/types.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Holds state for extracting features across multiple calls and reusing them.
 // Assumes that features for each Token are independent.
@@ -78,6 +78,6 @@
   std::unique_ptr<std::vector<float>> padding_features_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_CACHED_FEATURES_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_CACHED_FEATURES_H_
diff --git a/cached-features_test.cc b/annotator/cached-features_test.cc
similarity index 96%
rename from cached-features_test.cc
rename to annotator/cached-features_test.cc
index f064a63..702f3ca 100644
--- a/cached-features_test.cc
+++ b/annotator/cached-features_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "cached-features.h"
+#include "annotator/cached-features.h"
 
-#include "model-executor.h"
-#include "tensor-view.h"
+#include "annotator/model-executor.h"
+#include "utils/tensor-view.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -26,7 +26,7 @@
 using testing::FloatEq;
 using testing::Matcher;
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
@@ -154,4 +154,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/datetime/extractor.cc b/annotator/datetime/extractor.cc
similarity index 93%
rename from datetime/extractor.cc
rename to annotator/datetime/extractor.cc
index f4ab8f4..31229dd 100644
--- a/datetime/extractor.cc
+++ b/annotator/datetime/extractor.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "datetime/extractor.h"
+#include "annotator/datetime/extractor.h"
 
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 bool DatetimeExtractor::Extract(DateParseData* result,
                                 CodepointSpan* result_span) const {
@@ -36,7 +36,7 @@
       continue;
     }
     if (!GroupTextFromMatch(group_id, &group_text)) {
-      TC_LOG(ERROR) << "Couldn't retrieve group.";
+      TC3_LOG(ERROR) << "Couldn't retrieve group.";
       return false;
     }
     // The pattern can have a group defined in a part that was not matched,
@@ -47,7 +47,7 @@
     switch (group_type) {
       case DatetimeGroupType_GROUP_YEAR: {
         if (!ParseYear(group_text, &(result->year))) {
-          TC_LOG(ERROR) << "Couldn't extract YEAR.";
+          TC3_LOG(ERROR) << "Couldn't extract YEAR.";
           return false;
         }
         result->field_set_mask |= DateParseData::YEAR_FIELD;
@@ -55,7 +55,7 @@
       }
       case DatetimeGroupType_GROUP_MONTH: {
         if (!ParseMonth(group_text, &(result->month))) {
-          TC_LOG(ERROR) << "Couldn't extract MONTH.";
+          TC3_LOG(ERROR) << "Couldn't extract MONTH.";
           return false;
         }
         result->field_set_mask |= DateParseData::MONTH_FIELD;
@@ -63,7 +63,7 @@
       }
       case DatetimeGroupType_GROUP_DAY: {
         if (!ParseDigits(group_text, &(result->day_of_month))) {
-          TC_LOG(ERROR) << "Couldn't extract DAY.";
+          TC3_LOG(ERROR) << "Couldn't extract DAY.";
           return false;
         }
         result->field_set_mask |= DateParseData::DAY_FIELD;
@@ -71,7 +71,7 @@
       }
       case DatetimeGroupType_GROUP_HOUR: {
         if (!ParseDigits(group_text, &(result->hour))) {
-          TC_LOG(ERROR) << "Couldn't extract HOUR.";
+          TC3_LOG(ERROR) << "Couldn't extract HOUR.";
           return false;
         }
         result->field_set_mask |= DateParseData::HOUR_FIELD;
@@ -79,7 +79,7 @@
       }
       case DatetimeGroupType_GROUP_MINUTE: {
         if (!ParseDigits(group_text, &(result->minute))) {
-          TC_LOG(ERROR) << "Couldn't extract MINUTE.";
+          TC3_LOG(ERROR) << "Couldn't extract MINUTE.";
           return false;
         }
         result->field_set_mask |= DateParseData::MINUTE_FIELD;
@@ -87,7 +87,7 @@
       }
       case DatetimeGroupType_GROUP_SECOND: {
         if (!ParseDigits(group_text, &(result->second))) {
-          TC_LOG(ERROR) << "Couldn't extract SECOND.";
+          TC3_LOG(ERROR) << "Couldn't extract SECOND.";
           return false;
         }
         result->field_set_mask |= DateParseData::SECOND_FIELD;
@@ -95,7 +95,7 @@
       }
       case DatetimeGroupType_GROUP_AMPM: {
         if (!ParseAMPM(group_text, &(result->ampm))) {
-          TC_LOG(ERROR) << "Couldn't extract AMPM.";
+          TC3_LOG(ERROR) << "Couldn't extract AMPM.";
           return false;
         }
         result->field_set_mask |= DateParseData::AMPM_FIELD;
@@ -103,7 +103,7 @@
       }
       case DatetimeGroupType_GROUP_RELATIONDISTANCE: {
         if (!ParseRelationDistance(group_text, &(result->relation_distance))) {
-          TC_LOG(ERROR) << "Couldn't extract RELATION_DISTANCE_FIELD.";
+          TC3_LOG(ERROR) << "Couldn't extract RELATION_DISTANCE_FIELD.";
           return false;
         }
         result->field_set_mask |= DateParseData::RELATION_DISTANCE_FIELD;
@@ -111,7 +111,7 @@
       }
       case DatetimeGroupType_GROUP_RELATION: {
         if (!ParseRelation(group_text, &(result->relation))) {
-          TC_LOG(ERROR) << "Couldn't extract RELATION_FIELD.";
+          TC3_LOG(ERROR) << "Couldn't extract RELATION_FIELD.";
           return false;
         }
         result->field_set_mask |= DateParseData::RELATION_FIELD;
@@ -119,7 +119,7 @@
       }
       case DatetimeGroupType_GROUP_RELATIONTYPE: {
         if (!ParseRelationType(group_text, &(result->relation_type))) {
-          TC_LOG(ERROR) << "Couldn't extract RELATION_TYPE_FIELD.";
+          TC3_LOG(ERROR) << "Couldn't extract RELATION_TYPE_FIELD.";
           return false;
         }
         result->field_set_mask |= DateParseData::RELATION_TYPE_FIELD;
@@ -129,11 +129,11 @@
       case DatetimeGroupType_GROUP_DUMMY2:
         break;
       default:
-        TC_LOG(INFO) << "Unknown group type.";
+        TC3_LOG(INFO) << "Unknown group type.";
         continue;
     }
     if (!UpdateMatchSpan(group_id, result_span)) {
-      TC_LOG(ERROR) << "Couldn't update span.";
+      TC3_LOG(ERROR) << "Couldn't update span.";
       return false;
     }
   }
@@ -466,4 +466,4 @@
       parsed_weekday);
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/datetime/extractor.h b/annotator/datetime/extractor.h
similarity index 89%
rename from datetime/extractor.h
rename to annotator/datetime/extractor.h
index 5c36ec4..4c17aa7 100644
--- a/datetime/extractor.h
+++ b/annotator/datetime/extractor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,20 +14,20 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_DATETIME_EXTRACTOR_H_
-#define LIBTEXTCLASSIFIER_DATETIME_EXTRACTOR_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_
 
 #include <string>
 #include <unordered_map>
 #include <vector>
 
-#include "model_generated.h"
-#include "types.h"
-#include "util/strings/stringpiece.h"
-#include "util/utf8/unicodetext.h"
-#include "util/utf8/unilib.h"
+#include "annotator/model_generated.h"
+#include "annotator/types.h"
+#include "utils/strings/stringpiece.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 struct CompiledRule {
   // The compiled regular expression.
@@ -106,6 +106,6 @@
       type_and_locale_to_rule_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_DATETIME_EXTRACTOR_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_
diff --git a/datetime/parser.cc b/annotator/datetime/parser.cc
similarity index 94%
rename from datetime/parser.cc
rename to annotator/datetime/parser.cc
index 4bc5dff..ac3a62d 100644
--- a/datetime/parser.cc
+++ b/annotator/datetime/parser.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,22 +14,22 @@
  * limitations under the License.
  */
 
-#include "datetime/parser.h"
+#include "annotator/datetime/parser.h"
 
 #include <set>
 #include <unordered_set>
 
-#include "datetime/extractor.h"
-#include "util/calendar/calendar.h"
-#include "util/i18n/locale.h"
-#include "util/strings/split.h"
+#include "annotator/datetime/extractor.h"
+#include "utils/calendar/calendar.h"
+#include "utils/i18n/locale.h"
+#include "utils/strings/split.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 std::unique_ptr<DatetimeParser> DatetimeParser::Instance(
     const DatetimeModel* model, const UniLib& unilib,
-    ZlibDecompressor* decompressor) {
+    const CalendarLib& calendarlib, ZlibDecompressor* decompressor) {
   std::unique_ptr<DatetimeParser> result(
-      new DatetimeParser(model, unilib, decompressor));
+      new DatetimeParser(model, unilib, calendarlib, decompressor));
   if (!result->initialized_) {
     result.reset();
   }
@@ -37,8 +37,9 @@
 }
 
 DatetimeParser::DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
+                               const CalendarLib& calendarlib,
                                ZlibDecompressor* decompressor)
-    : unilib_(unilib) {
+    : unilib_(unilib), calendarlib_(calendarlib) {
   initialized_ = false;
 
   if (model == nullptr) {
@@ -54,7 +55,7 @@
                                          regex->compressed_pattern(),
                                          decompressor);
           if (!regex_pattern) {
-            TC_LOG(ERROR) << "Couldn't create rule pattern.";
+            TC3_LOG(ERROR) << "Couldn't create rule pattern.";
             return;
           }
           rules_.push_back({std::move(regex_pattern), regex, pattern});
@@ -75,7 +76,7 @@
                                      extractor->compressed_pattern(),
                                      decompressor);
       if (!regex_pattern) {
-        TC_LOG(ERROR) << "Couldn't create extractor pattern";
+        TC3_LOG(ERROR) << "Couldn't create extractor pattern";
         return;
       }
       extractor_rules_.push_back(std::move(regex_pattern));
@@ -393,7 +394,7 @@
 
   result->granularity = GetGranularity(parse);
 
-  if (!calendar_lib_.InterpretParseData(
+  if (!calendarlib_.InterpretParseData(
           parse, reference_time_ms_utc, reference_timezone, reference_locale,
           result->granularity, &(result->time_ms_utc))) {
     return false;
@@ -402,4 +403,4 @@
   return true;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/datetime/parser.h b/annotator/datetime/parser.h
similarity index 86%
rename from datetime/parser.h
rename to annotator/datetime/parser.h
index 0666607..9b91833 100644
--- a/datetime/parser.h
+++ b/annotator/datetime/parser.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_DATETIME_PARSER_H_
-#define LIBTEXTCLASSIFIER_DATETIME_PARSER_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
 
 #include <memory>
 #include <string>
@@ -23,15 +23,15 @@
 #include <unordered_set>
 #include <vector>
 
-#include "datetime/extractor.h"
-#include "model_generated.h"
-#include "types.h"
-#include "util/base/integral_types.h"
-#include "util/calendar/calendar.h"
-#include "util/utf8/unilib.h"
-#include "zlib-utils.h"
+#include "annotator/datetime/extractor.h"
+#include "annotator/model_generated.h"
+#include "annotator/types.h"
+#include "annotator/zlib-utils.h"
+#include "utils/base/integral_types.h"
+#include "utils/calendar/calendar.h"
+#include "utils/utf8/unilib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Parses datetime expressions in the input and resolves them to actual absolute
 // time.
@@ -39,7 +39,7 @@
  public:
   static std::unique_ptr<DatetimeParser> Instance(
       const DatetimeModel* model, const UniLib& unilib,
-      ZlibDecompressor* decompressor);
+      const CalendarLib& calendarlib, ZlibDecompressor* decompressor);
 
   // Parses the dates in 'input' and fills result. Makes sure that the results
   // do not overlap.
@@ -58,6 +58,7 @@
 
  protected:
   DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
+                 const CalendarLib& calendarlib,
                  ZlibDecompressor* decompressor);
 
   // Returns a list of locale ids for given locale spec string (comma-separated
@@ -101,6 +102,7 @@
  private:
   bool initialized_;
   const UniLib& unilib_;
+  const CalendarLib& calendarlib_;
   std::vector<CompiledRule> rules_;
   std::unordered_map<int, std::vector<int>> locale_to_rules_;
   std::vector<std::unique_ptr<const UniLib::RegexPattern>> extractor_rules_;
@@ -108,10 +110,9 @@
       type_and_locale_to_extractor_rule_;
   std::unordered_map<std::string, int> locale_string_to_id_;
   std::vector<int> default_locale_ids_;
-  CalendarLib calendar_lib_;
   bool use_extractors_for_locating_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_DATETIME_PARSER_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
diff --git a/datetime/parser_test.cc b/annotator/datetime/parser_test.cc
similarity index 77%
rename from datetime/parser_test.cc
rename to annotator/datetime/parser_test.cc
index e61ed12..d46accf 100644
--- a/datetime/parser_test.cc
+++ b/annotator/datetime/parser_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -23,18 +23,18 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-#include "datetime/parser.h"
-#include "model_generated.h"
-#include "text-classifier.h"
-#include "types-test-util.h"
+#include "annotator/annotator.h"
+#include "annotator/datetime/parser.h"
+#include "annotator/model_generated.h"
+#include "annotator/types-test-util.h"
 
 using testing::ElementsAreArray;
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 std::string GetModelPath() {
-  return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
+  return TC3_TEST_DATA_DIR;
 }
 
 std::string ReadFile(const std::string& file_name) {
@@ -55,9 +55,9 @@
  public:
   void SetUp() override {
     model_buffer_ = ReadFile(GetModelPath() + "test_model.fb");
-    classifier_ = TextClassifier::FromUnownedBuffer(
-        model_buffer_.data(), model_buffer_.size(), &unilib_);
-    TC_CHECK(classifier_);
+    classifier_ = Annotator::FromUnownedBuffer(model_buffer_.data(),
+                                               model_buffer_.size(), &unilib_);
+    TC3_CHECK(classifier_);
     parser_ = classifier_->DatetimeParserForTests();
   }
 
@@ -66,8 +66,8 @@
     std::vector<DatetimeParseResultSpan> results;
     if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
                         anchor_start_end, &results)) {
-      TC_LOG(ERROR) << text;
-      TC_CHECK(false);
+      TC3_LOG(ERROR) << text;
+      TC3_CHECK(false);
     }
     return results.empty();
   }
@@ -84,8 +84,8 @@
         std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '{');
     auto brace_end_it =
         std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '}');
-    TC_CHECK(brace_open_it != marked_text_unicode.end());
-    TC_CHECK(brace_end_it != marked_text_unicode.end());
+    TC3_CHECK(brace_open_it != marked_text_unicode.end());
+    TC3_CHECK(brace_end_it != marked_text_unicode.end());
 
     std::string text;
     text +=
@@ -98,11 +98,11 @@
 
     if (!parser_->Parse(text, 0, timezone, locales, ModeFlag_ANNOTATION,
                         anchor_start_end, &results)) {
-      TC_LOG(ERROR) << text;
-      TC_CHECK(false);
+      TC3_LOG(ERROR) << text;
+      TC3_CHECK(false);
     }
     if (results.empty()) {
-      TC_LOG(ERROR) << "No results.";
+      TC3_LOG(ERROR) << "No results.";
       return false;
     }
 
@@ -124,16 +124,16 @@
         {{expected_start_index, expected_end_index},
          {expected_ms_utc, expected_granularity},
          /*target_classification_score=*/1.0,
-         /*priority_score=*/0.0}};
+         /*priority_score=*/0.1}};
     const bool matches =
         testing::Matches(ElementsAreArray(expected))(filtered_results);
     if (!matches) {
-      TC_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: "
-                    << FormatMillis(expected[0].data.time_ms_utc);
+      TC3_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: "
+                     << FormatMillis(expected[0].data.time_ms_utc);
       for (int i = 0; i < filtered_results.size(); ++i) {
-        TC_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i]
-                      << " which corresponds to: "
-                      << FormatMillis(filtered_results[i].data.time_ms_utc);
+        TC3_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i]
+                       << " which corresponds to: "
+                       << FormatMillis(filtered_results[i].data.time_ms_utc);
       }
     }
     return matches;
@@ -149,7 +149,7 @@
 
  protected:
   std::string model_buffer_;
-  std::unique_ptr<TextClassifier> classifier_;
+  std::unique_ptr<Annotator> classifier_;
   const DatetimeParser* parser_;
   UniLib unilib_;
 };
@@ -158,7 +158,6 @@
 TEST_F(ParserTest, ParseShort) {
   EXPECT_TRUE(
       ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectly("{three days ago}", -262800000, GRANULARITY_DAY));
 }
 
 TEST_F(ParserTest, Parse) {
@@ -176,30 +175,23 @@
                               GRANULARITY_SECOND));
   EXPECT_TRUE(
       ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29},573", 1277512289000,
+  EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29}", 1277512289000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{11:42:35}.173", 38555000, GRANULARITY_SECOND));
   EXPECT_TRUE(
-      ParsesCorrectly("{23/Apr 11:42:35},173", 9715355000, GRANULARITY_SECOND));
+      ParsesCorrectly("{23/Apr 11:42:35}", 9715355000, GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}.883", 1429782155000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}", 1429782155000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}.883", 1429782155000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}", 1429782155000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}", 1429782155000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}.883", 1429782155000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly(
@@ -221,26 +213,17 @@
   EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
                               GRANULARITY_HOUR));
 
-  EXPECT_TRUE(ParsesCorrectly("{today}", -3600000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectly("{today}", -57600000, GRANULARITY_DAY,
+  EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -3600000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -57600000, GRANULARITY_MINUTE,
                               /*anchor_start_end=*/false,
                               "America/Los_Angeles"));
-  EXPECT_TRUE(ParsesCorrectly("{next week}", 255600000, GRANULARITY_WEEK));
-  EXPECT_TRUE(ParsesCorrectly("{next day}", 82800000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectly("{in three days}", 255600000, GRANULARITY_DAY));
-  EXPECT_TRUE(
-      ParsesCorrectly("{in three weeks}", 1465200000, GRANULARITY_WEEK));
-  EXPECT_TRUE(ParsesCorrectly("{tomorrow}", 82800000, GRANULARITY_DAY));
   EXPECT_TRUE(
       ParsesCorrectly("{tomorrow at 4:00}", 97200000, GRANULARITY_MINUTE));
-  EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4}", 97200000, GRANULARITY_HOUR));
-  EXPECT_TRUE(ParsesCorrectly("{next wednesday}", 514800000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4am}", 97200000, GRANULARITY_HOUR));
   EXPECT_TRUE(
-      ParsesCorrectly("{next wednesday at 4}", 529200000, GRANULARITY_HOUR));
+      ParsesCorrectly("{wednesday at 4am}", 529200000, GRANULARITY_HOUR));
   EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000,
                               GRANULARITY_MINUTE));
-  EXPECT_TRUE(ParsesCorrectly("{Three days ago}", -262800000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectly("{three days ago}", -262800000, GRANULARITY_DAY));
 }
 
 TEST_F(ParserTest, ParseWithAnchor) {
@@ -271,15 +254,13 @@
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{März 16 08:12:04}", 6419524000,
                                     GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29},573", 1277512289000,
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29}", 1277512289000,
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{2006/01/22 04:11:05}", 1137899465000,
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(
       ParsesCorrectlyGerman("{11:42:35}", 38555000, GRANULARITY_SECOND));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{11:42:35}.173", 38555000, GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr 11:42:35},173", 9715355000,
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr 11:42:35}", 9715355000,
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015:11:42:35}", 1429782155000,
                                     GRANULARITY_SECOND));
@@ -287,18 +268,12 @@
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}", 1429782155000,
                                     GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}.883", 1429782155000,
-                                    GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}", 1429782155000,
                                     GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}.883", 1429782155000,
-                                    GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/15 11:42:35}", 1429782155000,
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}", 1429782155000,
                                     GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}.883", 1429782155000,
-                                    GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{19/apr/2010:06:36:15}", 1271651775000,
                                     GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30}", 1514777400000,
@@ -309,32 +284,12 @@
                                     GRANULARITY_HOUR));
   EXPECT_TRUE(
       ParsesCorrectlyGerman("{14.03.2017}", 1489446000000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{heute}", -3600000, GRANULARITY_DAY));
   EXPECT_TRUE(
-      ParsesCorrectlyGerman("{nächste Woche}", 342000000, GRANULARITY_WEEK));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{nächsten Tag}", 82800000, GRANULARITY_DAY));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{in drei Tagen}", 255600000, GRANULARITY_DAY));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{in drei Wochen}", 1551600000, GRANULARITY_WEEK));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{vor drei Tagen}", -262800000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{morgen}", 82800000, GRANULARITY_DAY));
+      ParsesCorrectlyGerman("{morgen 0:00}", 82800000, GRANULARITY_MINUTE));
   EXPECT_TRUE(
       ParsesCorrectlyGerman("{morgen um 4:00}", 97200000, GRANULARITY_MINUTE));
   EXPECT_TRUE(
-      ParsesCorrectlyGerman("{morgen um 4}", 97200000, GRANULARITY_HOUR));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{nächsten Mittwoch}", 514800000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectlyGerman("{nächsten Mittwoch um 4}", 529200000,
-                                    GRANULARITY_HOUR));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{Vor drei Tagen}", -262800000, GRANULARITY_DAY));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{in einer woche}", 342000000, GRANULARITY_WEEK));
-  EXPECT_TRUE(
-      ParsesCorrectlyGerman("{in einer tag}", 82800000, GRANULARITY_DAY));
+      ParsesCorrectlyGerman("{morgen um 4 vorm}", 97200000, GRANULARITY_HOUR));
 }
 
 TEST_F(ParserTest, ParseNonUs) {
@@ -372,6 +327,7 @@
 
  protected:
   UniLib unilib_;
+  CalendarLib calendarlib_;
   flatbuffers::FlatBufferBuilder builder_;
   std::unique_ptr<DatetimeParser> parser_;
 };
@@ -412,7 +368,7 @@
       flatbuffers::GetRoot<DatetimeModel>(builder_.GetBufferPointer());
   ASSERT_TRUE(model_fb);
 
-  parser_ = DatetimeParser::Instance(model_fb, unilib_,
+  parser_ = DatetimeParser::Instance(model_fb, unilib_, calendarlib_,
                                      /*decompressor=*/nullptr);
   ASSERT_TRUE(parser_);
 }
@@ -454,4 +410,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/feature-processor.cc b/annotator/feature-processor.cc
similarity index 96%
rename from feature-processor.cc
rename to annotator/feature-processor.cc
index 551e649..a18393b 100644
--- a/feature-processor.cc
+++ b/annotator/feature-processor.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  * limitations under the License.
  */
 
-#include "feature-processor.h"
+#include "annotator/feature-processor.h"
 
 #include <iterator>
 #include <set>
 #include <vector>
 
-#include "util/base/logging.h"
-#include "util/strings/utf8.h"
-#include "util/utf8/unicodetext.h"
+#include "utils/base/logging.h"
+#include "utils/strings/utf8.h"
+#include "utils/utf8/unicodetext.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 namespace internal {
 
@@ -111,16 +111,6 @@
   }
 }
 
-const UniLib* MaybeCreateUnilib(const UniLib* unilib,
-                                std::unique_ptr<UniLib>* owned_unilib) {
-  if (unilib) {
-    return unilib;
-  } else {
-    owned_unilib->reset(new UniLib);
-    return owned_unilib->get();
-  }
-}
-
 }  // namespace internal
 
 void FeatureProcessor::StripTokensFromOtherLines(
@@ -168,7 +158,7 @@
   if (options_->default_collection() < 0 ||
       options_->collections() == nullptr ||
       options_->default_collection() >= options_->collections()->size()) {
-    TC_LOG(ERROR)
+    TC3_LOG(ERROR)
         << "Invalid or missing default collection. Returning empty string.";
     return "";
   }
@@ -199,8 +189,8 @@
     }
     return result;
   } else {
-    TC_LOG(ERROR) << "Unknown tokenization type specified. Using "
-                     "internal.";
+    TC3_LOG(ERROR) << "Unknown tokenization type specified. Using "
+                      "internal.";
     return tokenizer_.Tokenize(text_unicode);
   }
 }
@@ -462,7 +452,7 @@
       return internal::CenterTokenFromMiddleOfSelection(span, tokens);
     }
   } else {
-    TC_LOG(ERROR) << "Invalid center token selection method.";
+    TC3_LOG(ERROR) << "Invalid center token selection method.";
     return kInvalidIndex;
   }
 }
@@ -473,7 +463,7 @@
   for (int i = 0; i < label_to_selection_.size(); ++i) {
     CodepointSpan span;
     if (!LabelToSpan(i, tokens, &span)) {
-      TC_LOG(ERROR) << "Could not convert label to span: " << i;
+      TC3_LOG(ERROR) << "Could not convert label to span: " << i;
       return false;
     }
     selection_label_spans->push_back(span);
@@ -711,7 +701,7 @@
     const UnicodeText& context_unicode, CodepointSpan input_span,
     bool only_use_line_with_click, std::vector<Token>* tokens,
     int* click_pos) const {
-  TC_CHECK(tokens != nullptr);
+  TC3_CHECK(tokens != nullptr);
 
   if (options_->split_tokens_on_selection_boundaries()) {
     internal::SplitTokensOnSelectionBoundaries(input_span, tokens);
@@ -777,8 +767,8 @@
     const float supported_codepoint_ratio =
         SupportedCodepointsRatio(token_span, tokens);
     if (supported_codepoint_ratio < options_->min_supported_codepoint_ratio()) {
-      TC_VLOG(1) << "Not enough supported codepoints in the context: "
-                 << supported_codepoint_ratio;
+      TC3_VLOG(1) << "Not enough supported codepoints in the context: "
+                  << supported_codepoint_ratio;
       return false;
     }
   }
@@ -797,7 +787,7 @@
     if (!AppendTokenFeaturesWithCache(tokens[i], selection_span_for_feature,
                                       embedding_executor, embedding_cache,
                                       features.get())) {
-      TC_LOG(ERROR) << "Could not get token features.";
+      TC3_LOG(ERROR) << "Could not get token features.";
       return false;
     }
   }
@@ -808,7 +798,7 @@
   if (!AppendTokenFeaturesWithCache(Token(), selection_span_for_feature,
                                     embedding_executor, embedding_cache,
                                     padding_features.get())) {
-    TC_LOG(ERROR) << "Count not get padding token features.";
+    TC3_LOG(ERROR) << "Count not get padding token features.";
     return false;
   }
 
@@ -816,7 +806,7 @@
                                             std::move(padding_features),
                                             options_, feature_vector_size);
   if (!*cached_features) {
-    TC_LOG(ERROR) << "Cound not create cached features.";
+    TC3_LOG(ERROR) << "Cound not create cached features.";
     return false;
   }
 
@@ -945,7 +935,7 @@
       if (!feature_extractor_.Extract(
               token, token.IsContainedInSpan(selection_span_for_feature),
               /*sparse_features=*/nullptr, &dense_features)) {
-        TC_LOG(ERROR) << "Could not extract token's dense features.";
+        TC3_LOG(ERROR) << "Could not extract token's dense features.";
         return false;
       }
 
@@ -964,7 +954,7 @@
   if (!feature_extractor_.Extract(
           token, token.IsContainedInSpan(selection_span_for_feature),
           &sparse_features, &dense_features)) {
-    TC_LOG(ERROR) << "Could not extract token's features.";
+    TC3_LOG(ERROR) << "Could not extract token's features.";
     return false;
   }
 
@@ -978,7 +968,7 @@
                           {static_cast<int>(sparse_features.size())}),
           /*dest=*/output_features_end - embedding_size,
           /*dest_size=*/embedding_size)) {
-    TC_LOG(ERROR) << "Cound not embed token's sparse features.";
+    TC3_LOG(ERROR) << "Cound not embed token's sparse features.";
     return false;
   }
 
@@ -995,4 +985,4 @@
   return true;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/feature-processor.h b/annotator/feature-processor.h
similarity index 91%
rename from feature-processor.h
rename to annotator/feature-processor.h
index 98d3449..2d04253 100644
--- a/feature-processor.h
+++ b/annotator/feature-processor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
 
 // Feature processing for FFModel (feed-forward SmartSelection model).
 
-#ifndef LIBTEXTCLASSIFIER_FEATURE_PROCESSOR_H_
-#define LIBTEXTCLASSIFIER_FEATURE_PROCESSOR_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_FEATURE_PROCESSOR_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_FEATURE_PROCESSOR_H_
 
 #include <map>
 #include <memory>
@@ -25,17 +25,17 @@
 #include <string>
 #include <vector>
 
-#include "cached-features.h"
-#include "model_generated.h"
-#include "token-feature-extractor.h"
-#include "tokenizer.h"
-#include "types.h"
-#include "util/base/integral_types.h"
-#include "util/base/logging.h"
-#include "util/utf8/unicodetext.h"
-#include "util/utf8/unilib.h"
+#include "annotator/cached-features.h"
+#include "annotator/model_generated.h"
+#include "annotator/token-feature-extractor.h"
+#include "annotator/tokenizer.h"
+#include "annotator/types.h"
+#include "utils/base/integral_types.h"
+#include "utils/base/logging.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 constexpr int kInvalidLabel = -1;
 
@@ -64,11 +64,6 @@
 void StripOrPadTokens(TokenSpan relative_click_span, int context_size,
                       std::vector<Token>* tokens, int* click_pos);
 
-// If unilib is not nullptr, just returns unilib. Otherwise, if unilib is
-// nullptr, will create UniLib, assign ownership to owned_unilib, and return it.
-const UniLib* MaybeCreateUnilib(const UniLib* unilib,
-                                std::unique_ptr<UniLib>* owned_unilib);
-
 }  // namespace internal
 
 // Converts a codepoint span to a token span in the given list of tokens.
@@ -93,12 +88,8 @@
   // identical.
   typedef std::map<CodepointSpan, std::vector<float>> EmbeddingCache;
 
-  // If unilib is nullptr, will create and own an instance of a UniLib,
-  // otherwise will use what's passed in.
-  explicit FeatureProcessor(const FeatureProcessorOptions* options,
-                            const UniLib* unilib = nullptr)
-      : owned_unilib_(nullptr),
-        unilib_(internal::MaybeCreateUnilib(unilib, &owned_unilib_)),
+  FeatureProcessor(const FeatureProcessorOptions* options, const UniLib* unilib)
+      : unilib_(unilib),
         feature_extractor_(internal::BuildTokenFeatureExtractorOptions(options),
                            *unilib_),
         options_(options),
@@ -303,7 +294,6 @@
                                     std::vector<float>* output_features) const;
 
  private:
-  std::unique_ptr<UniLib> owned_unilib_;
   const UniLib* unilib_;
 
  protected:
@@ -336,6 +326,6 @@
   Tokenizer tokenizer_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_FEATURE_PROCESSOR_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_FEATURE_PROCESSOR_H_
diff --git a/feature-processor_test.cc b/annotator/feature-processor_test.cc
similarity index 94%
rename from feature-processor_test.cc
rename to annotator/feature-processor_test.cc
index 58b3033..c9f0e0d 100644
--- a/feature-processor_test.cc
+++ b/annotator/feature-processor_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#include "feature-processor.h"
+#include "annotator/feature-processor.h"
 
-#include "model-executor.h"
-#include "tensor-view.h"
+#include "annotator/model-executor.h"
+#include "utils/tensor-view.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 using testing::ElementsAreArray;
@@ -66,7 +66,7 @@
  public:
   bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
                     int dest_size) const override {
-    TC_CHECK_GE(dest_size, 4);
+    TC3_CHECK_GE(dest_size, 4);
     EXPECT_EQ(sparse_features.size(), 1);
     dest[0] = sparse_features.data()[0];
     dest[1] = sparse_features.data()[0];
@@ -79,7 +79,13 @@
   std::vector<float> storage_;
 };
 
-TEST(FeatureProcessorTest, SplitTokensOnSelectionBoundariesMiddle) {
+class FeatureProcessorTest : public ::testing::Test {
+ protected:
+  FeatureProcessorTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+TEST_F(FeatureProcessorTest, SplitTokensOnSelectionBoundariesMiddle) {
   std::vector<Token> tokens{Token("HΔ›lló", 0, 5),
                             Token("fΔ›Δ›baΕ™@google.com", 6, 23),
                             Token("heΕ™e!", 24, 29)};
@@ -96,7 +102,7 @@
   // clang-format on
 }
 
-TEST(FeatureProcessorTest, SplitTokensOnSelectionBoundariesBegin) {
+TEST_F(FeatureProcessorTest, SplitTokensOnSelectionBoundariesBegin) {
   std::vector<Token> tokens{Token("HΔ›lló", 0, 5),
                             Token("fΔ›Δ›baΕ™@google.com", 6, 23),
                             Token("heΕ™e!", 24, 29)};
@@ -112,7 +118,7 @@
   // clang-format on
 }
 
-TEST(FeatureProcessorTest, SplitTokensOnSelectionBoundariesEnd) {
+TEST_F(FeatureProcessorTest, SplitTokensOnSelectionBoundariesEnd) {
   std::vector<Token> tokens{Token("HΔ›lló", 0, 5),
                             Token("fΔ›Δ›baΕ™@google.com", 6, 23),
                             Token("heΕ™e!", 24, 29)};
@@ -128,7 +134,7 @@
   // clang-format on
 }
 
-TEST(FeatureProcessorTest, SplitTokensOnSelectionBoundariesWhole) {
+TEST_F(FeatureProcessorTest, SplitTokensOnSelectionBoundariesWhole) {
   std::vector<Token> tokens{Token("HΔ›lló", 0, 5),
                             Token("fΔ›Δ›baΕ™@google.com", 6, 23),
                             Token("heΕ™e!", 24, 29)};
@@ -143,7 +149,7 @@
   // clang-format on
 }
 
-TEST(FeatureProcessorTest, SplitTokensOnSelectionBoundariesCrossToken) {
+TEST_F(FeatureProcessorTest, SplitTokensOnSelectionBoundariesCrossToken) {
   std::vector<Token> tokens{Token("HΔ›lló", 0, 5),
                             Token("fΔ›Δ›baΕ™@google.com", 6, 23),
                             Token("heΕ™e!", 24, 29)};
@@ -160,14 +166,13 @@
   // clang-format on
 }
 
-TEST(FeatureProcessorTest, KeepLineWithClickFirst) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, KeepLineWithClickFirst) {
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   const std::string context = "FiΕ™st LinΔ›\nSΔ›cond LinΔ›\nThiΕ™d LinΔ›";
   const CodepointSpan span = {0, 5};
@@ -186,14 +191,13 @@
               ElementsAreArray({Token("FiΕ™st", 0, 5), Token("LinΔ›", 6, 10)}));
 }
 
-TEST(FeatureProcessorTest, KeepLineWithClickSecond) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, KeepLineWithClickSecond) {
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   const std::string context = "FiΕ™st LinΔ›\nSΔ›cond LinΔ›\nThiΕ™d LinΔ›";
   const CodepointSpan span = {18, 22};
@@ -212,14 +216,13 @@
                           {Token("SΔ›cond", 11, 17), Token("LinΔ›", 18, 22)}));
 }
 
-TEST(FeatureProcessorTest, KeepLineWithClickThird) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, KeepLineWithClickThird) {
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   const std::string context = "FiΕ™st LinΔ›\nSΔ›cond LinΔ›\nThiΕ™d LinΔ›";
   const CodepointSpan span = {24, 33};
@@ -238,14 +241,13 @@
                           {Token("ThiΕ™d", 23, 28), Token("LinΔ›", 29, 33)}));
 }
 
-TEST(FeatureProcessorTest, KeepLineWithClickSecondWithPipe) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, KeepLineWithClickSecondWithPipe) {
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   const std::string context = "FiΕ™st LinΔ›|SΔ›cond LinΔ›\nThiΕ™d LinΔ›";
   const CodepointSpan span = {18, 22};
@@ -264,14 +266,13 @@
                           {Token("SΔ›cond", 11, 17), Token("LinΔ›", 18, 22)}));
 }
 
-TEST(FeatureProcessorTest, KeepLineWithCrosslineClick) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, KeepLineWithCrosslineClick) {
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   const std::string context = "FiΕ™st LinΔ›\nSΔ›cond LinΔ›\nThiΕ™d LinΔ›";
   const CodepointSpan span = {5, 23};
@@ -292,8 +293,7 @@
                            Token("ThiΕ™d", 23, 28), Token("LinΔ›", 29, 33)}));
 }
 
-TEST(FeatureProcessorTest, SpanToLabel) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, SpanToLabel) {
   FeatureProcessorOptionsT options;
   options.context_size = 1;
   options.max_selection_span = 1;
@@ -309,7 +309,7 @@
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
   std::vector<Token> tokens = feature_processor.Tokenize("one, two, three");
   ASSERT_EQ(3, tokens.size());
   int label;
@@ -328,7 +328,7 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor2(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options2_fb.data()),
-      &unilib);
+      &unilib_);
   int label2;
   ASSERT_TRUE(feature_processor2.SpanToLabel({5, 8}, tokens, &label2));
   EXPECT_EQ(label, label2);
@@ -350,7 +350,7 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor3(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options3_fb.data()),
-      &unilib);
+      &unilib_);
   tokens = feature_processor3.Tokenize("zero, one, two, three, four");
   ASSERT_TRUE(feature_processor3.SpanToLabel({6, 15}, tokens, &label2));
   EXPECT_NE(kInvalidLabel, label2);
@@ -367,8 +367,7 @@
   EXPECT_EQ(label2, label3);
 }
 
-TEST(FeatureProcessorTest, SpanToLabelIgnoresPunctuation) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, SpanToLabelIgnoresPunctuation) {
   FeatureProcessorOptionsT options;
   options.context_size = 1;
   options.max_selection_span = 1;
@@ -384,7 +383,7 @@
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
   std::vector<Token> tokens = feature_processor.Tokenize("one, two, three");
   ASSERT_EQ(3, tokens.size());
   int label;
@@ -403,7 +402,7 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor2(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options2_fb.data()),
-      &unilib);
+      &unilib_);
   int label2;
   ASSERT_TRUE(feature_processor2.SpanToLabel({5, 8}, tokens, &label2));
   EXPECT_EQ(label, label2);
@@ -425,7 +424,7 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor3(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options3_fb.data()),
-      &unilib);
+      &unilib_);
   tokens = feature_processor3.Tokenize("zero, one, two, three, four");
   ASSERT_TRUE(feature_processor3.SpanToLabel({6, 15}, tokens, &label2));
   EXPECT_NE(kInvalidLabel, label2);
@@ -442,7 +441,7 @@
   EXPECT_EQ(label2, label3);
 }
 
-TEST(FeatureProcessorTest, CenterTokenFromClick) {
+TEST_F(FeatureProcessorTest, CenterTokenFromClick) {
   int token_index;
 
   // Exactly aligned indices.
@@ -464,7 +463,7 @@
   EXPECT_EQ(token_index, kInvalidIndex);
 }
 
-TEST(FeatureProcessorTest, CenterTokenFromMiddleOfSelection) {
+TEST_F(FeatureProcessorTest, CenterTokenFromMiddleOfSelection) {
   int token_index;
 
   // Selection of length 3. Exactly aligned indices.
@@ -507,7 +506,7 @@
   EXPECT_EQ(token_index, -1);
 }
 
-TEST(FeatureProcessorTest, SupportedCodepointsRatio) {
+TEST_F(FeatureProcessorTest, SupportedCodepointsRatio) {
   FeatureProcessorOptionsT options;
   options.context_size = 2;
   options.max_selection_span = 2;
@@ -556,10 +555,9 @@
   }
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
-  CREATE_UNILIB_FOR_TESTING;
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
   EXPECT_THAT(feature_processor.SupportedCodepointsRatio(
                   {0, 3}, feature_processor.Tokenize("aaa bbb ccc")),
               FloatEq(1.0));
@@ -596,7 +594,7 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor2(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options2_fb.data()),
-      &unilib);
+      &unilib_);
   EXPECT_TRUE(feature_processor2.HasEnoughSupportedCodepoints(
       tokens, /*token_span=*/{0, 3}));
 
@@ -605,7 +603,7 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor3(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options3_fb.data()),
-      &unilib);
+      &unilib_);
   EXPECT_TRUE(feature_processor3.HasEnoughSupportedCodepoints(
       tokens, /*token_span=*/{0, 3}));
 
@@ -614,12 +612,12 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor4(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options4_fb.data()),
-      &unilib);
+      &unilib_);
   EXPECT_FALSE(feature_processor4.HasEnoughSupportedCodepoints(
       tokens, /*token_span=*/{0, 3}));
 }
 
-TEST(FeatureProcessorTest, InSpanFeature) {
+TEST_F(FeatureProcessorTest, InSpanFeature) {
   FeatureProcessorOptionsT options;
   options.context_size = 2;
   options.max_selection_span = 2;
@@ -629,10 +627,9 @@
   options.extract_selection_mask_feature = true;
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
-  CREATE_UNILIB_FOR_TESTING;
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   std::unique_ptr<CachedFeatures> cached_features;
 
@@ -656,7 +653,7 @@
   EXPECT_THAT(features[24], FloatEq(0.0));
 }
 
-TEST(FeatureProcessorTest, EmbeddingCache) {
+TEST_F(FeatureProcessorTest, EmbeddingCache) {
   FeatureProcessorOptionsT options;
   options.context_size = 2;
   options.max_selection_span = 2;
@@ -672,10 +669,9 @@
   options.bounds_sensitive_features->num_tokens_after = 3;
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
-  CREATE_UNILIB_FOR_TESTING;
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   std::unique_ptr<CachedFeatures> cached_features;
 
@@ -726,7 +722,7 @@
               ElementsAreFloat(embedding_cache.at({20, 23})));
 }
 
-TEST(FeatureProcessorTest, StripUnusedTokensWithNoRelativeClick) {
+TEST_F(FeatureProcessorTest, StripUnusedTokensWithNoRelativeClick) {
   std::vector<Token> tokens_orig{
       Token("0", 0, 0), Token("1", 0, 0), Token("2", 0, 0),  Token("3", 0, 0),
       Token("4", 0, 0), Token("5", 0, 0), Token("6", 0, 0),  Token("7", 0, 0),
@@ -776,7 +772,7 @@
   EXPECT_EQ(click_index, 2);
 }
 
-TEST(FeatureProcessorTest, StripUnusedTokensWithRelativeClick) {
+TEST_F(FeatureProcessorTest, StripUnusedTokensWithRelativeClick) {
   std::vector<Token> tokens_orig{
       Token("0", 0, 0), Token("1", 0, 0), Token("2", 0, 0),  Token("3", 0, 0),
       Token("4", 0, 0), Token("5", 0, 0), Token("6", 0, 0),  Token("7", 0, 0),
@@ -838,8 +834,7 @@
   EXPECT_EQ(click_index, 5);
 }
 
-TEST(FeatureProcessorTest, InternalTokenizeOnScriptChange) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, InternalTokenizeOnScriptChange) {
   FeatureProcessorOptionsT options;
   options.tokenization_codepoint_config.emplace_back(
       new TokenizationCodepointRangeT());
@@ -855,7 +850,7 @@
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   EXPECT_EQ(feature_processor.Tokenize("μ•¨λΌλ°°λ§ˆ123μ›Ήμ‚¬μ΄νŠΈ"),
             std::vector<Token>({Token("μ•¨λΌλ°°λ§ˆ123μ›Ήμ‚¬μ΄νŠΈ", 0, 11)}));
@@ -865,21 +860,23 @@
       PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor2(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb2.data()),
-      &unilib);
+      &unilib_);
 
   EXPECT_EQ(feature_processor2.Tokenize("μ•¨λΌλ°°λ§ˆ123μ›Ήμ‚¬μ΄νŠΈ"),
             std::vector<Token>({Token("μ•¨λΌλ°°λ§ˆ", 0, 4), Token("123", 4, 7),
                                 Token("μ›Ήμ‚¬μ΄νŠΈ", 7, 11)}));
 }
 
-#ifdef LIBTEXTCLASSIFIER_TEST_ICU
-TEST(FeatureProcessorTest, ICUTokenize) {
+#ifdef TC3_TEST_ICU
+TEST_F(FeatureProcessorTest, ICUTokenize) {
   FeatureProcessorOptionsT options;
   options.tokenization_type = FeatureProcessorOptions_::TokenizationType_ICU;
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
+  UniLib unilib;
   TestingFeatureProcessor feature_processor(
-      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()));
+      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
+      &unilib);
   std::vector<Token> tokens = feature_processor.Tokenize("ΰΈžΰΈ£ΰΈ°ΰΈšΰΈ²ΰΈ—ΰΈͺΰΈ‘ΰΉ€ΰΈ”ΰΉ‡ΰΈˆΰΈžΰΈ£ΰΈ°ΰΈ›ΰΈ£ΰΈ‘ΰΈ΄");
   ASSERT_EQ(tokens,
             // clang-format off
@@ -892,15 +889,17 @@
 }
 #endif
 
-#ifdef LIBTEXTCLASSIFIER_TEST_ICU
-TEST(FeatureProcessorTest, ICUTokenizeWithWhitespaces) {
+#ifdef TC3_TEST_ICU
+TEST_F(FeatureProcessorTest, ICUTokenizeWithWhitespaces) {
   FeatureProcessorOptionsT options;
   options.tokenization_type = FeatureProcessorOptions_::TokenizationType_ICU;
   options.icu_preserve_whitespace_tokens = true;
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
+  UniLib unilib;
   TestingFeatureProcessor feature_processor(
-      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()));
+      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
+      &unilib);
   std::vector<Token> tokens =
       feature_processor.Tokenize("ΰΈžΰΈ£ΰΈ°ΰΈšΰΈ²ΰΈ— ΰΈͺΰΈ‘ΰΉ€ΰΈ”ΰΉ‡ΰΈˆ พระ ΰΈ›ΰΈ£ ΰΈ‘ΰΈ΄");
   ASSERT_EQ(tokens,
@@ -918,8 +917,8 @@
 }
 #endif
 
-#ifdef LIBTEXTCLASSIFIER_TEST_ICU
-TEST(FeatureProcessorTest, MixedTokenize) {
+#ifdef TC3_TEST_ICU
+TEST_F(FeatureProcessorTest, MixedTokenize) {
   FeatureProcessorOptionsT options;
   options.tokenization_type = FeatureProcessorOptions_::TokenizationType_MIXED;
 
@@ -963,8 +962,10 @@
   }
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
+  UniLib unilib;
   TestingFeatureProcessor feature_processor(
-      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()));
+      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
+      &unilib);
   std::vector<Token> tokens = feature_processor.Tokenize(
       "こんにけはJapanese-lΔ…nguagΔ™ text δΈ–η•Œ http://www.google.com/");
   ASSERT_EQ(tokens,
@@ -978,8 +979,7 @@
 }
 #endif
 
-TEST(FeatureProcessorTest, IgnoredSpanBoundaryCodepoints) {
-  CREATE_UNILIB_FOR_TESTING;
+TEST_F(FeatureProcessorTest, IgnoredSpanBoundaryCodepoints) {
   FeatureProcessorOptionsT options;
   options.ignored_span_boundary_codepoints.push_back('.');
   options.ignored_span_boundary_codepoints.push_back(',');
@@ -989,7 +989,7 @@
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
-      &unilib);
+      &unilib_);
 
   const std::string text1_utf8 = "Δ›šΔΕ™";
   const UnicodeText text1 = UTF8ToUnicodeText(text1_utf8, /*do_copy=*/false);
@@ -1091,7 +1091,7 @@
             std::make_pair(0, 0));
 }
 
-TEST(FeatureProcessorTest, CodepointSpanToTokenSpan) {
+TEST_F(FeatureProcessorTest, CodepointSpanToTokenSpan) {
   const std::vector<Token> tokens{Token("HΔ›lló", 0, 5),
                                   Token("fΔ›Δ›baΕ™@google.com", 6, 23),
                                   Token("heΕ™e!", 24, 29)};
@@ -1122,4 +1122,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/annotator/knowledge/knowledge-engine-dummy.h b/annotator/knowledge/knowledge-engine-dummy.h
new file mode 100644
index 0000000..a6285dc
--- /dev/null
+++ b/annotator/knowledge/knowledge-engine-dummy.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_KNOWLEDGE_KNOWLEDGE_ENGINE_DUMMY_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_KNOWLEDGE_KNOWLEDGE_ENGINE_DUMMY_H_
+
+#include <string>
+
+#include "annotator/types.h"
+#include "utils/utf8/unilib.h"
+
+namespace libtextclassifier3 {
+
+// A dummy implementation of the knowledge engine.
+class KnowledgeEngine {
+ public:
+  explicit KnowledgeEngine(const UniLib* unilib) {}
+
+  bool Initialize(const std::string& serialized_config) { return true; }
+
+  bool ClassifyText(const std::string& context, CodepointSpan selection_indices,
+                    ClassificationResult* classification_result) const {
+    return false;
+  }
+
+  bool Chunk(const std::string& context,
+             std::vector<AnnotatedSpan>* result) const {
+    return true;
+  }
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_KNOWLEDGE_KNOWLEDGE_ENGINE_DUMMY_H_
diff --git a/util/calendar/calendar.h b/annotator/knowledge/knowledge-engine.h
similarity index 64%
copy from util/calendar/calendar.h
copy to annotator/knowledge/knowledge-engine.h
index b0cf2e6..4776b26 100644
--- a/util/calendar/calendar.h
+++ b/annotator/knowledge/knowledge-engine.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
-#define LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_KNOWLEDGE_KNOWLEDGE_ENGINE_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_KNOWLEDGE_KNOWLEDGE_ENGINE_H_
 
-#include "util/calendar/calendar-icu.h"
+#include "annotator/knowledge/knowledge-engine-dummy.h"
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_KNOWLEDGE_KNOWLEDGE_ENGINE_H_
diff --git a/annotator/model-executor.cc b/annotator/model-executor.cc
new file mode 100644
index 0000000..7c57e8f
--- /dev/null
+++ b/annotator/model-executor.cc
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/model-executor.h"
+
+#include "annotator/quantization.h"
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+TensorView<float> ModelExecutor::ComputeLogits(
+    const TensorView<float>& features, tflite::Interpreter* interpreter) const {
+  if (!interpreter) {
+    return TensorView<float>::Invalid();
+  }
+  interpreter->ResizeInputTensor(kInputIndexFeatures, features.shape());
+  if (interpreter->AllocateTensors() != kTfLiteOk) {
+    TC3_VLOG(1) << "Allocation failed.";
+    return TensorView<float>::Invalid();
+  }
+
+  SetInput<float>(kInputIndexFeatures, features, interpreter);
+
+  if (interpreter->Invoke() != kTfLiteOk) {
+    TC3_VLOG(1) << "Interpreter failed.";
+    return TensorView<float>::Invalid();
+  }
+
+  return OutputView<float>(kOutputIndexLogits, interpreter);
+}
+
+std::unique_ptr<TFLiteEmbeddingExecutor> TFLiteEmbeddingExecutor::FromBuffer(
+    const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
+    int quantization_bits) {
+  std::unique_ptr<TfLiteModelExecutor> executor =
+      TfLiteModelExecutor::FromBuffer(model_spec_buffer);
+  if (!executor) {
+    TC3_LOG(ERROR) << "Could not load TFLite model for embeddings.";
+    return nullptr;
+  }
+
+  std::unique_ptr<tflite::Interpreter> interpreter =
+      executor->CreateInterpreter();
+  if (!interpreter) {
+    TC3_LOG(ERROR) << "Could not build TFLite interpreter for embeddings.";
+    return nullptr;
+  }
+
+  if (interpreter->tensors_size() != 2) {
+    return nullptr;
+  }
+  const TfLiteTensor* embeddings = interpreter->tensor(0);
+  if (embeddings->dims->size != 2) {
+    return nullptr;
+  }
+  int num_buckets = embeddings->dims->data[0];
+  const TfLiteTensor* scales = interpreter->tensor(1);
+  if (scales->dims->size != 2 || scales->dims->data[0] != num_buckets ||
+      scales->dims->data[1] != 1) {
+    return nullptr;
+  }
+  int bytes_per_embedding = embeddings->dims->data[1];
+  if (!CheckQuantizationParams(bytes_per_embedding, quantization_bits,
+                               embedding_size)) {
+    TC3_LOG(ERROR) << "Mismatch in quantization parameters.";
+    return nullptr;
+  }
+
+  return std::unique_ptr<TFLiteEmbeddingExecutor>(new TFLiteEmbeddingExecutor(
+      std::move(executor), quantization_bits, num_buckets, bytes_per_embedding,
+      embedding_size, scales, embeddings, std::move(interpreter)));
+}
+
+TFLiteEmbeddingExecutor::TFLiteEmbeddingExecutor(
+    std::unique_ptr<TfLiteModelExecutor> executor, int quantization_bits,
+    int num_buckets, int bytes_per_embedding, int output_embedding_size,
+    const TfLiteTensor* scales, const TfLiteTensor* embeddings,
+    std::unique_ptr<tflite::Interpreter> interpreter)
+    : executor_(std::move(executor)),
+      quantization_bits_(quantization_bits),
+      num_buckets_(num_buckets),
+      bytes_per_embedding_(bytes_per_embedding),
+      output_embedding_size_(output_embedding_size),
+      scales_(scales),
+      embeddings_(embeddings),
+      interpreter_(std::move(interpreter)) {}
+
+bool TFLiteEmbeddingExecutor::AddEmbedding(
+    const TensorView<int>& sparse_features, float* dest, int dest_size) const {
+  if (dest_size != output_embedding_size_) {
+    TC3_LOG(ERROR) << "Mismatching dest_size and output_embedding_size: "
+                   << dest_size << " " << output_embedding_size_;
+    return false;
+  }
+  const int num_sparse_features = sparse_features.size();
+  for (int i = 0; i < num_sparse_features; ++i) {
+    const int bucket_id = sparse_features.data()[i];
+    if (bucket_id >= num_buckets_) {
+      return false;
+    }
+
+    if (!DequantizeAdd(scales_->data.f, embeddings_->data.uint8,
+                       bytes_per_embedding_, num_sparse_features,
+                       quantization_bits_, bucket_id, dest, dest_size)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace libtextclassifier3
diff --git a/annotator/model-executor.h b/annotator/model-executor.h
new file mode 100644
index 0000000..5ad3a7f
--- /dev/null
+++ b/annotator/model-executor.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Contains classes that can execute different models/parts of a model.
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_MODEL_EXECUTOR_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_MODEL_EXECUTOR_H_
+
+#include <memory>
+
+#include "annotator/types.h"
+#include "utils/base/logging.h"
+#include "utils/tensor-view.h"
+#include "utils/tflite-model-executor.h"
+
+namespace libtextclassifier3 {
+
+// Executor for the text selection prediction and classification models.
+class ModelExecutor : public TfLiteModelExecutor {
+ public:
+  static std::unique_ptr<ModelExecutor> FromModelSpec(
+      const tflite::Model* model_spec) {
+    auto model = TfLiteModelFromModelSpec(model_spec);
+    if (!model) {
+      return nullptr;
+    }
+    return std::unique_ptr<ModelExecutor>(new ModelExecutor(std::move(model)));
+  }
+
+  static std::unique_ptr<ModelExecutor> FromBuffer(
+      const flatbuffers::Vector<uint8_t>* model_spec_buffer) {
+    auto model = TfLiteModelFromBuffer(model_spec_buffer);
+    if (!model) {
+      return nullptr;
+    }
+    return std::unique_ptr<ModelExecutor>(new ModelExecutor(std::move(model)));
+  }
+
+  TensorView<float> ComputeLogits(const TensorView<float>& features,
+                                  tflite::Interpreter* interpreter) const;
+
+ protected:
+  explicit ModelExecutor(std::unique_ptr<const tflite::FlatBufferModel> model)
+      : TfLiteModelExecutor(std::move(model)) {}
+
+  static const int kInputIndexFeatures = 0;
+  static const int kOutputIndexLogits = 0;
+};
+
+// Executor for embedding sparse features into a dense vector.
+class EmbeddingExecutor {
+ public:
+  virtual ~EmbeddingExecutor() {}
+
+  // Embeds the sparse_features into a dense embedding and adds (+) it
+  // element-wise to the dest vector.
+  virtual bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
+                            int dest_size) const = 0;
+
+  // Returns true when the model is ready to be used, false otherwise.
+  virtual bool IsReady() const { return true; }
+};
+
+class TFLiteEmbeddingExecutor : public EmbeddingExecutor {
+ public:
+  static std::unique_ptr<TFLiteEmbeddingExecutor> FromBuffer(
+      const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
+      int quantization_bits);
+
+  // Embeds the sparse_features into a dense embedding and adds (+) it
+  // element-wise to the dest vector.
+  bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
+                    int dest_size) const;
+
+ protected:
+  explicit TFLiteEmbeddingExecutor(
+      std::unique_ptr<TfLiteModelExecutor> executor, int quantization_bits,
+      int num_buckets, int bytes_per_embedding, int output_embedding_size,
+      const TfLiteTensor* scales, const TfLiteTensor* embeddings,
+      std::unique_ptr<tflite::Interpreter> interpreter);
+
+  std::unique_ptr<TfLiteModelExecutor> executor_;
+
+  int quantization_bits_;
+  int num_buckets_ = -1;
+  int bytes_per_embedding_ = -1;
+  int output_embedding_size_ = -1;
+  const TfLiteTensor* scales_ = nullptr;
+  const TfLiteTensor* embeddings_ = nullptr;
+
+  // NOTE: This interpreter is used in a read-only way (as a storage for the
+  // model params), thus is still thread-safe.
+  std::unique_ptr<tflite::Interpreter> interpreter_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_MODEL_EXECUTOR_H_
diff --git a/model.fbs b/annotator/model.fbs
similarity index 78%
rename from model.fbs
rename to annotator/model.fbs
index fb9778b..1e869ea 100755
--- a/model.fbs
+++ b/annotator/model.fbs
@@ -1,5 +1,5 @@
 //
-// Copyright (C) 2017 The Android Open Source Project
+// Copyright (C) 2018 The Android Open Source Project
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -14,10 +14,12 @@
 // limitations under the License.
 //
 
+include "utils/intents/intent-config.fbs";
+
 file_identifier "TC2 ";
 
 // The possible model modes, represents a bit field.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 enum ModeFlag : int {
   NONE = 0,
   ANNOTATION = 1,
@@ -29,7 +31,7 @@
   ALL = 7,
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 enum DatetimeExtractorType : int {
   UNKNOWN_DATETIME_EXTRACTOR_TYPE = 0,
   AM = 1,
@@ -106,7 +108,7 @@
   THOUSAND = 72,
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 enum DatetimeGroupType : int {
   GROUP_UNKNOWN = 0,
   GROUP_UNUSED = 1,
@@ -129,20 +131,20 @@
   GROUP_DUMMY2 = 13,
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table CompressedBuffer {
   buffer:[ubyte];
   uncompressed_size:int;
 }
 
 // Options for the model that predicts text selection.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table SelectionModelOptions {
   // If true, before the selection is returned, the unpaired brackets contained
   // in the predicted selection are stripped from the both selection ends.
   // The bracket codepoints are defined in the Unicode standard:
   // http://www.unicode.org/Public/UNIDATA/BidiBrackets.txt
-  strip_unpaired_brackets:bool = 1;
+  strip_unpaired_brackets:bool = true;
 
   // Number of hypothetical click positions on either side of the actual click
   // to consider in order to enforce symmetry.
@@ -152,11 +154,11 @@
   batch_size:int = 1024;
 
   // Whether to always classify a suggested selection or only on demand.
-  always_classify_suggested_selection:bool = 0;
+  always_classify_suggested_selection:bool = false;
 }
 
 // Options for the model that classifies a text selection.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table ClassificationModelOptions {
   // Limits for phone numbers.
   phone_min_num_digits:int = 7;
@@ -170,8 +172,14 @@
   max_num_tokens:int = -1;
 }
 
+// Options for post-checks, checksums and verification to apply on a match.
+namespace libtextclassifier3;
+table VerificationOptions {
+  verify_luhn_checksum:bool = false;
+}
+
 // List of regular expression matchers to check.
-namespace libtextclassifier2.RegexModel_;
+namespace libtextclassifier3.RegexModel_;
 table Pattern {
   // The name of the collection of a match.
   collection_name:string;
@@ -181,7 +189,7 @@
   pattern:string;
 
   // The modes for which to apply the patterns.
-  enabled_modes:libtextclassifier2.ModeFlag = ALL;
+  enabled_modes:libtextclassifier3.ModeFlag = ALL;
 
   // The final score to assign to the results of this pattern.
   target_classification_score:float = 1;
@@ -192,31 +200,34 @@
   // If true, will use an approximate matching implementation implemented
   // using Find() instead of the true Match(). This approximate matching will
   // use the first Find() result and then check that it spans the whole input.
-  use_approximate_matching:bool = 0;
+  use_approximate_matching:bool = false;
 
-  compressed_pattern:libtextclassifier2.CompressedBuffer;
+  compressed_pattern:libtextclassifier3.CompressedBuffer;
+
+  // Verification to apply on a match.
+  verification_options:libtextclassifier3.VerificationOptions;
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table RegexModel {
-  patterns:[libtextclassifier2.RegexModel_.Pattern];
+  patterns:[libtextclassifier3.RegexModel_.Pattern];
 }
 
 // List of regex patterns.
-namespace libtextclassifier2.DatetimeModelPattern_;
+namespace libtextclassifier3.DatetimeModelPattern_;
 table Regex {
   pattern:string;
 
   // The ith entry specifies the type of the ith capturing group.
   // This is used to decide how the matched content has to be parsed.
-  groups:[libtextclassifier2.DatetimeGroupType];
+  groups:[libtextclassifier3.DatetimeGroupType];
 
-  compressed_pattern:libtextclassifier2.CompressedBuffer;
+  compressed_pattern:libtextclassifier3.CompressedBuffer;
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table DatetimeModelPattern {
-  regexes:[libtextclassifier2.DatetimeModelPattern_.Regex];
+  regexes:[libtextclassifier3.DatetimeModelPattern_.Regex];
 
   // List of locale indices in DatetimeModel that represent the locales that
   // these patterns should be used for. If empty, can be used for all locales.
@@ -225,63 +236,63 @@
   // The final score to assign to the results of this pattern.
   target_classification_score:float = 1;
 
-  // Priority score used for conflict resulution with the other models.
+  // Priority score used for conflict resolution with the other models.
   priority_score:float = 0;
 
   // The modes for which to apply the patterns.
-  enabled_modes:libtextclassifier2.ModeFlag = ALL;
+  enabled_modes:libtextclassifier3.ModeFlag = ALL;
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table DatetimeModelExtractor {
-  extractor:libtextclassifier2.DatetimeExtractorType;
+  extractor:libtextclassifier3.DatetimeExtractorType;
   pattern:string;
   locales:[int];
-  compressed_pattern:libtextclassifier2.CompressedBuffer;
+  compressed_pattern:libtextclassifier3.CompressedBuffer;
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table DatetimeModel {
   // List of BCP 47 locale strings representing all locales supported by the
   // model. The individual patterns refer back to them using an index.
   locales:[string];
 
-  patterns:[libtextclassifier2.DatetimeModelPattern];
-  extractors:[libtextclassifier2.DatetimeModelExtractor];
+  patterns:[libtextclassifier3.DatetimeModelPattern];
+  extractors:[libtextclassifier3.DatetimeModelExtractor];
 
   // If true, will use the extractors for determining the match location as
   // opposed to using the location where the global pattern matched.
-  use_extractors_for_locating:bool = 1;
+  use_extractors_for_locating:bool = true;
 
   // List of locale ids, rules of whose are always run, after the requested
   // ones.
   default_locales:[int];
 }
 
-namespace libtextclassifier2.DatetimeModelLibrary_;
+namespace libtextclassifier3.DatetimeModelLibrary_;
 table Item {
   key:string;
-  value:libtextclassifier2.DatetimeModel;
+  value:libtextclassifier3.DatetimeModel;
 }
 
 // A set of named DateTime models.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table DatetimeModelLibrary {
-  models:[libtextclassifier2.DatetimeModelLibrary_.Item];
+  models:[libtextclassifier3.DatetimeModelLibrary_.Item];
 }
 
 // Options controlling the output of the Tensorflow Lite models.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table ModelTriggeringOptions {
   // Lower bound threshold for filtering annotation model outputs.
   min_annotate_confidence:float = 0;
 
   // The modes for which to enable the models.
-  enabled_modes:libtextclassifier2.ModeFlag = ALL;
+  enabled_modes:libtextclassifier3.ModeFlag = ALL;
 }
 
 // Options controlling the output of the classifier.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table OutputOptions {
   // Lists of collection names that will be filtered out at the output:
   // - For annotation, the spans of given collection are simply dropped.
@@ -294,7 +305,7 @@
   filtered_collections_selection:[string];
 }
 
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table Model {
   // Comma-separated list of locales supported by the model as BCP 47 tags.
   locales:string;
@@ -304,8 +315,8 @@
   // A name for the model that can be used for e.g. logging.
   name:string;
 
-  selection_feature_options:libtextclassifier2.FeatureProcessorOptions;
-  classification_feature_options:libtextclassifier2.FeatureProcessorOptions;
+  selection_feature_options:libtextclassifier3.FeatureProcessorOptions;
+  classification_feature_options:libtextclassifier3.FeatureProcessorOptions;
 
   // Tensorflow Lite models.
   selection_model:[ubyte] (force_align: 16);
@@ -314,31 +325,37 @@
   embedding_model:[ubyte] (force_align: 16);
 
   // Options for the different models.
-  selection_options:libtextclassifier2.SelectionModelOptions;
+  selection_options:libtextclassifier3.SelectionModelOptions;
 
-  classification_options:libtextclassifier2.ClassificationModelOptions;
-  regex_model:libtextclassifier2.RegexModel;
-  datetime_model:libtextclassifier2.DatetimeModel;
+  classification_options:libtextclassifier3.ClassificationModelOptions;
+  regex_model:libtextclassifier3.RegexModel;
+  datetime_model:libtextclassifier3.DatetimeModel;
 
   // Options controlling the output of the models.
-  triggering_options:libtextclassifier2.ModelTriggeringOptions;
+  triggering_options:libtextclassifier3.ModelTriggeringOptions;
 
   // Global switch that controls if SuggestSelection(), ClassifyText() and
   // Annotate() will run. If a mode is disabled it returns empty/no-op results.
-  enabled_modes:libtextclassifier2.ModeFlag = ALL;
+  enabled_modes:libtextclassifier3.ModeFlag = ALL;
 
   // If true, will snap the selections that consist only of whitespaces to the
   // containing suggested span. Otherwise, no suggestion is proposed, since the
   // selections are not part of any token.
-  snap_whitespace_selections:bool = 1;
+  snap_whitespace_selections:bool = true;
 
   // Global configuration for the output of SuggestSelection(), ClassifyText()
   // and Annotate().
-  output_options:libtextclassifier2.OutputOptions;
+  output_options:libtextclassifier3.OutputOptions;
+
+  // Configures how Intents should be generated on Android.
+  // TODO(smillius): Remove deprecated factory options.
+  android_intent_options:libtextclassifier3.AndroidIntentFactoryOptions;
+
+  intent_options:libtextclassifier3.IntentFactoryModel;
 }
 
 // Role of the codepoints in the range.
-namespace libtextclassifier2.TokenizationCodepointRange_;
+namespace libtextclassifier3.TokenizationCodepointRange_;
 enum Role : int {
   // Concatenates the codepoint to the current run of codepoints.
   DEFAULT_ROLE = 0,
@@ -363,11 +380,11 @@
 }
 
 // Represents a codepoint range [start, end) with its role for tokenization.
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table TokenizationCodepointRange {
   start:int;
   end:int;
-  role:libtextclassifier2.TokenizationCodepointRange_.Role;
+  role:libtextclassifier3.TokenizationCodepointRange_.Role;
 
   // Integer identifier of the script this range denotes. Negative values are
   // reserved for Tokenizer's internal use.
@@ -375,7 +392,7 @@
 }
 
 // Method for selecting the center token.
-namespace libtextclassifier2.FeatureProcessorOptions_;
+namespace libtextclassifier3.FeatureProcessorOptions_;
 enum CenterTokenSelectionMethod : int {
   DEFAULT_CENTER_TOKEN_METHOD = 0,
 
@@ -388,7 +405,7 @@
 }
 
 // Controls the type of tokenization the model will use for the input text.
-namespace libtextclassifier2.FeatureProcessorOptions_;
+namespace libtextclassifier3.FeatureProcessorOptions_;
 enum TokenizationType : int {
   INVALID_TOKENIZATION_TYPE = 0,
 
@@ -405,14 +422,14 @@
 }
 
 // Range of codepoints start - end, where end is exclusive.
-namespace libtextclassifier2.FeatureProcessorOptions_;
+namespace libtextclassifier3.FeatureProcessorOptions_;
 table CodepointRange {
   start:int;
   end:int;
 }
 
 // Bounds-sensitive feature extraction configuration.
-namespace libtextclassifier2.FeatureProcessorOptions_;
+namespace libtextclassifier3.FeatureProcessorOptions_;
 table BoundsSensitiveFeatures {
   // Enables the extraction of bounds-sensitive features, instead of the click
   // context features.
@@ -445,13 +462,7 @@
   score_single_token_spans_as_zero:bool;
 }
 
-namespace libtextclassifier2.FeatureProcessorOptions_;
-table AlternativeCollectionMapEntry {
-  key:string;
-  value:string;
-}
-
-namespace libtextclassifier2;
+namespace libtextclassifier3;
 table FeatureProcessorOptions {
   // Number of buckets used for hashing charactergrams.
   num_buckets:int = -1;
@@ -479,20 +490,20 @@
   max_word_length:int = 20;
 
   // If true, will use the unicode-aware functionality for extracting features.
-  unicode_aware_features:bool = 0;
+  unicode_aware_features:bool = false;
 
   // Whether to extract the token case feature.
-  extract_case_feature:bool = 0;
+  extract_case_feature:bool = false;
 
   // Whether to extract the selection mask feature.
-  extract_selection_mask_feature:bool = 0;
+  extract_selection_mask_feature:bool = false;
 
   // List of regexps to run over each token. For each regexp, if there is a
   // match, a dense feature of 1.0 is emitted. Otherwise -1.0 is used.
   regexp_feature:[string];
 
   // Whether to remap all digits to a single number.
-  remap_digits:bool = 0;
+  remap_digits:bool = false;
 
   // Whether to lower-case each token before generating hashgrams.
   lowercase_tokens:bool;
@@ -504,7 +515,7 @@
   // infeasible ones.
   // NOTE: Exists mainly for compatibility with older models that were trained
   // with the non-reduced output space.
-  selection_reduced_output_space:bool = 1;
+  selection_reduced_output_space:bool = true;
 
   // Collection names.
   collections:[string];
@@ -515,29 +526,29 @@
 
   // If true, will split the input by lines, and only use the line that contains
   // the clicked token.
-  only_use_line_with_click:bool = 0;
+  only_use_line_with_click:bool = false;
 
   // If true, will split tokens that contain the selection boundary, at the
   // position of the boundary.
   // E.g. "foo{bar}@google.com" -> "foo", "bar", "@google.com"
-  split_tokens_on_selection_boundaries:bool = 0;
+  split_tokens_on_selection_boundaries:bool = false;
 
   // Codepoint ranges that determine how different codepoints are tokenized.
   // The ranges must not overlap.
-  tokenization_codepoint_config:[libtextclassifier2.TokenizationCodepointRange];
+  tokenization_codepoint_config:[libtextclassifier3.TokenizationCodepointRange];
 
-  center_token_selection_method:libtextclassifier2.FeatureProcessorOptions_.CenterTokenSelectionMethod;
+  center_token_selection_method:libtextclassifier3.FeatureProcessorOptions_.CenterTokenSelectionMethod;
 
   // If true, span boundaries will be snapped to containing tokens and not
   // required to exactly match token boundaries.
   snap_label_span_boundaries_to_containing_tokens:bool;
 
   // A set of codepoint ranges supported by the model.
-  supported_codepoint_ranges:[libtextclassifier2.FeatureProcessorOptions_.CodepointRange];
+  supported_codepoint_ranges:[libtextclassifier3.FeatureProcessorOptions_.CodepointRange];
 
   // A set of codepoint ranges to use in the mixed tokenization mode to identify
   // stretches of tokens to re-tokenize using the internal tokenizer.
-  internal_tokenizer_codepoint_ranges:[libtextclassifier2.FeatureProcessorOptions_.CodepointRange];
+  internal_tokenizer_codepoint_ranges:[libtextclassifier3.FeatureProcessorOptions_.CodepointRange];
 
   // Minimum ratio of supported codepoints in the input context. If the ratio
   // is lower than this, the feature computation will fail.
@@ -553,14 +564,14 @@
   // to it. So the resulting feature vector has two regions.
   feature_version:int = 0;
 
-  tokenization_type:libtextclassifier2.FeatureProcessorOptions_.TokenizationType = INTERNAL_TOKENIZER;
-  icu_preserve_whitespace_tokens:bool = 0;
+  tokenization_type:libtextclassifier3.FeatureProcessorOptions_.TokenizationType = INTERNAL_TOKENIZER;
+  icu_preserve_whitespace_tokens:bool = false;
 
   // List of codepoints that will be stripped from beginning and end of
   // predicted spans.
   ignored_span_boundary_codepoints:[int];
 
-  bounds_sensitive_features:libtextclassifier2.FeatureProcessorOptions_.BoundsSensitiveFeatures;
+  bounds_sensitive_features:libtextclassifier3.FeatureProcessorOptions_.BoundsSensitiveFeatures;
 
   // List of allowed charactergrams. The extracted charactergrams are filtered
   // using this list, and charactergrams that are not present are interpreted as
@@ -571,7 +582,7 @@
 
   // If true, tokens will be also split when the codepoint's script_id changes
   // as defined in TokenizationCodepointRange.
-  tokenize_on_script_change:bool = 0;
+  tokenize_on_script_change:bool = false;
 }
 
-root_type libtextclassifier2.Model;
+root_type libtextclassifier3.Model;
diff --git a/quantization.cc b/annotator/quantization.cc
similarity index 92%
rename from quantization.cc
rename to annotator/quantization.cc
index 1a34565..2cf11c5 100644
--- a/quantization.cc
+++ b/annotator/quantization.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "quantization.h"
+#include "annotator/quantization.h"
 
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 float DequantizeValue(int num_sparse_features, int quantization_bias,
                       float multiplier, int value) {
@@ -82,11 +82,11 @@
                       num_sparse_features, quantization_bits, bucket_id, dest,
                       dest_size);
   } else {
-    TC_LOG(ERROR) << "Unsupported quantization_bits: " << quantization_bits;
+    TC3_LOG(ERROR) << "Unsupported quantization_bits: " << quantization_bits;
     return false;
   }
 
   return true;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/quantization.h b/annotator/quantization.h
similarity index 79%
rename from quantization.h
rename to annotator/quantization.h
index c486640..d294f37 100644
--- a/quantization.h
+++ b/annotator/quantization.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_QUANTIZATION_H_
-#define LIBTEXTCLASSIFIER_QUANTIZATION_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_QUANTIZATION_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_QUANTIZATION_H_
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Returns true if the quantization parameters are valid.
 bool CheckQuantizationParams(int bytes_per_embedding, int quantization_bits,
@@ -34,6 +34,6 @@
                    int quantization_bits, int bucket_id, float* dest,
                    int dest_size);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_QUANTIZATION_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_QUANTIZATION_H_
diff --git a/quantization_test.cc b/annotator/quantization_test.cc
similarity index 96%
rename from quantization_test.cc
rename to annotator/quantization_test.cc
index 088daaf..b995096 100644
--- a/quantization_test.cc
+++ b/annotator/quantization_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "quantization.h"
+#include "annotator/quantization.h"
 
 #include <vector>
 
@@ -25,7 +25,7 @@
 using testing::FloatEq;
 using testing::Matcher;
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
@@ -160,4 +160,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/strip-unpaired-brackets.cc b/annotator/strip-unpaired-brackets.cc
similarity index 89%
rename from strip-unpaired-brackets.cc
rename to annotator/strip-unpaired-brackets.cc
index ddf3322..b1067ad 100644
--- a/strip-unpaired-brackets.cc
+++ b/annotator/strip-unpaired-brackets.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#include "strip-unpaired-brackets.h"
+#include "annotator/strip-unpaired-brackets.h"
 
 #include <iterator>
 
-#include "util/base/logging.h"
-#include "util/utf8/unicodetext.h"
+#include "utils/base/logging.h"
+#include "utils/utf8/unicodetext.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 // Returns true if given codepoint is contained in the given span in context.
@@ -94,12 +94,12 @@
 
   // Should not happen, but let's make sure.
   if (span.first > span.second) {
-    TC_LOG(WARNING) << "Inverse indices result: " << span.first << ", "
-                    << span.second;
+    TC3_LOG(WARNING) << "Inverse indices result: " << span.first << ", "
+                     << span.second;
     span.second = span.first;
   }
 
   return span;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/strip-unpaired-brackets.h b/annotator/strip-unpaired-brackets.h
similarity index 74%
rename from strip-unpaired-brackets.h
rename to annotator/strip-unpaired-brackets.h
index 4e82c3e..ceb8d60 100644
--- a/strip-unpaired-brackets.h
+++ b/annotator/strip-unpaired-brackets.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_STRIP_UNPAIRED_BRACKETS_H_
-#define LIBTEXTCLASSIFIER_STRIP_UNPAIRED_BRACKETS_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_STRIP_UNPAIRED_BRACKETS_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_STRIP_UNPAIRED_BRACKETS_H_
 
 #include <string>
 
-#include "types.h"
-#include "util/utf8/unilib.h"
+#include "annotator/types.h"
+#include "utils/utf8/unilib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 // If the first or the last codepoint of the given span is a bracket, the
 // bracket is stripped if the span does not contain its corresponding paired
 // version.
@@ -33,6 +33,6 @@
 CodepointSpan StripUnpairedBrackets(const UnicodeText& context_unicode,
                                     CodepointSpan span, const UniLib& unilib);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_STRIP_UNPAIRED_BRACKETS_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_STRIP_UNPAIRED_BRACKETS_H_
diff --git a/strip-unpaired-brackets_test.cc b/annotator/strip-unpaired-brackets_test.cc
similarity index 76%
rename from strip-unpaired-brackets_test.cc
rename to annotator/strip-unpaired-brackets_test.cc
index 5362500..32585ce 100644
--- a/strip-unpaired-brackets_test.cc
+++ b/annotator/strip-unpaired-brackets_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,48 +14,53 @@
  * limitations under the License.
  */
 
-#include "strip-unpaired-brackets.h"
+#include "annotator/strip-unpaired-brackets.h"
 
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
-TEST(StripUnpairedBracketsTest, StripUnpairedBrackets) {
-  CREATE_UNILIB_FOR_TESTING
+class StripUnpairedBracketsTest : public ::testing::Test {
+ protected:
+  StripUnpairedBracketsTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+TEST_F(StripUnpairedBracketsTest, StripUnpairedBrackets) {
   // If the brackets match, nothing gets stripped.
-  EXPECT_EQ(StripUnpairedBrackets("call me (123) 456 today", {8, 17}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me (123) 456 today", {8, 17}, unilib_),
             std::make_pair(8, 17));
-  EXPECT_EQ(StripUnpairedBrackets("call me (123 456) today", {8, 17}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me (123 456) today", {8, 17}, unilib_),
             std::make_pair(8, 17));
 
   // If the brackets don't match, they get stripped.
-  EXPECT_EQ(StripUnpairedBrackets("call me (123 456 today", {8, 16}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me (123 456 today", {8, 16}, unilib_),
             std::make_pair(9, 16));
-  EXPECT_EQ(StripUnpairedBrackets("call me )123 456 today", {8, 16}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me )123 456 today", {8, 16}, unilib_),
             std::make_pair(9, 16));
-  EXPECT_EQ(StripUnpairedBrackets("call me 123 456) today", {8, 16}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me 123 456) today", {8, 16}, unilib_),
             std::make_pair(8, 15));
-  EXPECT_EQ(StripUnpairedBrackets("call me 123 456( today", {8, 16}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me 123 456( today", {8, 16}, unilib_),
             std::make_pair(8, 15));
 
   // Strips brackets correctly from length-1 selections that consist of
   // a bracket only.
-  EXPECT_EQ(StripUnpairedBrackets("call me at ) today", {11, 12}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me at ) today", {11, 12}, unilib_),
             std::make_pair(12, 12));
-  EXPECT_EQ(StripUnpairedBrackets("call me at ( today", {11, 12}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me at ( today", {11, 12}, unilib_),
             std::make_pair(12, 12));
 
   // Handles invalid spans gracefully.
-  EXPECT_EQ(StripUnpairedBrackets("call me at  today", {11, 11}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("call me at  today", {11, 11}, unilib_),
             std::make_pair(11, 11));
-  EXPECT_EQ(StripUnpairedBrackets("hello world", {0, 0}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {0, 0}, unilib_),
             std::make_pair(0, 0));
-  EXPECT_EQ(StripUnpairedBrackets("hello world", {11, 11}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {11, 11}, unilib_),
             std::make_pair(11, 11));
-  EXPECT_EQ(StripUnpairedBrackets("hello world", {-1, -1}, unilib),
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {-1, -1}, unilib_),
             std::make_pair(-1, -1));
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/test_data/test_model.fb b/annotator/test_data/test_model.fb
similarity index 82%
rename from test_data/test_model.fb
rename to annotator/test_data/test_model.fb
index c651bdb..f25b950 100644
--- a/test_data/test_model.fb
+++ b/annotator/test_data/test_model.fb
Binary files differ
diff --git a/test_data/test_model_cc.fb b/annotator/test_data/test_model_cc.fb
similarity index 83%
rename from test_data/test_model_cc.fb
rename to annotator/test_data/test_model_cc.fb
index 53af6bf..cfe10cf 100644
--- a/test_data/test_model_cc.fb
+++ b/annotator/test_data/test_model_cc.fb
Binary files differ
diff --git a/annotator/test_data/wrong_embeddings.fb b/annotator/test_data/wrong_embeddings.fb
new file mode 100644
index 0000000..7e990ed
--- /dev/null
+++ b/annotator/test_data/wrong_embeddings.fb
Binary files differ
diff --git a/token-feature-extractor.cc b/annotator/token-feature-extractor.cc
similarity index 94%
rename from token-feature-extractor.cc
rename to annotator/token-feature-extractor.cc
index 13fba30..77ad7a4 100644
--- a/token-feature-extractor.cc
+++ b/annotator/token-feature-extractor.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  * limitations under the License.
  */
 
-#include "token-feature-extractor.h"
+#include "annotator/token-feature-extractor.h"
 
 #include <cctype>
 #include <string>
 
-#include "util/base/logging.h"
-#include "util/hash/farmhash.h"
-#include "util/strings/stringpiece.h"
-#include "util/utf8/unicodetext.h"
+#include "utils/base/logging.h"
+#include "utils/hash/farmhash.h"
+#include "utils/strings/stringpiece.h"
+#include "utils/utf8/unicodetext.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 namespace {
 
@@ -58,11 +58,11 @@
   remapped->clear();
   for (auto it = word.begin(); it != word.end(); ++it) {
     if (options.remap_digits && unilib.IsDigit(*it)) {
-      remapped->AppendCodepoint('0');
+      remapped->push_back('0');
     } else if (options.lowercase_tokens) {
-      remapped->AppendCodepoint(unilib.ToLower(*it));
+      remapped->push_back(unilib.ToLower(*it));
     } else {
-      remapped->AppendCodepoint(*it);
+      remapped->push_back(*it);
     }
   }
 }
@@ -160,7 +160,7 @@
 
 int TokenFeatureExtractor::HashToken(StringPiece token) const {
   if (options_.allowed_chargrams.empty()) {
-    return tc2farmhash::Fingerprint64(token) % options_.num_buckets;
+    return tc3farmhash::Fingerprint64(token) % options_.num_buckets;
   } else {
     // Padding and out-of-vocabulary tokens have extra buckets reserved because
     // they are special and important tokens, and we don't want them to share
@@ -174,7 +174,7 @@
                options_.allowed_chargrams.end()) {
       return 0;  // Out-of-vocabulary.
     } else {
-      return (tc2farmhash::Fingerprint64(token) %
+      return (tc3farmhash::Fingerprint64(token) %
               (options_.num_buckets - kNumExtraBuckets)) +
              kNumExtraBuckets;
     }
@@ -308,4 +308,4 @@
   return result;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/token-feature-extractor.h b/annotator/token-feature-extractor.h
similarity index 89%
rename from token-feature-extractor.h
rename to annotator/token-feature-extractor.h
index fee1355..7dc19fe 100644
--- a/token-feature-extractor.h
+++ b/annotator/token-feature-extractor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,18 +14,18 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_TOKEN_FEATURE_EXTRACTOR_H_
-#define LIBTEXTCLASSIFIER_TOKEN_FEATURE_EXTRACTOR_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TOKEN_FEATURE_EXTRACTOR_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_TOKEN_FEATURE_EXTRACTOR_H_
 
 #include <memory>
 #include <unordered_set>
 #include <vector>
 
-#include "types.h"
-#include "util/strings/stringpiece.h"
-#include "util/utf8/unilib.h"
+#include "annotator/types.h"
+#include "utils/strings/stringpiece.h"
+#include "utils/utf8/unilib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 struct TokenFeatureExtractorOptions {
   // Number of buckets used for hashing charactergrams.
@@ -110,6 +110,6 @@
   const UniLib& unilib_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_TOKEN_FEATURE_EXTRACTOR_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_TOKEN_FEATURE_EXTRACTOR_H_
diff --git a/token-feature-extractor_test.cc b/annotator/token-feature-extractor_test.cc
similarity index 88%
rename from token-feature-extractor_test.cc
rename to annotator/token-feature-extractor_test.cc
index 4b7e011..32383a9 100644
--- a/token-feature-extractor_test.cc
+++ b/annotator/token-feature-extractor_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,29 +14,34 @@
  * limitations under the License.
  */
 
-#include "token-feature-extractor.h"
+#include "annotator/token-feature-extractor.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
+class TokenFeatureExtractorTest : public ::testing::Test {
+ protected:
+  TokenFeatureExtractorTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
 class TestingTokenFeatureExtractor : public TokenFeatureExtractor {
  public:
   using TokenFeatureExtractor::HashToken;
   using TokenFeatureExtractor::TokenFeatureExtractor;
 };
 
-TEST(TokenFeatureExtractorTest, ExtractAscii) {
+TEST_F(TokenFeatureExtractorTest, ExtractAscii) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2, 3};
   options.extract_case_feature = true;
   options.unicode_aware_features = false;
   options.extract_selection_mask_feature = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -99,15 +104,14 @@
   EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
 }
 
-TEST(TokenFeatureExtractorTest, ExtractAsciiNoChargrams) {
+TEST_F(TokenFeatureExtractorTest, ExtractAsciiNoChargrams) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{};
   options.extract_case_feature = true;
   options.unicode_aware_features = false;
   options.extract_selection_mask_feature = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -129,15 +133,14 @@
   EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
 }
 
-TEST(TokenFeatureExtractorTest, ExtractUnicode) {
+TEST_F(TokenFeatureExtractorTest, ExtractUnicode) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2, 3};
   options.extract_case_feature = true;
   options.unicode_aware_features = true;
   options.extract_selection_mask_feature = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -200,15 +203,14 @@
   EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
 }
 
-TEST(TokenFeatureExtractorTest, ExtractUnicodeNoChargrams) {
+TEST_F(TokenFeatureExtractorTest, ExtractUnicodeNoChargrams) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{};
   options.extract_case_feature = true;
   options.unicode_aware_features = true;
   options.extract_selection_mask_feature = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -231,16 +233,15 @@
   EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
 }
 
-#ifdef LIBTEXTCLASSIFIER_TEST_ICU
-TEST(TokenFeatureExtractorTest, ICUCaseFeature) {
+#ifdef TC3_TEST_ICU
+TEST_F(TokenFeatureExtractorTest, ICUCaseFeature) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
   options.extract_case_feature = true;
   options.unicode_aware_features = true;
   options.extract_selection_mask_feature = false;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -268,14 +269,13 @@
 }
 #endif
 
-TEST(TokenFeatureExtractorTest, DigitRemapping) {
+TEST_F(TokenFeatureExtractorTest, DigitRemapping) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
   options.remap_digits = true;
   options.unicode_aware_features = false;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -293,14 +293,13 @@
               testing::Not(testing::ElementsAreArray(sparse_features2)));
 }
 
-TEST(TokenFeatureExtractorTest, DigitRemappingUnicode) {
+TEST_F(TokenFeatureExtractorTest, DigitRemappingUnicode) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
   options.remap_digits = true;
   options.unicode_aware_features = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -318,14 +317,13 @@
               testing::Not(testing::ElementsAreArray(sparse_features2)));
 }
 
-TEST(TokenFeatureExtractorTest, LowercaseAscii) {
+TEST_F(TokenFeatureExtractorTest, LowercaseAscii) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
   options.lowercase_tokens = true;
   options.unicode_aware_features = false;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -342,15 +340,14 @@
   EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));
 }
 
-#ifdef LIBTEXTCLASSIFIER_TEST_ICU
-TEST(TokenFeatureExtractorTest, LowercaseUnicode) {
+#ifdef TC3_TEST_ICU
+TEST_F(TokenFeatureExtractorTest, LowercaseUnicode) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
   options.lowercase_tokens = true;
   options.unicode_aware_features = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -363,8 +360,8 @@
 }
 #endif
 
-#ifdef LIBTEXTCLASSIFIER_TEST_ICU
-TEST(TokenFeatureExtractorTest, RegexFeatures) {
+#ifdef TC3_TEST_ICU
+TEST_F(TokenFeatureExtractorTest, RegexFeatures) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
@@ -372,8 +369,7 @@
   options.unicode_aware_features = false;
   options.regexp_features.push_back("^[a-z]+$");  // all lower case.
   options.regexp_features.push_back("^[0-9]+$");  // all digits.
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -398,15 +394,14 @@
 }
 #endif
 
-TEST(TokenFeatureExtractorTest, ExtractTooLongWord) {
+TEST_F(TokenFeatureExtractorTest, ExtractTooLongWord) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{22};
   options.extract_case_feature = true;
   options.unicode_aware_features = true;
   options.extract_selection_mask_feature = true;
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   // Test that this runs. ASAN should catch problems.
   std::vector<int> sparse_features;
@@ -423,7 +418,7 @@
               }));
 }
 
-TEST(TokenFeatureExtractorTest, ExtractAsciiUnicodeMatches) {
+TEST_F(TokenFeatureExtractorTest, ExtractAsciiUnicodeMatches) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2, 3, 4, 5};
@@ -431,11 +426,10 @@
   options.unicode_aware_features = true;
   options.extract_selection_mask_feature = true;
 
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor_unicode(options, unilib);
+  TestingTokenFeatureExtractor extractor_unicode(options, unilib_);
 
   options.unicode_aware_features = false;
-  TestingTokenFeatureExtractor extractor_ascii(options, unilib);
+  TestingTokenFeatureExtractor extractor_ascii(options, unilib_);
 
   for (const std::string& input :
        {"https://www.abcdefgh.com/in/xxxkkkvayio",
@@ -458,7 +452,7 @@
   }
 }
 
-TEST(TokenFeatureExtractorTest, ExtractForPadToken) {
+TEST_F(TokenFeatureExtractorTest, ExtractForPadToken) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2};
@@ -466,8 +460,7 @@
   options.unicode_aware_features = false;
   options.extract_selection_mask_feature = true;
 
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -479,7 +472,7 @@
   EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
 }
 
-TEST(TokenFeatureExtractorTest, ExtractFiltered) {
+TEST_F(TokenFeatureExtractorTest, ExtractFiltered) {
   TokenFeatureExtractorOptions options;
   options.num_buckets = 1000;
   options.chargram_orders = std::vector<int>{1, 2, 3};
@@ -493,8 +486,7 @@
   options.allowed_chargrams.insert("!");
   options.allowed_chargrams.insert("\xc4");  // UTF8 control character.
 
-  CREATE_UNILIB_FOR_TESTING
-  TestingTokenFeatureExtractor extractor(options, unilib);
+  TestingTokenFeatureExtractor extractor(options, unilib_);
 
   std::vector<int> sparse_features;
   std::vector<float> dense_features;
@@ -561,4 +553,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/tokenizer.cc b/annotator/tokenizer.cc
similarity index 94%
rename from tokenizer.cc
rename to annotator/tokenizer.cc
index 722a67b..099dccc 100644
--- a/tokenizer.cc
+++ b/annotator/tokenizer.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#include "tokenizer.h"
+#include "annotator/tokenizer.h"
 
 #include <algorithm>
 
-#include "util/base/logging.h"
-#include "util/strings/utf8.h"
+#include "utils/base/logging.h"
+#include "utils/strings/utf8.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 Tokenizer::Tokenizer(
     const std::vector<const TokenizationCodepointRange*>& codepoint_ranges,
@@ -123,4 +123,4 @@
   return result;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/tokenizer.h b/annotator/tokenizer.h
similarity index 82%
rename from tokenizer.h
rename to annotator/tokenizer.h
index 2524e12..ec33f2d 100644
--- a/tokenizer.h
+++ b/annotator/tokenizer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,18 +14,18 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_TOKENIZER_H_
-#define LIBTEXTCLASSIFIER_TOKENIZER_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TOKENIZER_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_TOKENIZER_H_
 
 #include <string>
 #include <vector>
 
-#include "model_generated.h"
-#include "types.h"
-#include "util/base/integral_types.h"
-#include "util/utf8/unicodetext.h"
+#include "annotator/model_generated.h"
+#include "annotator/types.h"
+#include "utils/base/integral_types.h"
+#include "utils/utf8/unicodetext.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 const int kInvalidScript = -1;
 const int kUnknownScript = -2;
@@ -66,6 +66,6 @@
   bool split_on_script_change_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_TOKENIZER_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_TOKENIZER_H_
diff --git a/tokenizer_test.cc b/annotator/tokenizer_test.cc
similarity index 98%
rename from tokenizer_test.cc
rename to annotator/tokenizer_test.cc
index 65072f3..a3ab9da 100644
--- a/tokenizer_test.cc
+++ b/annotator/tokenizer_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#include "tokenizer.h"
+#include "annotator/tokenizer.h"
 
 #include <vector>
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 using testing::ElementsAreArray;
@@ -331,4 +331,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/types-test-util.h b/annotator/types-test-util.h
similarity index 77%
rename from types-test-util.h
rename to annotator/types-test-util.h
index 1679e7c..fbbdd63 100644
--- a/types-test-util.h
+++ b/annotator/types-test-util.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_TYPES_TEST_UTIL_H_
-#define LIBTEXTCLASSIFIER_TYPES_TEST_UTIL_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
 
 #include <ostream>
 
-#include "types.h"
-#include "util/base/logging.h"
+#include "annotator/types.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 inline std::ostream& operator<<(std::ostream& stream, const Token& value) {
   logging::LoggingStringStream tmp_stream;
@@ -44,6 +44,6 @@
   return stream << tmp_stream.message;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_TYPES_TEST_UTIL_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
diff --git a/types.h b/annotator/types.h
similarity index 94%
rename from types.h
rename to annotator/types.h
index b2f624d..38bce41 100644
--- a/types.h
+++ b/annotator/types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,21 +14,23 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_TYPES_H_
-#define LIBTEXTCLASSIFIER_TYPES_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_H_
 
 #include <algorithm>
 #include <cmath>
 #include <functional>
+#include <map>
 #include <set>
 #include <string>
 #include <utility>
 #include <vector>
-#include "util/base/integral_types.h"
 
-#include "util/base/logging.h"
+#include "utils/base/integral_types.h"
+#include "utils/base/logging.h"
+#include "utils/variant.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 constexpr int kInvalidIndex = -1;
 
@@ -221,10 +223,14 @@
   std::string collection;
   float score;
   DatetimeParseResult datetime_parse_result;
+  std::string serialized_knowledge_result;
 
   // Internal score used for conflict resolution.
   float priority_score;
 
+  // Extra information.
+  std::map<std::string, Variant> extra;
+
   explicit ClassificationResult() : score(-1.0f), priority_score(-1.0) {}
 
   ClassificationResult(const std::string& arg_collection, float arg_score)
@@ -318,13 +324,13 @@
   };
 
   enum RelationType {
-    MONDAY = 1,
-    TUESDAY = 2,
-    WEDNESDAY = 3,
-    THURSDAY = 4,
-    FRIDAY = 5,
-    SATURDAY = 6,
-    SUNDAY = 7,
+    SUNDAY = 1,
+    MONDAY = 2,
+    TUESDAY = 3,
+    WEDNESDAY = 4,
+    THURSDAY = 5,
+    FRIDAY = 6,
+    SATURDAY = 7,
     DAY = 8,
     WEEK = 9,
     MONTH = 10,
@@ -391,6 +397,6 @@
   int relation_distance;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_TYPES_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_H_
diff --git a/zlib-utils.cc b/annotator/zlib-utils.cc
similarity index 90%
rename from zlib-utils.cc
rename to annotator/zlib-utils.cc
index 7e6646f..d0fb0d0 100644
--- a/zlib-utils.cc
+++ b/annotator/zlib-utils.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#include "zlib-utils.h"
+#include "annotator/zlib-utils.h"
 
 #include <memory>
 
-#include "util/base/logging.h"
-#include "util/flatbuffers.h"
+#include "utils/base/logging.h"
+#include "utils/flatbuffers.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 std::unique_ptr<ZlibDecompressor> ZlibDecompressor::Instance() {
   std::unique_ptr<ZlibDecompressor> result(new ZlibDecompressor());
@@ -112,7 +112,7 @@
 bool CompressModel(ModelT* model) {
   std::unique_ptr<ZlibCompressor> zlib_compressor = ZlibCompressor::Instance();
   if (!zlib_compressor) {
-    TC_LOG(ERROR) << "Cannot compress model.";
+    TC3_LOG(ERROR) << "Cannot compress model.";
     return false;
   }
 
@@ -156,6 +156,9 @@
 bool DecompressBuffer(const CompressedBufferT* compressed_pattern,
                       ZlibDecompressor* zlib_decompressor,
                       std::string* uncompressed_pattern) {
+  if (!compressed_pattern) {
+    return true;
+  }
   std::string packed_pattern =
       PackFlatbuffer<CompressedBuffer>(compressed_pattern);
   if (!zlib_decompressor->Decompress(
@@ -172,7 +175,7 @@
   std::unique_ptr<ZlibDecompressor> zlib_decompressor =
       ZlibDecompressor::Instance();
   if (!zlib_decompressor) {
-    TC_LOG(ERROR) << "Cannot initialize decompressor.";
+    TC3_LOG(ERROR) << "Cannot initialize decompressor.";
     return false;
   }
 
@@ -182,7 +185,7 @@
       RegexModel_::PatternT* pattern = model->regex_model->patterns[i].get();
       if (!DecompressBuffer(pattern->compressed_pattern.get(),
                             zlib_decompressor.get(), &pattern->pattern)) {
-        TC_LOG(ERROR) << "Cannot decompress pattern: " << i;
+        TC3_LOG(ERROR) << "Cannot decompress pattern: " << i;
         return false;
       }
       pattern->compressed_pattern.reset(nullptr);
@@ -197,7 +200,7 @@
         DatetimeModelPattern_::RegexT* regex = pattern->regexes[j].get();
         if (!DecompressBuffer(regex->compressed_pattern.get(),
                               zlib_decompressor.get(), &regex->pattern)) {
-          TC_LOG(ERROR) << "Cannot decompress pattern: " << i << " " << j;
+          TC3_LOG(ERROR) << "Cannot decompress pattern: " << i << " " << j;
           return false;
         }
         regex->compressed_pattern.reset(nullptr);
@@ -208,7 +211,7 @@
           model->datetime_model->extractors[i].get();
       if (!DecompressBuffer(extractor->compressed_pattern.get(),
                             zlib_decompressor.get(), &extractor->pattern)) {
-        TC_LOG(ERROR) << "Cannot decompress pattern: " << i;
+        TC3_LOG(ERROR) << "Cannot decompress pattern: " << i;
         return false;
       }
       extractor->compressed_pattern.reset(nullptr);
@@ -219,8 +222,8 @@
 
 std::string CompressSerializedModel(const std::string& model) {
   std::unique_ptr<ModelT> unpacked_model = UnPackModel(model.c_str());
-  TC_CHECK(unpacked_model != nullptr);
-  TC_CHECK(CompressModel(unpacked_model.get()));
+  TC3_CHECK(unpacked_model != nullptr);
+  TC3_CHECK(CompressModel(unpacked_model.get()));
   flatbuffers::FlatBufferBuilder builder;
   FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
   return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -237,7 +240,7 @@
       compressed_pattern->buffer() != nullptr) {
     if (decompressor == nullptr ||
         !decompressor->Decompress(compressed_pattern, &decompressed_pattern)) {
-      TC_LOG(ERROR) << "Cannot decompress pattern.";
+      TC3_LOG(ERROR) << "Cannot decompress pattern.";
       return nullptr;
     }
     unicode_regex_pattern =
@@ -245,7 +248,7 @@
                           decompressed_pattern.size(), /*do_copy=*/false);
   } else {
     if (uncompressed_pattern == nullptr) {
-      TC_LOG(ERROR) << "Cannot load uncompressed pattern.";
+      TC3_LOG(ERROR) << "Cannot load uncompressed pattern.";
       return nullptr;
     }
     unicode_regex_pattern =
@@ -260,10 +263,10 @@
   std::unique_ptr<UniLib::RegexPattern> regex_pattern =
       unilib.CreateRegexPattern(unicode_regex_pattern);
   if (!regex_pattern) {
-    TC_LOG(ERROR) << "Could not create pattern: "
-                  << unicode_regex_pattern.ToUTF8String();
+    TC3_LOG(ERROR) << "Could not create pattern: "
+                   << unicode_regex_pattern.ToUTF8String();
   }
   return regex_pattern;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/zlib-utils.h b/annotator/zlib-utils.h
similarity index 86%
rename from zlib-utils.h
rename to annotator/zlib-utils.h
index 136f4d2..fbb3479 100644
--- a/zlib-utils.h
+++ b/annotator/zlib-utils.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,16 +16,16 @@
 
 // Functions to compress and decompress low entropy entries in the model.
 
-#ifndef LIBTEXTCLASSIFIER_ZLIB_UTILS_H_
-#define LIBTEXTCLASSIFIER_ZLIB_UTILS_H_
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_ZLIB_UTILS_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_ZLIB_UTILS_H_
 
 #include <memory>
 
-#include "model_generated.h"
-#include "util/utf8/unilib.h"
+#include "annotator/model_generated.h"
+#include "utils/utf8/unilib.h"
 #include "zlib.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 class ZlibDecompressor {
  public:
@@ -74,6 +74,6 @@
     const CompressedBuffer* compressed_pattern, ZlibDecompressor* decompressor,
     std::string* result_pattern_text = nullptr);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_ZLIB_UTILS_H_
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ZLIB_UTILS_H_
diff --git a/zlib-utils_test.cc b/annotator/zlib-utils_test.cc
similarity index 95%
rename from zlib-utils_test.cc
rename to annotator/zlib-utils_test.cc
index 155f14f..b6399c8 100644
--- a/zlib-utils_test.cc
+++ b/annotator/zlib-utils_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#include "zlib-utils.h"
+#include "annotator/zlib-utils.h"
 
 #include <memory>
 
-#include "model_generated.h"
+#include "annotator/model_generated.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 TEST(ZlibUtilsTest, CompressModel) {
   ModelT model;
@@ -95,4 +95,4 @@
             "an example datetime extractor");
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/generate_flatbuffers.mk b/generate_flatbuffers.mk
new file mode 100644
index 0000000..e5801dc
--- /dev/null
+++ b/generate_flatbuffers.mk
@@ -0,0 +1,73 @@
+FLATC := $(HOST_OUT_EXECUTABLES)/flatc$(HOST_EXECUTABLE_SUFFIX)
+
+define transform-fbs-to-cpp
+@echo "Flatc: $@ <= $(PRIVATE_INPUT_FBS)"
+@rm -f $@
+@mkdir -p $(dir $@)
+$(FLATC) \
+    --cpp \
+    --no-union-value-namespacing \
+    --gen-object-api \
+    --keep-prefix \
+    -I $(INPUT_DIR) \
+    -o $(dir $@) \
+    $(PRIVATE_INPUT_FBS) \
+    || exit 33
+[ -f $@ ] || exit 33
+endef
+
+intermediates := $(call local-generated-sources-dir)
+
+# Generate utils/intent/intent-config_generated.h using FlatBuffer schema compiler.
+INTENT_CONFIG_FBS := $(LOCAL_PATH)/utils/intents/intent-config.fbs
+INTENT_CONFIG_H := $(intermediates)/utils/intents/intent-config_generated.h
+$(INTENT_CONFIG_H): PRIVATE_INPUT_FBS := $(INTENT_CONFIG_FBS)
+$(INTENT_CONFIG_H): INPUT_DIR := $(LOCAL_PATH)
+$(INTENT_CONFIG_H): $(FLATC) $(INTENT_CONFIG_FBS)
+	$(transform-fbs-to-cpp)
+LOCAL_GENERATED_SOURCES += $(INTENT_CONFIG_H)
+
+# Generate annotator/model_generated.h using FlatBuffer schema compiler.
+ANNOTATOR_MODEL_FBS := $(LOCAL_PATH)/annotator/model.fbs
+ANNOTATOR_MODEL_H := $(intermediates)/annotator/model_generated.h
+$(ANNOTATOR_MODEL_H): PRIVATE_INPUT_FBS := $(ANNOTATOR_MODEL_FBS)
+$(ANNOTATOR_MODEL_H): INPUT_DIR := $(LOCAL_PATH)
+$(ANNOTATOR_MODEL_H): $(FLATC) $(ANNOTATOR_MODEL_FBS) $(INTENT_CONFIG_H)
+	$(transform-fbs-to-cpp)
+LOCAL_GENERATED_SOURCES += $(ANNOTATOR_MODEL_H)
+
+# Generate actions/actions_model_generated.h using FlatBuffer schema compiler.
+ACTIONS_MODEL_FBS := $(LOCAL_PATH)/actions/actions_model.fbs
+ACTIONS_MODEL_H := $(intermediates)/actions/actions_model_generated.h
+$(ACTIONS_MODEL_H): PRIVATE_INPUT_FBS := $(ACTIONS_MODEL_FBS)
+$(ACTIONS_MODEL_H): INPUT_DIR := $(LOCAL_PATH)
+$(ACTIONS_MODEL_H): $(FLATC) $(ACTIONS_MODEL_FBS)
+	$(transform-fbs-to-cpp)
+LOCAL_GENERATED_SOURCES += $(ACTIONS_MODEL_H)
+
+# Generate utils/tflite/text_encoder_config_generated.h using FlatBuffer schema compiler.
+UTILS_TFLITE_TEXT_ENCODER_CONFIG_FBS := $(LOCAL_PATH)/utils/tflite/text_encoder_config.fbs
+UTILS_TFLITE_TEXT_ENCODER_CONFIG_H := $(intermediates)/utils/tflite/text_encoder_config_generated.h
+$(UTILS_TFLITE_TEXT_ENCODER_CONFIG_H): PRIVATE_INPUT_FBS := $(UTILS_TFLITE_TEXT_ENCODER_CONFIG_FBS)
+$(UTILS_TFLITE_TEXT_ENCODER_CONFIG_H): INPUT_DIR := $(LOCAL_PATH)
+$(UTILS_TFLITE_TEXT_ENCODER_CONFIG_H): $(FLATC) $(UTILS_TFLITE_TEXT_ENCODER_CONFIG_FBS)
+	$(transform-fbs-to-cpp)
+LOCAL_GENERATED_SOURCES += $(UTILS_TFLITE_TEXT_ENCODER_CONFIG_H)
+
+# Generate lang_id/common/flatbuffers/embedding-network_generated.h using FlatBuffer schema compiler.
+LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_FBS := $(LOCAL_PATH)/lang_id/common/flatbuffers/embedding-network.fbs
+LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_H := $(intermediates)/lang_id/common/flatbuffers/embedding-network_generated.h
+$(LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_H): PRIVATE_INPUT_FBS := $(LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_FBS)
+$(LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_H): INPUT_DIR := $(LOCAL_PATH)
+$(LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_H): $(FLATC) $(LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_FBS)
+	$(transform-fbs-to-cpp)
+LOCAL_GENERATED_SOURCES += $(LANG_ID_COMMON_FLATBUFFERS_EMBEDDING_NETWORK_H)
+
+# Generate lang_id/common/flatbuffers/model_generated.h using FlatBuffer schema compiler.
+LANG_ID_COMMON_FLATBUFFERS_MODEL_FBS := $(LOCAL_PATH)/lang_id/common/flatbuffers/model.fbs
+LANG_ID_COMMON_FLATBUFFERS_MODEL_H := $(intermediates)/lang_id/common/flatbuffers/model_generated.h
+$(LANG_ID_COMMON_FLATBUFFERS_MODEL_H): PRIVATE_INPUT_FBS := $(LANG_ID_COMMON_FLATBUFFERS_MODEL_FBS)
+$(LANG_ID_COMMON_FLATBUFFERS_MODEL_H): INPUT_DIR := $(LOCAL_PATH)
+$(LANG_ID_COMMON_FLATBUFFERS_MODEL_H): $(FLATC) $(LANG_ID_COMMON_FLATBUFFERS_MODEL_FBS)
+	$(transform-fbs-to-cpp)
+LOCAL_GENERATED_SOURCES += $(LANG_ID_COMMON_FLATBUFFERS_MODEL_H)
diff --git a/java/com/google/android/textclassifier/ActionsSuggestionsModel.java b/java/com/google/android/textclassifier/ActionsSuggestionsModel.java
new file mode 100644
index 0000000..a836e74
--- /dev/null
+++ b/java/com/google/android/textclassifier/ActionsSuggestionsModel.java
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.android.textclassifier;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Java wrapper for ActionsSuggestions native library interface. This library is used to suggest
+ * actions and replies in a given conversation.
+ *
+ * @hide
+ */
+public final class ActionsSuggestionsModel implements AutoCloseable {
+  private final AtomicBoolean isClosed = new AtomicBoolean(false);
+
+  static {
+    System.loadLibrary("textclassifier");
+  }
+
+  private long actionsModelPtr;
+  private AnnotatorModel annotator;
+
+  /**
+   * Creates a new instance of Actions predictor, using the provided model image, given as a file
+   * descriptor.
+   */
+  public ActionsSuggestionsModel(int fileDescriptor) {
+    this(fileDescriptor, null);
+  }
+
+  public ActionsSuggestionsModel(int fileDescriptor, AnnotatorModel annotator) {
+    actionsModelPtr = nativeNewActionsModel(fileDescriptor);
+    if (actionsModelPtr == 0L) {
+      throw new IllegalArgumentException("Couldn't initialize actions model from file descriptor.");
+    }
+    setAnnotator(annotator);
+  }
+
+  /**
+   * Creates a new instance of Actions predictor, using the provided model image, given as a file
+   * path.
+   */
+  public ActionsSuggestionsModel(String path) {
+    this(path, null);
+  }
+
+  public ActionsSuggestionsModel(String path, AnnotatorModel annotator) {
+    actionsModelPtr = nativeNewActionsModelFromPath(path);
+    if (actionsModelPtr == 0L) {
+      throw new IllegalArgumentException("Couldn't initialize actions model from given file.");
+    }
+    setAnnotator(annotator);
+  }
+
+  /** Suggests actions / replies to the given conversation. */
+  public ActionSuggestion[] suggestActions(
+      Conversation conversation, ActionSuggestionOptions options) {
+    return nativeSuggestActions(actionsModelPtr, conversation, options);
+  }
+
+  /** Frees up the allocated memory. */
+  @Override
+  public void close() {
+    if (isClosed.compareAndSet(false, true)) {
+      nativeCloseActionsModel(actionsModelPtr);
+      actionsModelPtr = 0L;
+    }
+  }
+
+  @Override
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } finally {
+      super.finalize();
+    }
+  }
+
+  /** Returns a comma separated list of locales supported by the model as BCP 47 tags. */
+  public static String getLocales(int fd) {
+    return nativeGetLocales(fd);
+  }
+
+  /** Returns the version of the model. */
+  public static int getVersion(int fd) {
+    return nativeGetVersion(fd);
+  }
+
+  /** Returns the name of the model. */
+  public static String getName(int fd) {
+    return nativeGetName(fd);
+  }
+
+  /** Action suggestion that contains a response text and the type of the response. */
+  public static final class ActionSuggestion {
+    private final String responseText;
+    private final String actionType;
+    private final float score;
+
+    public ActionSuggestion(String responseText, String actionType, float score) {
+      this.responseText = responseText;
+      this.actionType = actionType;
+      this.score = score;
+    }
+
+    public String getResponseText() {
+      return responseText;
+    }
+
+    public String getActionType() {
+      return actionType;
+    }
+
+    /** Confidence score between 0 and 1 */
+    public float getScore() {
+      return score;
+    }
+  }
+
+  /** Represents a single message in the conversation. */
+  public static final class ConversationMessage {
+    private final int userId;
+    private final String text;
+    private final int timeDiffInSeconds;
+    private final String locales;
+
+    public ConversationMessage(int userId, String text, int timeDiffInSeconds, String locales) {
+      this.userId = userId;
+      this.text = text;
+      this.timeDiffInSeconds = timeDiffInSeconds;
+      this.locales = locales;
+    }
+
+    /** The identifier of the sender */
+    public int getUserId() {
+      return userId;
+    }
+
+    public String getText() {
+      return text;
+    }
+
+    /**
+     * The time difference (in seconds) between the first message of the coversation and this
+     * message, value {@code 0} means unspecified.
+     */
+    public int getTimeDiffInSeconds() {
+      return timeDiffInSeconds;
+    }
+
+    public String getLocales() {
+      return locales;
+    }
+  }
+
+  /** Represents conversation between multiple users. */
+  public static final class Conversation {
+    public final ConversationMessage[] conversationMessages;
+
+    public Conversation(ConversationMessage[] conversationMessages) {
+      this.conversationMessages = conversationMessages;
+    }
+
+    public ConversationMessage[] getConversationMessages() {
+      return conversationMessages;
+    }
+  }
+
+  /** Represents options for the SuggestActions call. */
+  public static final class ActionSuggestionOptions {
+    private final AnnotatorModel.AnnotationOptions annotationOptions;
+
+    public ActionSuggestionOptions() {
+      this.annotationOptions = null;
+    }
+
+    public ActionSuggestionOptions(AnnotatorModel.AnnotationOptions annotationOptions) {
+      this.annotationOptions = annotationOptions;
+    }
+
+    public AnnotatorModel.AnnotationOptions getAnnotationOptions() {
+      return annotationOptions;
+    }
+  }
+
+  /** Sets and annotator to use for actions suggestions. */
+  private void setAnnotator(AnnotatorModel annotator) {
+    this.annotator = annotator;
+    if (annotator != null) {
+      nativeSetAnnotator(annotator.getNativeAnnotator());
+    }
+  }
+
+  private static native long nativeNewActionsModel(int fd);
+
+  private static native long nativeNewActionsModelFromPath(String path);
+
+  private static native String nativeGetLocales(int fd);
+
+  private static native int nativeGetVersion(int fd);
+
+  private static native String nativeGetName(int fd);
+
+  private native ActionSuggestion[] nativeSuggestActions(
+      long context, Conversation conversation, ActionSuggestionOptions options);
+
+  private native void nativeCloseActionsModel(long context);
+
+  private native void nativeSetAnnotator(long annotatorPtr);
+}
diff --git a/java/com/google/android/textclassifier/AnnotatorModel.java b/java/com/google/android/textclassifier/AnnotatorModel.java
new file mode 100644
index 0000000..08a4455
--- /dev/null
+++ b/java/com/google/android/textclassifier/AnnotatorModel.java
@@ -0,0 +1,342 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.android.textclassifier;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Java wrapper for Annotator native library interface. This library is used for detecting entities
+ * in text.
+ *
+ * @hide
+ */
+public final class AnnotatorModel implements AutoCloseable {
+  private final AtomicBoolean isClosed = new AtomicBoolean(false);
+
+  static {
+    System.loadLibrary("textclassifier");
+  }
+
+  // Keep these in sync with the constants defined in AOSP.
+  static final String TYPE_UNKNOWN = "";
+  static final String TYPE_OTHER = "other";
+  static final String TYPE_EMAIL = "email";
+  static final String TYPE_PHONE = "phone";
+  static final String TYPE_ADDRESS = "address";
+  static final String TYPE_URL = "url";
+  static final String TYPE_DATE = "date";
+  static final String TYPE_DATE_TIME = "datetime";
+  static final String TYPE_FLIGHT_NUMBER = "flight";
+
+  private long annotatorPtr;
+
+  /**
+   * Creates a new instance of SmartSelect predictor, using the provided model image, given as a
+   * file descriptor.
+   */
+  public AnnotatorModel(int fileDescriptor) {
+    annotatorPtr = nativeNewAnnotator(fileDescriptor);
+    if (annotatorPtr == 0L) {
+      throw new IllegalArgumentException("Couldn't initialize TC from file descriptor.");
+    }
+  }
+
+  /**
+   * Creates a new instance of SmartSelect predictor, using the provided model image, given as a
+   * file path.
+   */
+  public AnnotatorModel(String path) {
+    annotatorPtr = nativeNewAnnotatorFromPath(path);
+    if (annotatorPtr == 0L) {
+      throw new IllegalArgumentException("Couldn't initialize TC from given file.");
+    }
+  }
+
+  /** Initializes the knowledge engine, passing the given serialized config to it. */
+  public void initializeKnowledgeEngine(byte[] serializedConfig) {
+    if (!nativeInitializeKnowledgeEngine(annotatorPtr, serializedConfig)) {
+      throw new IllegalArgumentException("Couldn't initialize the KG engine");
+    }
+  }
+
+  /**
+   * Given a string context and current selection, computes the selection suggestion.
+   *
+   * <p>The begin and end are character indices into the context UTF8 string. selectionBegin is the
+   * character index where the selection begins, and selectionEnd is the index of one character past
+   * the selection span.
+   *
+   * <p>The return value is an array of two ints: suggested selection beginning and end, with the
+   * same semantics as the input selectionBeginning and selectionEnd.
+   */
+  public int[] suggestSelection(
+      String context, int selectionBegin, int selectionEnd, SelectionOptions options) {
+    return nativeSuggestSelection(annotatorPtr, context, selectionBegin, selectionEnd, options);
+  }
+
+  /**
+   * Given a string context and current selection, classifies the type of the selected text.
+   *
+   * <p>The begin and end params are character indices in the context string.
+   *
+   * <p>Returns an array of ClassificationResult objects with the probability scores for different
+   * collections.
+   */
+  public ClassificationResult[] classifyText(
+      String context, int selectionBegin, int selectionEnd, ClassificationOptions options) {
+    return nativeClassifyText(annotatorPtr, context, selectionBegin, selectionEnd, options);
+  }
+
+  /**
+   * Annotates given input text. The annotations should cover the whole input context except for
+   * whitespaces, and are sorted by their position in the context string.
+   */
+  public AnnotatedSpan[] annotate(String text, AnnotationOptions options) {
+    return nativeAnnotate(annotatorPtr, text, options);
+  }
+
+  /** Frees up the allocated memory. */
+  @Override
+  public void close() {
+    if (isClosed.compareAndSet(false, true)) {
+      nativeCloseAnnotator(annotatorPtr);
+      annotatorPtr = 0L;
+    }
+  }
+
+  @Override
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } finally {
+      super.finalize();
+    }
+  }
+
+  /** Returns a comma separated list of locales supported by the model as BCP 47 tags. */
+  public static String getLocales(int fd) {
+    return nativeGetLocales(fd);
+  }
+
+  /** Returns the version of the model. */
+  public static int getVersion(int fd) {
+    return nativeGetVersion(fd);
+  }
+
+  /** Returns the name of the model. */
+  public static String getName(int fd) {
+    return nativeGetName(fd);
+  }
+
+  /** Information about a parsed time/date. */
+  public static final class DatetimeResult {
+
+    static final int GRANULARITY_YEAR = 0;
+    static final int GRANULARITY_MONTH = 1;
+    static final int GRANULARITY_WEEK = 2;
+    static final int GRANULARITY_DAY = 3;
+    static final int GRANULARITY_HOUR = 4;
+    static final int GRANULARITY_MINUTE = 5;
+    static final int GRANULARITY_SECOND = 6;
+
+    private final long timeMsUtc;
+    private final int granularity;
+
+    DatetimeResult(long timeMsUtc, int granularity) {
+      this.timeMsUtc = timeMsUtc;
+      this.granularity = granularity;
+    }
+
+    public long getTimeMsUtc() {
+      return timeMsUtc;
+    }
+
+    public int getGranularity() {
+      return granularity;
+    }
+  }
+
+  /** Classification result for classifyText method. */
+  public static final class ClassificationResult {
+    private final String collection;
+    private final float score;
+    private final DatetimeResult datetimeResult;
+    private final byte[] serializedKnowledgeResult;
+
+    public ClassificationResult(
+        String collection,
+        float score,
+        DatetimeResult datetimeResult,
+        byte[] serializedKnowledgeResult) {
+      this.collection = collection;
+      this.score = score;
+      this.datetimeResult = datetimeResult;
+      this.serializedKnowledgeResult = serializedKnowledgeResult;
+    }
+
+    /** Returns the classified entity type. */
+    public String getCollection() {
+      if (TYPE_DATE.equals(collection) && datetimeResult != null) {
+        switch (datetimeResult.getGranularity()) {
+          case DatetimeResult.GRANULARITY_HOUR:
+          case DatetimeResult.GRANULARITY_MINUTE:
+          case DatetimeResult.GRANULARITY_SECOND:
+            return TYPE_DATE_TIME;
+          default:
+            return TYPE_DATE;
+        }
+      }
+      return collection;
+    }
+
+    /** Confidence score between 0 and 1. */
+    public float getScore() {
+      return score;
+    }
+
+    public DatetimeResult getDatetimeResult() {
+      return datetimeResult;
+    }
+
+    byte[] getSerializedKnowledgeResult() {
+      return serializedKnowledgeResult;
+    }
+  }
+
+  /** Represents a result of Annotate call. */
+  public static final class AnnotatedSpan {
+    private final int startIndex;
+    private final int endIndex;
+    private final ClassificationResult[] classification;
+
+    AnnotatedSpan(int startIndex, int endIndex, ClassificationResult[] classification) {
+      this.startIndex = startIndex;
+      this.endIndex = endIndex;
+      this.classification = classification;
+    }
+
+    public int getStartIndex() {
+      return startIndex;
+    }
+
+    public int getEndIndex() {
+      return endIndex;
+    }
+
+    public ClassificationResult[] getClassification() {
+      return classification;
+    }
+  }
+
+  /** Represents options for the suggestSelection call. */
+  public static final class SelectionOptions {
+    private final String locales;
+
+    public SelectionOptions(String locales) {
+      this.locales = locales;
+    }
+
+    public String getLocales() {
+      return locales;
+    }
+  }
+
+  /** Represents options for the classifyText call. */
+  public static final class ClassificationOptions {
+    private final long referenceTimeMsUtc;
+    private final String referenceTimezone;
+    private final String locales;
+
+    public ClassificationOptions(long referenceTimeMsUtc, String referenceTimezone, String locale) {
+      this.referenceTimeMsUtc = referenceTimeMsUtc;
+      this.referenceTimezone = referenceTimezone;
+      this.locales = locale;
+    }
+
+    public long getReferenceTimeMsUtc() {
+      return referenceTimeMsUtc;
+    }
+
+    public String getReferenceTimezone() {
+      return referenceTimezone;
+    }
+
+    public String getLocale() {
+      return locales;
+    }
+  }
+
+  /** Represents options for the annotate call. */
+  public static final class AnnotationOptions {
+    private final long referenceTimeMsUtc;
+    private final String referenceTimezone;
+    private final String locales;
+
+    public AnnotationOptions(long referenceTimeMsUtc, String referenceTimezone, String locale) {
+      this.referenceTimeMsUtc = referenceTimeMsUtc;
+      this.referenceTimezone = referenceTimezone;
+      this.locales = locale;
+    }
+
+    public long getReferenceTimeMsUtc() {
+      return referenceTimeMsUtc;
+    }
+
+    public String getReferenceTimezone() {
+      return referenceTimezone;
+    }
+
+    public String getLocale() {
+      return locales;
+    }
+  }
+
+  /**
+   * Retrieves the pointer to the native object. Note: Need to keep the AnnotatorModel alive as long
+   * as the pointer is used.
+   */
+  long getNativeAnnotator() {
+    return annotatorPtr;
+  }
+
+  private static native long nativeNewAnnotator(int fd);
+
+  private static native long nativeNewAnnotatorFromPath(String path);
+
+  private static native String nativeGetLocales(int fd);
+
+  private static native int nativeGetVersion(int fd);
+
+  private static native String nativeGetName(int fd);
+
+  private native boolean nativeInitializeKnowledgeEngine(long context, byte[] serializedConfig);
+
+  private native int[] nativeSuggestSelection(
+      long context, String text, int selectionBegin, int selectionEnd, SelectionOptions options);
+
+  private native ClassificationResult[] nativeClassifyText(
+      long context,
+      String text,
+      int selectionBegin,
+      int selectionEnd,
+      ClassificationOptions options);
+
+  private native AnnotatedSpan[] nativeAnnotate(
+      long context, String text, AnnotationOptions options);
+
+  private native void nativeCloseAnnotator(long context);
+}
diff --git a/java/com/google/android/textclassifier/LangIdModel.java b/java/com/google/android/textclassifier/LangIdModel.java
new file mode 100644
index 0000000..4b10b9f
--- /dev/null
+++ b/java/com/google/android/textclassifier/LangIdModel.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.android.textclassifier;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Java wrapper for LangId native library interface. This class is used to detect languages in text.
+ *
+ * @hide
+ */
+public final class LangIdModel implements AutoCloseable {
+  private final AtomicBoolean isClosed = new AtomicBoolean(false);
+
+  static {
+    System.loadLibrary("textclassifier");
+  }
+
+  private long modelPtr;
+
+  /** Creates a new instance of LangId predictor, using the provided model image. */
+  public LangIdModel(int fd) {
+    modelPtr = nativeNew(fd);
+    if (modelPtr == 0L) {
+      throw new IllegalArgumentException("Couldn't initialize LangId from given file descriptor.");
+    }
+  }
+
+  /** Creates a new instance of LangId predictor, using the provided model image. */
+  public LangIdModel(String modelPath) {
+    modelPtr = nativeNewFromPath(modelPath);
+    if (modelPtr == 0L) {
+      throw new IllegalArgumentException("Couldn't initialize LangId from given file.");
+    }
+  }
+
+  /** Detects the languages for given text. */
+  public LanguageResult[] detectLanguages(String text) {
+    return nativeDetectLanguages(modelPtr, text);
+  }
+
+  /** Frees up the allocated memory. */
+  @Override
+  public void close() {
+    if (isClosed.compareAndSet(false, true)) {
+      nativeClose(modelPtr);
+      modelPtr = 0L;
+    }
+  }
+
+  @Override
+  protected void finalize() throws Throwable {
+    try {
+      close();
+    } finally {
+      super.finalize();
+    }
+  }
+
+  /** Result for detectLanguages method. */
+  public static final class LanguageResult {
+    final String mLanguage;
+    final float mScore;
+
+    LanguageResult(String language, float score) {
+      mLanguage = language;
+      mScore = score;
+    }
+
+    public final String getLanguage() {
+      return mLanguage;
+    }
+
+    public final float getScore() {
+      return mScore;
+    }
+  }
+
+  /** Returns the version of the LangId model used. */
+  public int getVersion() {
+    return nativeGetVersion(modelPtr);
+  }
+
+  public static int getVersion(int fd) {
+    return nativeGetVersionFromFd(fd);
+  }
+
+  private static native long nativeNew(int fd);
+
+  private static native long nativeNewFromPath(String path);
+
+  private native LanguageResult[] nativeDetectLanguages(long nativePtr, String text);
+
+  private native void nativeClose(long nativePtr);
+
+  private native int nativeGetVersion(long nativePtr);
+
+  private static native int nativeGetVersionFromFd(int fd);
+}
diff --git a/lang_id/common/embedding-feature-extractor.cc b/lang_id/common/embedding-feature-extractor.cc
new file mode 100644
index 0000000..6235f89
--- /dev/null
+++ b/lang_id/common/embedding-feature-extractor.cc
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/embedding-feature-extractor.h"
+
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/fel/feature-types.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/numbers.h"
+#include "lang_id/common/lite_strings/str-split.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+bool GenericEmbeddingFeatureExtractor::Setup(TaskContext *context) {
+  // Don't use version to determine how to get feature FML.
+  const string features = context->Get(GetParamName("features"), "");
+  const string embedding_names =
+      context->Get(GetParamName("embedding_names"), "");
+  const string embedding_dims =
+      context->Get(GetParamName("embedding_dims"), "");
+
+  // NOTE: unfortunately, LiteStrSplit returns a vector of StringPieces pointing
+  // to the original string, in this case |features|, which is local to this
+  // method.  We need to explicitly create new strings.
+  for (StringPiece sp : LiteStrSplit(features, ';')) {
+    embedding_fml_.emplace_back(sp);
+  }
+
+  // Same here.
+  for (StringPiece sp : LiteStrSplit(embedding_names, ';')) {
+    embedding_names_.emplace_back(sp);
+  }
+
+  std::vector<StringPiece> dim_strs = LiteStrSplit(embedding_dims, ';');
+  for (const auto &dim_str : dim_strs) {
+    int dim = 0;
+    if (!LiteAtoi(dim_str, &dim)) {
+      SAFTM_LOG(ERROR) << "Unable to parse " << dim_str;
+      return false;
+    }
+    embedding_dims_.push_back(dim);
+  }
+  return true;
+}
+
+bool GenericEmbeddingFeatureExtractor::Init(TaskContext *context) {
+  return true;
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/embedding-feature-extractor.h b/lang_id/common/embedding-feature-extractor.h
new file mode 100644
index 0000000..f51b6e5
--- /dev/null
+++ b/lang_id/common/embedding-feature-extractor.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_FEATURE_EXTRACTOR_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_FEATURE_EXTRACTOR_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/fel/workspace.h"
+#include "lang_id/common/lite_base/attributes.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// An EmbeddingFeatureExtractor manages the extraction of features for
+// embedding-based models. It wraps a sequence of underlying classes of feature
+// extractors, along with associated predicate maps. Each class of feature
+// extractors is associated with a name, e.g., "words", "labels", "tags".
+//
+// The class is split between a generic abstract version,
+// GenericEmbeddingFeatureExtractor (that can be initialized without knowing the
+// signature of the ExtractFeatures method) and a typed version.
+//
+// The predicate maps must be initialized before use: they can be loaded using
+// Read() or updated via UpdateMapsForExample.
+class GenericEmbeddingFeatureExtractor {
+ public:
+  // Constructs this GenericEmbeddingFeatureExtractor.
+  //
+  // |arg_prefix| is a string prefix for the relevant TaskContext parameters, to
+  // avoid name clashes.  See GetParamName().
+  explicit GenericEmbeddingFeatureExtractor(const string &arg_prefix)
+      : arg_prefix_(arg_prefix) {}
+
+  virtual ~GenericEmbeddingFeatureExtractor() {}
+
+  // Sets/inits up predicate maps and embedding space names that are common for
+  // all embedding based feature extractors.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT virtual bool Setup(TaskContext *context);
+  SAFTM_MUST_USE_RESULT virtual bool Init(TaskContext *context);
+
+  // Requests workspace for the underlying feature extractors. This is
+  // implemented in the typed class.
+  virtual void RequestWorkspaces(WorkspaceRegistry *registry) = 0;
+
+  // Returns number of embedding spaces.
+  int NumEmbeddings() const { return embedding_dims_.size(); }
+
+  const std::vector<string> &embedding_fml() const { return embedding_fml_; }
+
+  // Get parameter name by concatenating the prefix and the original name.
+  string GetParamName(const string &param_name) const {
+    string full_name = arg_prefix_;
+    full_name.push_back('_');
+    full_name.append(param_name);
+    return full_name;
+  }
+
+ private:
+  // Prefix for TaskContext parameters.
+  const string arg_prefix_;
+
+  // Embedding space names for parameter sharing.
+  std::vector<string> embedding_names_;
+
+  // FML strings for each feature extractor.
+  std::vector<string> embedding_fml_;
+
+  // Size of each of the embedding spaces (maximum predicate id).
+  std::vector<int> embedding_sizes_;
+
+  // Embedding dimensions of the embedding spaces (i.e. 32, 64 etc.)
+  std::vector<int> embedding_dims_;
+};
+
+// Templated, object-specific implementation of the
+// EmbeddingFeatureExtractor. EXTRACTOR should be a FeatureExtractor<OBJ,
+// ARGS...> class that has the appropriate FeatureTraits() to ensure that
+// locator type features work.
+//
+// Note: for backwards compatibility purposes, this always reads the FML spec
+// from "<prefix>_features".
+template <class EXTRACTOR, class OBJ, class... ARGS>
+class EmbeddingFeatureExtractor : public GenericEmbeddingFeatureExtractor {
+ public:
+  // Constructs this EmbeddingFeatureExtractor.
+  //
+  // |arg_prefix| is a string prefix for the relevant TaskContext parameters, to
+  // avoid name clashes.  See GetParamName().
+  explicit EmbeddingFeatureExtractor(const string &arg_prefix)
+      : GenericEmbeddingFeatureExtractor(arg_prefix) {}
+
+  // Sets up all predicate maps, feature extractors, and flags.
+  SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) override {
+    if (!GenericEmbeddingFeatureExtractor::Setup(context)) {
+      return false;
+    }
+    feature_extractors_.resize(embedding_fml().size());
+    for (int i = 0; i < embedding_fml().size(); ++i) {
+      feature_extractors_[i].reset(new EXTRACTOR());
+      if (!feature_extractors_[i]->Parse(embedding_fml()[i])) return false;
+      if (!feature_extractors_[i]->Setup(context)) return false;
+    }
+    return true;
+  }
+
+  // Initializes resources needed by the feature extractors.
+  SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) override {
+    if (!GenericEmbeddingFeatureExtractor::Init(context)) return false;
+    for (auto &feature_extractor : feature_extractors_) {
+      if (!feature_extractor->Init(context)) return false;
+    }
+    return true;
+  }
+
+  // Requests workspaces from the registry. Must be called after Init(), and
+  // before Preprocess().
+  void RequestWorkspaces(WorkspaceRegistry *registry) override {
+    for (auto &feature_extractor : feature_extractors_) {
+      feature_extractor->RequestWorkspaces(registry);
+    }
+  }
+
+  // Must be called on the object one state for each sentence, before any
+  // feature extraction (e.g., UpdateMapsForExample, ExtractFeatures).
+  void Preprocess(WorkspaceSet *workspaces, OBJ *obj) const {
+    for (auto &feature_extractor : feature_extractors_) {
+      feature_extractor->Preprocess(workspaces, obj);
+    }
+  }
+
+  // Extracts features using the extractors. Note that features must already
+  // be initialized to the correct number of feature extractors. No predicate
+  // mapping is applied.
+  void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &obj,
+                       ARGS... args,
+                       std::vector<FeatureVector> *features) const {
+    // DCHECK(features != nullptr);
+    // DCHECK_EQ(features->size(), feature_extractors_.size());
+    for (int i = 0; i < feature_extractors_.size(); ++i) {
+      (*features)[i].clear();
+      feature_extractors_[i]->ExtractFeatures(workspaces, obj, args...,
+                                              &(*features)[i]);
+    }
+  }
+
+ private:
+  // Templated feature extractor class.
+  std::vector<std::unique_ptr<EXTRACTOR>> feature_extractors_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_FEATURE_EXTRACTOR_H_
diff --git a/lang_id/common/embedding-feature-interface.h b/lang_id/common/embedding-feature-interface.h
new file mode 100644
index 0000000..87576c6
--- /dev/null
+++ b/lang_id/common/embedding-feature-interface.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_FEATURE_INTERFACE_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_FEATURE_INTERFACE_H_
+
+#include <string>
+#include <vector>
+
+#include "lang_id/common/embedding-feature-extractor.h"
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/fel/workspace.h"
+#include "lang_id/common/lite_base/attributes.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+template <class EXTRACTOR, class OBJ, class... ARGS>
+class EmbeddingFeatureInterface {
+ public:
+  // Constructs this EmbeddingFeatureInterface.
+  //
+  // |arg_prefix| is a string prefix for the TaskContext parameters, passed to
+  // the underlying EmbeddingFeatureExtractor.
+  explicit EmbeddingFeatureInterface(const string &arg_prefix)
+      : feature_extractor_(arg_prefix) {}
+
+  // Sets up feature extractors and flags for processing (inference).
+  SAFTM_MUST_USE_RESULT bool SetupForProcessing(TaskContext *context) {
+    return feature_extractor_.Setup(context);
+  }
+
+  // Initializes feature extractor resources for processing (inference)
+  // including requesting a workspace for caching extracted features.
+  SAFTM_MUST_USE_RESULT bool InitForProcessing(TaskContext *context) {
+    if (!feature_extractor_.Init(context)) return false;
+    feature_extractor_.RequestWorkspaces(&workspace_registry_);
+    return true;
+  }
+
+  // Preprocesses *obj using the internal workspace registry.
+  void Preprocess(WorkspaceSet *workspace, OBJ *obj) const {
+    workspace->Reset(workspace_registry_);
+    feature_extractor_.Preprocess(workspace, obj);
+  }
+
+  // Extract features from |obj|.  On return, FeatureVector features[i]
+  // contains the features for the embedding space #i.
+  //
+  // This function uses the precomputed info from |workspace|.  Usage pattern:
+  //
+  //   EmbeddingFeatureInterface<...> feature_interface;
+  //   ...
+  //   OBJ obj;
+  //   WorkspaceSet workspace;
+  //   feature_interface.Preprocess(&workspace, &obj);
+  //
+  //   // For the same obj, but with different args:
+  //   std::vector<FeatureVector> features;
+  //   feature_interface.GetFeatures(obj, args, workspace, &features);
+  //
+  // This pattern is useful (more efficient) if you can pre-compute some info
+  // for the entire |obj|, which is reused by the feature extraction performed
+  // for different args.  If that is not the case, you can use the simpler
+  // version GetFeaturesNoCaching below.
+  void GetFeatures(const OBJ &obj, ARGS... args, const WorkspaceSet &workspace,
+                   std::vector<FeatureVector> *features) const {
+    feature_extractor_.ExtractFeatures(workspace, obj, args..., features);
+  }
+
+  // Simpler version of GetFeatures(), for cases when there is no opportunity to
+  // reuse computation between feature extractions for the same |obj|, but with
+  // different |args|.  Returns the extracted features.  For more info, see the
+  // doc for GetFeatures().
+  std::vector<FeatureVector> GetFeaturesNoCaching(OBJ *obj,
+                                                  ARGS... args) const {
+    // Technically, we still use a workspace, because
+    // feature_extractor_.ExtractFeatures requires one.  But there is no real
+    // caching here, as we start from scratch for each call to ExtractFeatures.
+    WorkspaceSet workspace;
+    Preprocess(&workspace, obj);
+    std::vector<FeatureVector> features(NumEmbeddings());
+    GetFeatures(*obj, args..., workspace, &features);
+    return features;
+  }
+
+  // Returns number of embedding spaces.
+  int NumEmbeddings() const { return feature_extractor_.NumEmbeddings(); }
+
+ private:
+  // Typed feature extractor for embeddings.
+  EmbeddingFeatureExtractor<EXTRACTOR, OBJ, ARGS...> feature_extractor_;
+
+  // The registry of shared workspaces in the feature extractor.
+  WorkspaceRegistry workspace_registry_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_FEATURE_INTERFACE_H_
diff --git a/lang_id/common/embedding-network-params.cc b/lang_id/common/embedding-network-params.cc
new file mode 100644
index 0000000..be7c80e
--- /dev/null
+++ b/lang_id/common/embedding-network-params.cc
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/embedding-network-params.h"
+
+#include "lang_id/common/lite_base/logging.h"
+
+namespace libtextclassifier3 {
+
+QuantizationType ParseQuantizationType(const string &s) {
+  if (s == "NONE") {
+    return QuantizationType::NONE;
+  }
+  if (s == "UINT8") {
+    return QuantizationType::UINT8;
+  }
+  if (s == "UINT4") {
+    return QuantizationType::UINT4;
+  }
+  if (s == "FLOAT16") {
+    return QuantizationType::FLOAT16;
+  }
+  SAFTM_LOG(FATAL) << "Unsupported quantization type: " << s;
+
+  // Execution should never reach this point; just to keep the compiler happy.
+  // TODO(salcianu): implement SAFTM_LOG(FATAL) in a way that doesn't require
+  // this trick.
+  return QuantizationType::NONE;
+}
+
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/embedding-network-params.h b/lang_id/common/embedding-network-params.h
new file mode 100755
index 0000000..f43c653
--- /dev/null
+++ b/lang_id/common/embedding-network-params.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_NETWORK_PARAMS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_NETWORK_PARAMS_H_
+
+#include <string>
+
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/lite_base/float16.h"
+#include "lang_id/common/lite_base/logging.h"
+
+namespace libtextclassifier3 {
+
+enum class QuantizationType {
+  NONE = 0,
+
+  // Quantization to 8 bit unsigned ints.
+  UINT8,
+
+  // Quantization to 4 bit unsigned ints.
+  UINT4,
+
+  // Quantization to 16 bit floats, the type defined in
+  // lang_id/common/float16.h
+  FLOAT16,
+
+  // NOTE: for backward compatibility, if you add a new value to this enum, add
+  // it *at the end*, such that you do not change the integer values of the
+  // existing enum values.
+};
+
+// Converts "UINT8" -> QuantizationType::UINT8, and so on.
+QuantizationType ParseQuantizationType(const string &s);
+
+// API for accessing parameters for a feed-forward neural network with
+// embeddings.
+//
+//
+// In fact, we provide two APIs: a high-level (and highly-recommended) API, with
+// methods named using the BigCamel notation (e.g., GetEmbeddingMatrix()) and a
+// low-level API, using C-style names (e.g., softmax_num_cols()).
+//
+// Note: the API below is meant to allow the inference code (the class
+// libtextclassifier3::mobile::EmbeddingNetwork) to use the data directly, with no need
+// for transposing any matrix (which would require extra overhead on mobile
+// devices).  Hence, as indicated by the comments for the API methods, some of
+// the matrices below are the transposes of the corresponding matrices from the
+// original proto.
+class EmbeddingNetworkParams {
+ public:
+  virtual ~EmbeddingNetworkParams() {}
+
+  // Returns true if these params are valid.  False otherwise (e.g., if the
+  // underlying data is corrupted).  If is_valid() returns false, clients should
+  // not call any other method on that instance of EmbeddingNetworkParams.  If
+  // is_valid() returns true, then calls to the API methods below should not
+  // crash *if they are called with index parameters in bounds*.  E.g., if
+  // is_valid() and 0 <= i < embeddings_size(), then GetEmbeddingMatrix(i)
+  // should not crash.
+  virtual bool is_valid() const = 0;
+
+  // **** High-level API.
+
+  // Simple representation of a matrix.  This small struct that doesn't own any
+  // resource intentionally supports copy / assign, to simplify our APIs.
+  struct Matrix {
+    // Number of rows.
+    int rows = 0;
+
+    // Number of columns.
+    int cols = 0;
+
+    QuantizationType quant_type = QuantizationType::NONE;
+
+    // Pointer to matrix elements, in row-major order
+    // (https://en.wikipedia.org/wiki/Row-major_order) Not owned.
+    const void *elements = nullptr;
+
+    // Quantization scales: one scale for each row.
+    const ::libtextclassifier3::mobile::float16 *quant_scales = nullptr;
+  };
+
+  // Returns i-th embedding matrix.  Crashes on out of bounds indices.
+  //
+  // This is the transpose of the corresponding matrix from the original proto.
+  Matrix GetEmbeddingMatrix(int i) const {
+    CheckIndex(i, embeddings_size(), "embedding matrix");
+    Matrix matrix;
+    matrix.rows = embeddings_num_rows(i);
+    matrix.cols = embeddings_num_cols(i);
+    matrix.elements = embeddings_weights(i);
+    matrix.quant_type = embeddings_quant_type(i);
+    matrix.quant_scales = embeddings_quant_scales(i);
+    return matrix;
+  }
+
+  // Returns weight matrix for i-th hidden layer.  Crashes on out of bounds
+  // indices.
+  //
+  // This is the transpose of the corresponding matrix from the original proto.
+  Matrix GetHiddenLayerMatrix(int i) const {
+    CheckIndex(i, hidden_size(), "hidden layer");
+    Matrix matrix;
+    matrix.rows = hidden_num_rows(i);
+    matrix.cols = hidden_num_cols(i);
+
+    // Quantization not supported here.
+    matrix.quant_type = hidden_weights_quant_type(i);
+    matrix.elements = hidden_weights(i);
+    return matrix;
+  }
+
+  // Returns bias for i-th hidden layer.  Technically a Matrix, but we expect it
+  // to be a row/column vector (i.e., num rows or num cols is 1).  However, we
+  // don't CHECK for that: we just provide access to underlying data.  Crashes
+  // on out of bounds indices.
+  Matrix GetHiddenLayerBias(int i) const {
+    CheckIndex(i, hidden_bias_size(), "hidden layer bias");
+    Matrix matrix;
+    matrix.rows = hidden_bias_num_rows(i);
+    matrix.cols = hidden_bias_num_cols(i);
+
+    // Quantization not supported here.
+    matrix.quant_type = QuantizationType::NONE;
+    matrix.elements = hidden_bias_weights(i);
+    return matrix;
+  }
+
+  // Returns true if a softmax layer exists.
+  bool HasSoftmax() const {
+    return softmax_size() == 1;
+  }
+
+  // Returns weight matrix for the softmax layer.  Note: should be called only
+  // if HasSoftmax() is true.
+  //
+  // This is the transpose of the corresponding matrix from the original proto.
+  Matrix GetSoftmaxMatrix() const {
+    SAFTM_CHECK(HasSoftmax()) << "No softmax layer.";
+    Matrix matrix;
+    matrix.rows = softmax_num_rows(0);
+    matrix.cols = softmax_num_cols(0);
+
+    // Quantization not supported here.
+    matrix.quant_type = softmax_weights_quant_type(0);
+    matrix.elements = softmax_weights(0);
+    return matrix;
+  }
+
+  // Returns bias for the softmax layer.  Technically a Matrix, but we expect it
+  // to be a row/column vector (i.e., num rows or num cols is 1).  However, we
+  // don't CHECK for that: we just provide access to underlying data.
+  Matrix GetSoftmaxBias() const {
+    SAFTM_CHECK(HasSoftmax()) << "No softmax layer.";
+    Matrix matrix;
+    matrix.rows = softmax_bias_num_rows(0);
+    matrix.cols = softmax_bias_num_cols(0);
+
+    // Quantization not supported here.
+    matrix.quant_type = QuantizationType::NONE;
+    matrix.elements = softmax_bias_weights(0);
+    return matrix;
+  }
+
+  // Updates the EmbeddingNetwork-related parameters from task_context.  Returns
+  // true on success, false on error.
+  virtual bool UpdateTaskContextParameters(
+      mobile::TaskContext *task_context) = 0;
+
+  // **** Low-level API.
+  //
+  // * Most low-level API methods are documented by giving an equivalent
+  //   function call on proto, the original proto (of type
+  //   EmbeddingNetworkProto) which was used to generate the C++ code.
+  //
+  // * To simplify our generation code, optional proto fields of message type
+  //   are treated as repeated fields with 0 or 1 instances.  As such, we have
+  //   *_size() methods for such optional fields: they return 0 or 1.
+  //
+  // * "transpose(M)" denotes the transpose of a matrix M.
+
+  // ** Access methods for repeated MatrixParams embeddings.
+  //
+  // Returns proto.embeddings_size().
+  virtual int embeddings_size() const = 0;
+
+  // Returns number of rows of transpose(proto.embeddings(i)).
+  virtual int embeddings_num_rows(int i) const = 0;
+
+  // Returns number of columns of transpose(proto.embeddings(i)).
+  virtual int embeddings_num_cols(int i) const = 0;
+
+  // Returns pointer to elements of transpose(proto.embeddings(i)), in row-major
+  // order.  NOTE: for unquantized embeddings, this returns a pointer to float;
+  // for quantized embeddings, this returns a pointer to uint8.
+  virtual const void *embeddings_weights(int i) const = 0;
+
+  virtual QuantizationType embeddings_quant_type(int i) const {
+    return QuantizationType::NONE;
+  }
+
+  virtual const ::libtextclassifier3::mobile::float16 *embeddings_quant_scales(
+      int i) const {
+    return nullptr;
+  }
+
+  // ** Access methods for repeated MatrixParams hidden.
+  //
+  // Returns embedding_network_proto.hidden_size().
+  virtual int hidden_size() const = 0;
+
+  // Returns embedding_network_proto.hidden(i).rows().
+  virtual int hidden_num_rows(int i) const = 0;
+
+  // Returns embedding_network_proto.hidden(i).cols().
+  virtual int hidden_num_cols(int i) const = 0;
+
+  // Returns quantization mode for the weights of the i-th hidden layer.
+  virtual QuantizationType hidden_weights_quant_type(int i) const {
+    return QuantizationType::NONE;
+  }
+
+  // Returns pointer to beginning of array of floats with all values from
+  // embedding_network_proto.hidden(i).
+  virtual const void *hidden_weights(int i) const = 0;
+
+  // ** Access methods for repeated MatrixParams hidden_bias.
+  //
+  // Returns proto.hidden_bias_size().
+  virtual int hidden_bias_size() const = 0;
+
+  // Returns number of rows of proto.hidden_bias(i).
+  virtual int hidden_bias_num_rows(int i) const = 0;
+
+  // Returns number of columns of proto.hidden_bias(i).
+  virtual int hidden_bias_num_cols(int i) const = 0;
+
+  // Returns pointer to elements of proto.hidden_bias(i), in row-major order.
+  virtual const void *hidden_bias_weights(int i) const = 0;
+
+  // ** Access methods for optional MatrixParams softmax.
+  //
+  // Returns 1 if proto has optional field softmax, 0 otherwise.
+  virtual int softmax_size() const = 0;
+
+  // Returns number of rows of transpose(proto.softmax()).
+  virtual int softmax_num_rows(int i) const = 0;
+
+  // Returns number of columns of transpose(proto.softmax()).
+  virtual int softmax_num_cols(int i) const = 0;
+
+  // Returns quantization mode for the softmax weights.
+  virtual QuantizationType softmax_weights_quant_type(int i) const {
+    return QuantizationType::NONE;
+  }
+
+  // Returns pointer to elements of transpose(proto.softmax()), in row-major
+  // order.
+  virtual const void *softmax_weights(int i) const = 0;
+
+  // ** Access methods for optional MatrixParams softmax_bias.
+  //
+  // Returns 1 if proto has optional field softmax_bias, 0 otherwise.
+  virtual int softmax_bias_size() const = 0;
+
+  // Returns number of rows of proto.softmax_bias().
+  virtual int softmax_bias_num_rows(int i) const = 0;
+
+  // Returns number of columns of proto.softmax_bias().
+  virtual int softmax_bias_num_cols(int i) const = 0;
+
+  // Returns pointer to elements of proto.softmax_bias(), in row-major order.
+  virtual const void *softmax_bias_weights(int i) const = 0;
+
+  // ** Access methods for repeated int32 embedding_num_features.
+  //
+  // Returns proto.embedding_num_features_size().
+  virtual int embedding_num_features_size() const = 0;
+
+  // Returns proto.embedding_num_features(i).
+  virtual int embedding_num_features(int i) const = 0;
+
+  // ** Access methods for is_precomputed
+  //
+  // Returns proto.has_is_precomputed().
+  virtual bool has_is_precomputed() const = 0;
+
+  // Returns proto.is_precomputed().
+  virtual bool is_precomputed() const = 0;
+
+ protected:
+  void CheckIndex(int index, int size, const string &description) const {
+    SAFTM_CHECK_GE(index, 0)
+        << "Out-of-range index for " << description << ": " << index;
+    SAFTM_CHECK_LT(index, size)
+        << "Out-of-range index for " << description << ": " << index;
+  }
+};  // class EmbeddingNetworkParams
+
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_NETWORK_PARAMS_H_
diff --git a/lang_id/common/embedding-network.cc b/lang_id/common/embedding-network.cc
new file mode 100644
index 0000000..469cb1f
--- /dev/null
+++ b/lang_id/common/embedding-network.cc
@@ -0,0 +1,323 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/embedding-network.h"
+
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_base/logging.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace {
+
+void CheckNoQuantization(const EmbeddingNetworkParams::Matrix &matrix) {
+  SAFTM_CHECK_EQ(static_cast<int>(QuantizationType::NONE),
+                 static_cast<int>(matrix.quant_type))
+      << "Quantization not allowed here";
+}
+
+int GetMatrixRowSizeInBytes(const EmbeddingNetworkParams::Matrix &matrix) {
+  int cols = matrix.cols;
+  QuantizationType quant_type = matrix.quant_type;
+  switch (quant_type) {
+    case QuantizationType::NONE:
+      return cols * sizeof(float);
+    case QuantizationType::UINT8:
+      return cols * sizeof(uint8);
+    case QuantizationType::UINT4:
+      SAFTM_DCHECK_EQ(cols % 2, 0) << "UINT4 with odd #cols = " << cols;
+      return cols / 2;
+    case QuantizationType::FLOAT16:
+      return cols * sizeof(float16);
+    default:
+      SAFTM_LOG(FATAL) << "Unknown quant type: "
+                       << static_cast<int>(quant_type);
+  }
+}
+
+// Computes y = weights * Relu(x) + b where Relu is optionally applied.
+//
+// weights and b are the weight matrix, respectively the bias vector of a neural
+// network layer.
+//
+// Note: in the research literature, usually Relu (the activation function) is
+// the last part of a neural layer.  From that perspective, this function
+// computes the Relu part of the previous layer (if any) and next the first half
+// (the computation of the state) for the current layer.
+//
+// Note: weights is expected to be the transposed version of the real weight
+// matrix.  Hence, instead of computing a linear combination of the columns of
+// weights, we compute a linear combination of its rows; but we are mindful that
+// these rows are the columns of the original matrix, hence the name
+// weights_col_i in the code.
+void SparseReluProductPlusBias(bool apply_relu,
+                               const EmbeddingNetworkParams::Matrix &weights,
+                               const EmbeddingNetworkParams::Matrix &b,
+                               const std::vector<float> &x,
+                               std::vector<float> *y) {
+  // Initialize y to b.  b is a column matrix (i.e., b.cols == 1); we already
+  // CHECK-ed that in the EmbeddingNetwork constructor.
+  const float *b_start = reinterpret_cast<const float *>(b.elements);
+  SAFTM_DCHECK_EQ(b.cols, 1);
+  y->assign(b_start, b_start + b.rows);
+
+  float *const y_data = y->data();
+  const int y_size = y->size();
+  SAFTM_CHECK_EQ(weights.cols, y_size);
+  const int x_size = x.size();
+  SAFTM_CHECK_EQ(weights.rows, x_size);
+
+  // NOTE: the code below reads x_size * y_size elements from weights; these
+  // reads are safe as long as weights.elements contains weights.rows *
+  // weights.cols elements (where the element size depends on the quantization
+  // type).  That requirement is checked by the params provider, e.g., by
+  // EmbeddingNetworkParamsFromFlatbuffer.
+
+  // There is some code duplication between the two main cases of the switch
+  // below: the idea was to "lift" the switch outside the loops, to reduce the
+  // number of tests at runtime.
+  switch (weights.quant_type) {
+    case QuantizationType::NONE: {
+      // We compute a linear combination of the rows from |weights|, using
+      // elements of x (optionally, Relu(x)) as scaling factors (the i-th row
+      // gets multiplied by x[i] before being added with the other rows).  Note:
+      // elements of |weights| are stored in row-major order: first the elements
+      // of row #0, next the elements of row #1, etc.  In the comments below, we
+      // write "weights[i][j]" to refer to the j-th element from the i-th row of
+      // weights.
+      const float *weight_ptr =
+          reinterpret_cast<const float *>(weights.elements);
+      for (int i = 0; i < x_size; ++i) {
+        // Invariant 1: weight_ptr points to the beginning of the i-th row from
+        // weights (i.e., weights[i][0]).
+        const float scale = x[i];
+        if (!apply_relu || (scale > 0)) {
+          for (int j = 0; j < y_size; ++j, ++weight_ptr) {
+            // Invariant 2: weight_ptr points to weights[i][j].
+            y_data[j] += (*weight_ptr) * scale;
+          }
+        } else {
+          // We don't update y_data, but we still have to move weight_ptr to the
+          // next row (to satisfy Invariant 1).  We do this by adding y_size ==
+          // weights.cols() (see earlier CHECK_EQ).
+          weight_ptr += y_size;
+        }
+      }
+      break;
+    }
+    case QuantizationType::FLOAT16: {
+      // See comments for the QuantizationType::NONE case: the code is almost
+      // identical, except for float16 (instead of float) and the Float16To32
+      // conversion.  We could unify these two cases using a template, but since
+      // this is a critical loop, don't want to risk that e.g., inlining of the
+      // conversion function doesn't happen.
+      const float16 *weight_ptr =
+          reinterpret_cast<const float16 *>(weights.elements);
+      for (int i = 0; i < x_size; ++i) {
+        const float scale = x[i];
+        if (!apply_relu || (scale > 0)) {
+          for (int j = 0; j < y_size; ++j, ++weight_ptr) {
+            y_data[j] += Float16To32(*weight_ptr) * scale;
+          }
+        } else {
+          weight_ptr += y_size;
+        }
+      }
+      break;
+    }
+    default:
+      SAFTM_LOG(FATAL) << "Unsupported weights quantization type: "
+                       << static_cast<int>(weights.quant_type);
+  }
+}
+}  // namespace
+
+void EmbeddingNetwork::ConcatEmbeddings(
+    const std::vector<FeatureVector> &feature_vectors,
+    std::vector<float> *concat) const {
+  concat->resize(concat_layer_size_);
+
+  // "es_index" stands for "embedding space index".
+  for (int es_index = 0; es_index < feature_vectors.size(); ++es_index) {
+    const int concat_offset = concat_offset_[es_index];
+
+    const EmbeddingNetworkParams::Matrix &embedding_matrix =
+        embedding_matrices_[es_index];
+    const int embedding_dim = embedding_matrix.cols;
+    const int embedding_row_size_in_bytes =
+        embedding_row_size_in_bytes_[es_index];
+
+    const FeatureVector &feature_vector = feature_vectors[es_index];
+    const int num_features = feature_vector.size();
+    for (int fi = 0; fi < num_features; ++fi) {
+      const FeatureType *feature_type = feature_vector.type(fi);
+      int feature_offset = concat_offset + feature_type->base() * embedding_dim;
+      SAFTM_CHECK_LE(feature_offset + embedding_dim, concat->size());
+
+      // Weighted embeddings will be added starting from this address.
+      float *concat_ptr = concat->data() + feature_offset;
+
+      // Multiplier for each embedding weight.  Includes feature weight (for
+      // continuous features) and quantization scale (for quantized embeddings).
+      float multiplier;
+      int feature_id;
+      const FeatureValue feature_value = feature_vector.value(fi);
+      if (feature_type->is_continuous()) {
+        // Continuous features (encoded as FloatFeatureValue).
+        FloatFeatureValue float_feature_value(feature_value);
+        feature_id = float_feature_value.id;
+        multiplier = float_feature_value.weight;
+      } else {
+        // Discrete features: every present feature has implicit value 1.0.
+        feature_id = feature_value;
+        multiplier = 1.0;
+      }
+
+      SAFTM_CHECK_GE(feature_id, 0);
+      SAFTM_CHECK_LT(feature_id, embedding_matrix.rows);
+
+      // Pointer to float / uint8 weights for relevant embedding.
+      const void *embedding_data =
+          (reinterpret_cast<const char *>(embedding_matrix.elements) +
+           feature_id * embedding_row_size_in_bytes);
+
+      switch (embedding_matrix.quant_type) {
+        case QuantizationType::NONE: {
+          const float *weights =
+              reinterpret_cast<const float *>(embedding_data);
+          for (int i = 0; i < embedding_dim; ++i, ++weights, ++concat_ptr) {
+            *concat_ptr += *weights * multiplier;
+          }
+          break;
+        }
+        case QuantizationType::UINT8: {
+          multiplier *= Float16To32(embedding_matrix.quant_scales[feature_id]);
+          const uint8 *quant_weights =
+              reinterpret_cast<const uint8 *>(embedding_data);
+          for (int i = 0; i < embedding_dim;
+               ++i, ++quant_weights, ++concat_ptr) {
+            // 128 is bias for UINT8 quantization.
+            *concat_ptr +=
+                (static_cast<int>(*quant_weights) - 128) * multiplier;
+          }
+          break;
+        }
+        case QuantizationType::UINT4: {
+          multiplier *= Float16To32(embedding_matrix.quant_scales[feature_id]);
+          const uint8 *quant_weights =
+              reinterpret_cast<const uint8 *>(embedding_data);
+          for (int i = 0; i < embedding_dim / 2; ++i, ++quant_weights) {
+            const uint8 qq = *quant_weights;
+            concat_ptr[0] +=
+                (static_cast<int>((qq & 0xF0) | 0x08) - 128) * multiplier;
+            concat_ptr[1] +=
+                (static_cast<int>(((qq & 0x0F) << 4) | 0x08) - 128) *
+                multiplier;
+            concat_ptr += 2;
+          }
+          break;
+        }
+        default:
+          // We already checked (in GetMatrixRowSizeInBytes) that each embedding
+          // matrix has a known quantization type.  Hence, DLOG is enough here.
+          SAFTM_DLOG(ERROR) << "Unknown embeddings quantization type "
+                            << static_cast<int>(embedding_matrix.quant_type);
+          break;
+      }
+    }
+  }
+}
+
+void EmbeddingNetwork::ComputeFinalScores(
+    const std::vector<FeatureVector> &features,
+    std::vector<float> *scores) const {
+  ComputeFinalScores(features, {}, scores);
+}
+
+void EmbeddingNetwork::ComputeFinalScores(
+    const std::vector<FeatureVector> &features,
+    const std::vector<float> &extra_inputs, std::vector<float> *scores) const {
+  // Construct the input layer for our feed-forward neural network (FFNN).
+  std::vector<float> input;
+  ConcatEmbeddings(features, &input);
+  if (!extra_inputs.empty()) {
+    input.reserve(input.size() + extra_inputs.size());
+    for (int i = 0; i < extra_inputs.size(); i++) {
+      input.push_back(extra_inputs[i]);
+    }
+  }
+
+  // Propagate input through all layers of our FFNN.
+
+  // Alternating storage for activations of the different layers.  We can't use
+  // a single vector because all activations of the previous layer are required
+  // when computing the activations of the next one.
+  std::vector<float> storage[2];
+  const std::vector<float> *v_in = &input;
+  const int num_layers = layer_weights_.size();
+  for (int i = 0; i < num_layers; ++i) {
+    std::vector<float> *v_out = nullptr;
+    if (i == num_layers - 1) {
+      // Final layer: write results directly into |scores|.
+      v_out = scores;
+    } else {
+      // Hidden layer: write results into the alternating storage.  The i % 2
+      // trick ensures the alternation.
+      v_out = &(storage[i % 2]);
+    }
+    const bool apply_relu = i > 0;
+    SparseReluProductPlusBias(
+        apply_relu, layer_weights_[i], layer_bias_[i], *v_in, v_out);
+    v_in = v_out;
+  }
+}
+
+EmbeddingNetwork::EmbeddingNetwork(const EmbeddingNetworkParams *model)
+    : model_(model) {
+  int offset_sum = 0;
+  for (int i = 0; i < model_->embedding_num_features_size(); ++i) {
+    concat_offset_.push_back(offset_sum);
+    EmbeddingNetworkParams::Matrix matrix = model_->GetEmbeddingMatrix(i);
+    offset_sum += matrix.cols * model_->embedding_num_features(i);
+
+    // NOTE: each Matrix is a small struct that doesn't own the actual matrix
+    // weights.  Hence, the push_back below is fast.
+    embedding_matrices_.push_back(matrix);
+    embedding_row_size_in_bytes_.push_back(GetMatrixRowSizeInBytes(matrix));
+  }
+  concat_layer_size_ = offset_sum;
+
+  SAFTM_CHECK_EQ(model_->hidden_size(), model_->hidden_bias_size());
+  for (int i = 0; i < model_->hidden_size(); ++i) {
+    layer_weights_.push_back(model_->GetHiddenLayerMatrix(i));
+
+    EmbeddingNetworkParams::Matrix bias = model_->GetHiddenLayerBias(i);
+    SAFTM_CHECK_EQ(1, bias.cols);
+    CheckNoQuantization(bias);
+    layer_bias_.push_back(bias);
+  }
+
+  SAFTM_CHECK(model_->HasSoftmax());
+  layer_weights_.push_back(model_->GetSoftmaxMatrix());
+
+  EmbeddingNetworkParams::Matrix softmax_bias = model_->GetSoftmaxBias();
+  SAFTM_CHECK_EQ(1, softmax_bias.cols);
+  CheckNoQuantization(softmax_bias);
+  layer_bias_.push_back(softmax_bias);
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/embedding-network.h b/lang_id/common/embedding-network.h
new file mode 100644
index 0000000..54094d7
--- /dev/null
+++ b/lang_id/common/embedding-network.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_NETWORK_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_NETWORK_H_
+
+#include <vector>
+
+#include "lang_id/common/embedding-network-params.h"
+#include "lang_id/common/fel/feature-extractor.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Classifier using a hand-coded feed-forward neural network.
+//
+// No gradient computation, just inference.
+//
+// Based on the more general nlp_saft::EmbeddingNetwork (without ::mobile).
+//
+// Classification works as follows:
+//
+// Discrete features -> Embeddings -> Concatenation -> Hidden+ -> Softmax
+//
+// In words: given some discrete features, this class extracts the embeddings
+// for these features, concatenates them, passes them through one or more hidden
+// layers (each layer uses Relu) and next through a softmax layer that computes
+// an unnormalized score for each possible class.  Note: there is always a
+// softmax layer at the end.
+class EmbeddingNetwork {
+ public:
+  // Constructs an embedding network using the parameters from model.
+  //
+  // Note: model should stay alive for at least the lifetime of this
+  // EmbeddingNetwork object.
+  explicit EmbeddingNetwork(const EmbeddingNetworkParams *model);
+
+  virtual ~EmbeddingNetwork() {}
+
+  // Runs forward computation to fill scores with unnormalized output unit
+  // scores. This is useful for making predictions.
+  void ComputeFinalScores(const std::vector<FeatureVector> &features,
+                          std::vector<float> *scores) const;
+
+  // Same as above, but allows specification of extra neural network
+  // inputs that will be appended to the embedding vector built from features.
+  void ComputeFinalScores(const std::vector<FeatureVector> &features,
+                          const std::vector<float> &extra_inputs,
+                          std::vector<float> *scores) const;
+
+ private:
+  // Constructs the concatenated input embedding vector in place in output
+  // vector concat.
+  void ConcatEmbeddings(const std::vector<FeatureVector> &features,
+                        std::vector<float> *concat) const;
+
+  // Pointer to the model object passed to the constructor.  Not owned.
+  const EmbeddingNetworkParams *model_;
+
+  // Network parameters.
+
+  // One weight matrix for each embedding.
+  std::vector<EmbeddingNetworkParams::Matrix> embedding_matrices_;
+
+  // embedding_row_size_in_bytes_[i] is the size (in bytes) of a row from
+  // embedding_matrices_[i].  We precompute this in order to quickly find the
+  // beginning of the k-th row from an embedding matrix (which is stored in
+  // row-major order).
+  std::vector<int> embedding_row_size_in_bytes_;
+
+  // concat_offset_[i] is the input layer offset for i-th embedding space.
+  std::vector<int> concat_offset_;
+
+  // Size of the input ("concatenation") layer.
+  int concat_layer_size_ = 0;
+
+  // One weight matrix and one vector of bias weights for each layer of neurons.
+  // Last layer is the softmax layer, the previous ones are the hidden layers.
+  std::vector<EmbeddingNetworkParams::Matrix> layer_weights_;
+  std::vector<EmbeddingNetworkParams::Matrix> layer_bias_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_EMBEDDING_NETWORK_H_
diff --git a/lang_id/common/fel/feature-descriptors.cc b/lang_id/common/fel/feature-descriptors.cc
new file mode 100644
index 0000000..bf03dd5
--- /dev/null
+++ b/lang_id/common/fel/feature-descriptors.cc
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/fel/feature-descriptors.h"
+
+#include "lang_id/common/lite_strings/str-cat.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+void ToFELFunction(const FeatureFunctionDescriptor &function, string *output) {
+  LiteStrAppend(output, function.type());
+  if (function.argument() != 0 || function.parameter_size() > 0) {
+    LiteStrAppend(output, "(");
+    bool first = true;
+    if (function.argument() != 0) {
+      LiteStrAppend(output, function.argument());
+      first = false;
+    }
+    for (int i = 0; i < function.parameter_size(); ++i) {
+      if (!first) LiteStrAppend(output, ",");
+      LiteStrAppend(output, function.parameter(i).name(), "=\"",
+                    function.parameter(i).value(), "\"");
+      first = false;
+    }
+    LiteStrAppend(output, ")");
+  }
+}
+
+void ToFEL(const FeatureFunctionDescriptor &function, string *output) {
+  ToFELFunction(function, output);
+  if (function.feature_size() == 1) {
+    LiteStrAppend(output, ".");
+    ToFEL(function.feature(0), output);
+  } else if (function.feature_size() > 1) {
+    LiteStrAppend(output, " { ");
+    for (int i = 0; i < function.feature_size(); ++i) {
+      if (i > 0) LiteStrAppend(output, " ");
+      ToFEL(function.feature(i), output);
+    }
+    LiteStrAppend(output, " } ");
+  }
+}
+
+void ToFEL(const FeatureExtractorDescriptor &extractor, string *output) {
+  for (int i = 0; i < extractor.feature_size(); ++i) {
+    ToFEL(extractor.feature(i), output);
+    LiteStrAppend(output, "\n");
+  }
+}
+
+string FeatureFunctionDescriptor::DebugString() const {
+  string str;
+  ToFEL(*this, &str);
+  return str;
+}
+
+string FeatureExtractorDescriptor::DebugString() const {
+  string str;
+  ToFEL(*this, &str);
+  return str;
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/fel/feature-descriptors.h b/lang_id/common/fel/feature-descriptors.h
new file mode 100644
index 0000000..a9408c9
--- /dev/null
+++ b/lang_id/common/fel/feature-descriptors.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_DESCRIPTORS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_DESCRIPTORS_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/macros.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Named feature parameter.
+class Parameter {
+ public:
+  Parameter() {}
+
+  void set_name(const string &name) { name_ = name; }
+  const string &name() const { return name_; }
+
+  void set_value(const string &value) { value_ = value; }
+  const string &value() const { return value_; }
+
+ private:
+  string name_;
+  string value_;
+};
+
+// Descriptor for a feature function.  Used to store the results of parsing one
+// feature function.
+class FeatureFunctionDescriptor {
+ public:
+  FeatureFunctionDescriptor() {}
+
+  // Accessors for the feature function type.  The function type is the string
+  // that the feature extractor code is registered under.
+  void set_type(const string &type) { type_ = type; }
+  const string &type() const { return type_; }
+
+  // Accessors for the feature function name.  The function name (if available)
+  // is used for some log messages.  Otherwise, a more precise, but also more
+  // verbose name based on the feature specification is used.
+  void set_name(const string &name) { name_ = name; }
+  const string &name() const { return name_; }
+
+  // Accessors for the default (name-less) parameter.
+  void set_argument(int32 argument) { argument_ = argument; }
+  bool has_argument() const {
+    // If argument has not been specified, clients should treat it as 0.  This
+    // makes the test below correct, without having a separate has_argument_
+    // bool field.
+    return argument_ != 0;
+  }
+  int32 argument() const { return argument_; }
+
+  // Accessors for the named parameters.
+  Parameter *add_parameter() {
+    parameters_.emplace_back();
+    return &(parameters_.back());
+  }
+  int parameter_size() const { return parameters_.size(); }
+  const Parameter &parameter(int i) const {
+    SAFTM_DCHECK((i >= 0) && (i < parameter_size()));
+    return parameters_[i];
+  }
+
+  // Accessors for the sub (i.e., nested) features.  Nested features: as in
+  // offset(1).label.
+  FeatureFunctionDescriptor *add_feature() {
+    sub_features_.emplace_back(new FeatureFunctionDescriptor());
+    return sub_features_.back().get();
+  }
+  int feature_size() const { return sub_features_.size(); }
+  const FeatureFunctionDescriptor &feature(int i) const {
+    SAFTM_DCHECK((i >= 0) && (i < feature_size()));
+    return *(sub_features_[i].get());
+  }
+
+  // Returns human-readable representation of this FeatureFunctionDescriptor.
+  string DebugString() const;
+
+ private:
+  // See comments for set_type().
+  string type_;
+
+  // See comments for set_name().
+  string name_;
+
+  // See comments for set_argument().
+  int32 argument_ = 0;
+
+  // See comments for add_parameter().
+  std::vector<Parameter> parameters_;
+
+  // See comments for add_feature().
+  std::vector<std::unique_ptr<FeatureFunctionDescriptor>> sub_features_;
+
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(FeatureFunctionDescriptor);
+};
+
+// List of FeatureFunctionDescriptors.  Used to store the result of parsing the
+// spec for several feature functions.
+class FeatureExtractorDescriptor {
+ public:
+  FeatureExtractorDescriptor() {}
+
+  int feature_size() const { return features_.size(); }
+
+  FeatureFunctionDescriptor *add_feature() {
+    features_.emplace_back(new FeatureFunctionDescriptor());
+    return features_.back().get();
+  }
+
+  const FeatureFunctionDescriptor &feature(int i) const {
+    SAFTM_DCHECK((i >= 0) && (i < feature_size()));
+    return *(features_[i].get());
+  }
+
+  // Returns human-readable representation of this FeatureExtractorDescriptor.
+  string DebugString() const;
+
+ private:
+  std::vector<std::unique_ptr<FeatureFunctionDescriptor>> features_;
+
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(FeatureExtractorDescriptor);
+};
+
+// Appends to |*output| the FEL representation of the top-level feature from
+// |function|, without diving into the nested features.
+void ToFELFunction(const FeatureFunctionDescriptor &function, string *output);
+
+// Appends to |*output| the FEL representation of |function|.
+void ToFEL(const FeatureFunctionDescriptor &function, string *output);
+
+// Appends to |*output| the FEL representation of |extractor|.
+void ToFEL(const FeatureExtractorDescriptor &extractor, string *output);
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_DESCRIPTORS_H_
diff --git a/lang_id/common/fel/feature-extractor.cc b/lang_id/common/fel/feature-extractor.cc
new file mode 100644
index 0000000..c256257
--- /dev/null
+++ b/lang_id/common/fel/feature-extractor.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/fel/feature-extractor.h"
+
+#include "lang_id/common/fel/feature-types.h"
+#include "lang_id/common/fel/fel-parser.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/numbers.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+constexpr FeatureValue GenericFeatureFunction::kNone;
+
+GenericFeatureExtractor::GenericFeatureExtractor() {}
+
+GenericFeatureExtractor::~GenericFeatureExtractor() {}
+
+bool GenericFeatureExtractor::Parse(const string &source) {
+  // Parse feature specification into descriptor.
+  FELParser parser;
+
+  if (!parser.Parse(source, mutable_descriptor())) {
+    SAFTM_LOG(ERROR) << "Error parsing the FEL spec " << source;
+    return false;
+  }
+
+  // Initialize feature extractor from descriptor.
+  return InitializeFeatureFunctions();
+}
+
+bool GenericFeatureExtractor::InitializeFeatureTypes() {
+  // Register all feature types.
+  GetFeatureTypes(&feature_types_);
+  for (size_t i = 0; i < feature_types_.size(); ++i) {
+    FeatureType *ft = feature_types_[i];
+    ft->set_base(i);
+
+    // Check for feature space overflow.
+    double domain_size = ft->GetDomainSize();
+    if (domain_size < 0) {
+      SAFTM_LOG(ERROR) << "Illegal domain size for feature " << ft->name()
+                       << ": " << domain_size;
+      return false;
+    }
+  }
+  return true;
+}
+
+string GenericFeatureFunction::GetParameter(const string &name,
+                                            const string &default_value) const {
+  // Find named parameter in feature descriptor.
+  for (int i = 0; i < descriptor_->parameter_size(); ++i) {
+    if (name == descriptor_->parameter(i).name()) {
+      return descriptor_->parameter(i).value();
+    }
+  }
+  return default_value;
+}
+
+GenericFeatureFunction::GenericFeatureFunction() {}
+
+GenericFeatureFunction::~GenericFeatureFunction() { delete feature_type_; }
+
+int GenericFeatureFunction::GetIntParameter(const string &name,
+                                            int default_value) const {
+  string value_str = GetParameter(name, "");
+  if (value_str.empty()) {
+    // Parameter not specified, use default value for it.
+    return default_value;
+  }
+  int value = 0;
+  if (!LiteAtoi(value_str, &value)) {
+    SAFTM_LOG(DFATAL) << "Unable to parse '" << value_str
+                      << "' as int for parameter " << name;
+    return default_value;
+  }
+  return value;
+}
+
+bool GenericFeatureFunction::GetBoolParameter(const string &name,
+                                              bool default_value) const {
+  string value = GetParameter(name, "");
+  if (value.empty()) return default_value;
+  if (value == "true") return true;
+  if (value == "false") return false;
+  SAFTM_LOG(DFATAL) << "Illegal value '" << value << "' for bool parameter "
+                    << name;
+  return default_value;
+}
+
+void GenericFeatureFunction::GetFeatureTypes(
+    std::vector<FeatureType *> *types) const {
+  if (feature_type_ != nullptr) types->push_back(feature_type_);
+}
+
+FeatureType *GenericFeatureFunction::GetFeatureType() const {
+  // If a single feature type has been registered return it.
+  if (feature_type_ != nullptr) return feature_type_;
+
+  // Get feature types for function.
+  std::vector<FeatureType *> types;
+  GetFeatureTypes(&types);
+
+  // If there is exactly one feature type return this, else return null.
+  if (types.size() == 1) return types[0];
+  return nullptr;
+}
+
+string GenericFeatureFunction::name() const {
+  string output;
+  if (descriptor_->name().empty()) {
+    if (!prefix_.empty()) {
+      output.append(prefix_);
+      output.append(".");
+    }
+    ToFEL(*descriptor_, &output);
+  } else {
+    output = descriptor_->name();
+  }
+  return output;
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/fel/feature-extractor.h b/lang_id/common/fel/feature-extractor.h
new file mode 100644
index 0000000..8763852
--- /dev/null
+++ b/lang_id/common/fel/feature-extractor.h
@@ -0,0 +1,651 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generic feature extractor for extracting features from objects. The feature
+// extractor can be used for extracting features from any object. The feature
+// extractor and feature function classes are template classes that have to
+// be instantiated for extracting feature from a specific object type.
+//
+// A feature extractor consists of a hierarchy of feature functions. Each
+// feature function extracts one or more feature type and value pairs from the
+// object.
+//
+// The feature extractor has a modular design where new feature functions can be
+// registered as components. The feature extractor is initialized from a
+// descriptor represented by a protocol buffer. The feature extractor can also
+// be initialized from a text-based source specification of the feature
+// extractor. Feature specification parsers can be added as components. By
+// default the feature extractor can be read from an ASCII protocol buffer or in
+// a simple feature modeling language (fml).
+
+// A feature function is invoked with a focus. Nested feature functions can be
+// invoked with another focus determined by the parent feature function.
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_
+
+#include <stddef.h>
+
+#include <string>
+#include <vector>
+
+#include "lang_id/common/fel/feature-descriptors.h"
+#include "lang_id/common/fel/feature-types.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/fel/workspace.h"
+#include "lang_id/common/lite_base/attributes.h"
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/macros.h"
+#include "lang_id/common/registry.h"
+#include "lang_id/common/stl-util.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// TODO(djweiss) Clean this up as well.
+// Use the same type for feature values as is used for predicates.
+typedef int64 Predicate;
+typedef Predicate FeatureValue;
+
+// A union used to represent discrete and continuous feature values.
+union FloatFeatureValue {
+ public:
+  explicit FloatFeatureValue(FeatureValue v) : discrete_value(v) {}
+  FloatFeatureValue(uint32 i, float w) : id(i), weight(w) {}
+  FeatureValue discrete_value;
+  struct {
+    uint32 id;
+    float weight;
+  };
+};
+
+// A feature vector contains feature type and value pairs.
+class FeatureVector {
+ public:
+  FeatureVector() {}
+
+  // Adds feature type and value pair to feature vector.
+  void add(FeatureType *type, FeatureValue value) {
+    features_.emplace_back(type, value);
+  }
+
+  // Removes all elements from the feature vector.
+  void clear() { features_.clear(); }
+
+  // Returns the number of elements in the feature vector.
+  int size() const { return features_.size(); }
+
+  // Reserves space in the underlying feature vector.
+  void reserve(int n) { features_.reserve(n); }
+
+  // Returns feature type for an element in the feature vector.
+  FeatureType *type(int index) const { return features_[index].type; }
+
+  // Returns feature value for an element in the feature vector.
+  FeatureValue value(int index) const { return features_[index].value; }
+
+ private:
+  // Structure for holding feature type and value pairs.
+  struct Element {
+    Element() : type(nullptr), value(-1) {}
+    Element(FeatureType *t, FeatureValue v) : type(t), value(v) {}
+
+    FeatureType *type;
+    FeatureValue value;
+  };
+
+  // Array for storing feature vector elements.
+  std::vector<Element> features_;
+
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(FeatureVector);
+};
+
+// The generic feature extractor is the type-independent part of a feature
+// extractor. This holds the descriptor for the feature extractor and the
+// collection of feature types used in the feature extractor.  The feature
+// types are not available until FeatureExtractor<>::Init() has been called.
+class GenericFeatureExtractor {
+ public:
+  GenericFeatureExtractor();
+  virtual ~GenericFeatureExtractor();
+
+  // Initializes the feature extractor from the FEL specification |source|.
+  //
+  // Returns true on success, false otherwise (e.g., FEL syntax error).
+  SAFTM_MUST_USE_RESULT bool Parse(const string &source);
+
+  // Returns the feature extractor descriptor.
+  const FeatureExtractorDescriptor &descriptor() const { return descriptor_; }
+  FeatureExtractorDescriptor *mutable_descriptor() { return &descriptor_; }
+
+  // Returns the number of feature types in the feature extractor.  Invalid
+  // before Init() has been called.
+  int feature_types() const { return feature_types_.size(); }
+
+ protected:
+  // Initializes the feature types used by the extractor.  Called from
+  // FeatureExtractor<>::Init().
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT bool InitializeFeatureTypes();
+
+ private:
+  // Initializes the top-level feature functions.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT virtual bool InitializeFeatureFunctions() = 0;
+
+  // Returns all feature types used by the extractor. The feature types are
+  // added to the result array.
+  virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const = 0;
+
+  // Descriptor for the feature extractor. This is a protocol buffer that
+  // contains all the information about the feature extractor. The feature
+  // functions are initialized from the information in the descriptor.
+  FeatureExtractorDescriptor descriptor_;
+
+  // All feature types used by the feature extractor. The collection of all the
+  // feature types describes the feature space of the feature set produced by
+  // the feature extractor.  Not owned.
+  std::vector<FeatureType *> feature_types_;
+};
+
+// The generic feature function is the type-independent part of a feature
+// function. Each feature function is associated with the descriptor that it is
+// instantiated from.  The feature types associated with this feature function
+// will be established by the time FeatureExtractor<>::Init() completes.
+class GenericFeatureFunction {
+ public:
+  // A feature value that represents the absence of a value.
+  static constexpr FeatureValue kNone = -1;
+
+  GenericFeatureFunction();
+  virtual ~GenericFeatureFunction();
+
+  // Sets up the feature function. NB: FeatureTypes of nested functions are not
+  // guaranteed to be available until Init().
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT virtual bool Setup(TaskContext *context) {
+    return true;
+  }
+
+  // Initializes the feature function. NB: The FeatureType of this function must
+  // be established when this method completes.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT virtual bool Init(TaskContext *context) { return true; }
+
+  // Requests workspaces from a registry to obtain indices into a WorkspaceSet
+  // for any Workspace objects used by this feature function. NB: This will be
+  // called after Init(), so it can depend on resources and arguments.
+  virtual void RequestWorkspaces(WorkspaceRegistry *registry) {}
+
+  // Appends the feature types produced by the feature function to types.  The
+  // default implementation appends feature_type(), if non-null.  Invalid
+  // before Init() has been called.
+  virtual void GetFeatureTypes(std::vector<FeatureType *> *types) const;
+
+  // Returns the feature type for feature produced by this feature function. If
+  // the feature function produces features of different types this returns
+  // null.  Invalid before Init() has been called.
+  virtual FeatureType *GetFeatureType() const;
+
+  // Returns value of parameter |name| from the feature function descriptor.
+  // If the parameter is not present, returns the indicated |default_value|.
+  string GetParameter(const string &name, const string &default_value) const;
+
+  // Returns value of int parameter |name| from feature function descriptor.
+  // If the parameter is not present, or its value can't be parsed as an int,
+  // returns |default_value|.
+  int GetIntParameter(const string &name, int default_value) const;
+
+  // Returns value of bool parameter |name| from feature function descriptor.
+  // If the parameter is not present, or its value is not "true" or "false",
+  // returns |default_value|.  NOTE: this method is case sensitive, it doesn't
+  // do any lower-casing.
+  bool GetBoolParameter(const string &name, bool default_value) const;
+
+  // Returns the FEL function description for the feature function, i.e. the
+  // name and parameters without the nested features.
+  string FunctionName() const {
+    string output;
+    ToFELFunction(*descriptor_, &output);
+    return output;
+  }
+
+  // Returns the prefix for nested feature functions. This is the prefix of this
+  // feature function concatenated with the feature function name.
+  string SubPrefix() const {
+    return prefix_.empty() ? FunctionName() : prefix_ + "." + FunctionName();
+  }
+
+  // Returns/sets the feature extractor this function belongs to.
+  const GenericFeatureExtractor *extractor() const { return extractor_; }
+  void set_extractor(const GenericFeatureExtractor *extractor) {
+    extractor_ = extractor;
+  }
+
+  // Returns/sets the feature function descriptor.
+  const FeatureFunctionDescriptor *descriptor() const { return descriptor_; }
+  void set_descriptor(const FeatureFunctionDescriptor *descriptor) {
+    descriptor_ = descriptor;
+  }
+
+  // Returns a descriptive name for the feature function. The name is taken from
+  // the descriptor for the feature function. If the name is empty or the
+  // feature function is a variable the name is the FEL representation of the
+  // feature, including the prefix.
+  string name() const;
+
+  // Returns the argument from the feature function descriptor. It defaults to
+  // 0 if the argument has not been specified.
+  int argument() const {
+    return descriptor_->has_argument() ? descriptor_->argument() : 0;
+  }
+
+  // Returns/sets/clears function name prefix.
+  const string &prefix() const { return prefix_; }
+  void set_prefix(const string &prefix) { prefix_ = prefix; }
+
+ protected:
+  // Returns the feature type for single-type feature functions.
+  FeatureType *feature_type() const { return feature_type_; }
+
+  // Sets the feature type for single-type feature functions.  This takes
+  // ownership of feature_type.  Can only be called once.
+  void set_feature_type(FeatureType *feature_type) {
+    SAFTM_CHECK_EQ(feature_type_, nullptr);
+    feature_type_ = feature_type;
+  }
+
+ private:
+  // Feature extractor this feature function belongs to.  Not owned.  Set to a
+  // pointer != nullptr as soon as this object is created by Instantiate().
+  // Normal methods can safely assume this is != nullptr.
+  const GenericFeatureExtractor *extractor_ = nullptr;
+
+  // Descriptor for feature function.  Not owned.  Set to a pointer != nullptr
+  // as soon as this object is created by Instantiate().  Normal methods can
+  // safely assume this is != nullptr.
+  const FeatureFunctionDescriptor *descriptor_ = nullptr;
+
+  // Feature type for features produced by this feature function. If the
+  // feature function produces features of multiple feature types this is null
+  // and the feature function must return its feature types in
+  // GetFeatureTypes().  Owned.
+  FeatureType *feature_type_ = nullptr;
+
+  // Prefix used for sub-feature types of this function.
+  string prefix_;
+};
+
+// Feature function that can extract features from an object.  Templated on
+// two type arguments:
+//
+// OBJ:  The "object" from which features are extracted; e.g., a sentence.  This
+//       should be a plain type, rather than a reference or pointer.
+//
+// ARGS: A set of 0 or more types that are used to "index" into some part of the
+//       object that should be extracted, e.g. an int token index for a sentence
+//       object.  This should not be a reference type.
+template <class OBJ, class... ARGS>
+class FeatureFunction
+    : public GenericFeatureFunction,
+      public RegisterableClass<FeatureFunction<OBJ, ARGS...> > {
+ public:
+  using Self = FeatureFunction<OBJ, ARGS...>;
+
+  // Preprocesses the object.  This will be called prior to calling Evaluate()
+  // or Compute() on that object.
+  virtual void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const {}
+
+  // Appends features computed from the object and focus to the result.  The
+  // default implementation delegates to Compute(), adding a single value if
+  // available.  Multi-valued feature functions must override this method.
+  virtual void Evaluate(const WorkspaceSet &workspaces, const OBJ &object,
+                        ARGS... args, FeatureVector *result) const {
+    FeatureValue value = Compute(workspaces, object, args...);
+    if (value != kNone) result->add(feature_type(), value);
+  }
+
+  // Returns a feature value computed from the object and focus, or kNone if no
+  // value is computed.  Single-valued feature functions only need to override
+  // this method.
+  virtual FeatureValue Compute(const WorkspaceSet &workspaces,
+                               const OBJ &object, ARGS... args) const {
+    return kNone;
+  }
+
+  // Instantiates a new feature function in a feature extractor from a feature
+  // descriptor.
+  //
+  // Returns a pointer to the newly-created object if everything goes well.
+  // Returns nullptr if the feature function could not be instantiated (e.g., if
+  // the function with that name is not registered; this usually happens because
+  // the relevant cc_library was not linked-in).
+  static Self *Instantiate(const GenericFeatureExtractor *extractor,
+                           const FeatureFunctionDescriptor *fd,
+                           const string &prefix) {
+    Self *f = Self::Create(fd->type());
+    if (f != nullptr) {
+      f->set_extractor(extractor);
+      f->set_descriptor(fd);
+      f->set_prefix(prefix);
+    }
+    return f;
+  }
+
+ private:
+  // Special feature function class for resolving variable references. The type
+  // of the feature function is used for resolving the variable reference. When
+  // evaluated it will either get the feature value(s) from the variable portion
+  // of the feature vector, if present, or otherwise it will call the referenced
+  // feature extractor function directly to extract the feature(s).
+  class Reference;
+};
+
+// Base class for features with nested feature functions. The nested functions
+// are of type NES, which may be different from the type of the parent function.
+// NB: NestedFeatureFunction will ensure that all initialization of nested
+// functions takes place during Setup() and Init() -- after the nested features
+// are initialized, the parent feature is initialized via SetupNested() and
+// InitNested(). Alternatively, a derived classes that overrides Setup() and
+// Init() directly should call Parent::Setup(), Parent::Init(), etc. first.
+//
+// Note: NestedFeatureFunction cannot know how to call Preprocess, Evaluate, or
+// Compute, since the nested functions may be of a different type.
+template <class NES, class OBJ, class... ARGS>
+class NestedFeatureFunction : public FeatureFunction<OBJ, ARGS...> {
+ public:
+  using Parent = NestedFeatureFunction<NES, OBJ, ARGS...>;
+
+  // Clean up nested functions.
+  ~NestedFeatureFunction() override { utils::STLDeleteElements(&nested_); }
+
+  // By default, just appends the nested feature types.
+  void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
+    SAFTM_CHECK(!this->nested().empty())
+        << "Nested features require nested features to be defined.";
+    for (auto *function : nested_) function->GetFeatureTypes(types);
+  }
+
+  // Sets up the nested features.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) override {
+    bool success = CreateNested(this->extractor(), this->descriptor(), &nested_,
+                                this->SubPrefix());
+    if (!success) return false;
+    for (auto *function : nested_) {
+      if (!function->Setup(context)) return false;
+    }
+    if (!SetupNested(context)) return false;
+    return true;
+  }
+
+  // Sets up this NestedFeatureFunction specifically.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT virtual bool SetupNested(TaskContext *context) {
+    return true;
+  }
+
+  // Initializes the nested features.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) override {
+    for (auto *function : nested_) {
+      if (!function->Init(context)) return false;
+    }
+    if (!InitNested(context)) return false;
+    return true;
+  }
+
+  // Initializes this NestedFeatureFunction specifically.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT virtual bool InitNested(TaskContext *context) {
+    return true;
+  }
+
+  // Gets all the workspaces needed for the nested functions.
+  void RequestWorkspaces(WorkspaceRegistry *registry) override {
+    for (auto *function : nested_) function->RequestWorkspaces(registry);
+  }
+
+  // Returns the list of nested feature functions.
+  const std::vector<NES *> &nested() const { return nested_; }
+
+  // Instantiates nested feature functions for a feature function. Creates and
+  // initializes one feature function for each sub-descriptor in the feature
+  // descriptor.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT static bool CreateNested(
+      const GenericFeatureExtractor *extractor,
+      const FeatureFunctionDescriptor *fd, std::vector<NES *> *functions,
+      const string &prefix) {
+    for (int i = 0; i < fd->feature_size(); ++i) {
+      const FeatureFunctionDescriptor &sub = fd->feature(i);
+      NES *f = NES::Instantiate(extractor, &sub, prefix);
+      if (f == nullptr) return false;
+      functions->push_back(f);
+    }
+    return true;
+  }
+
+ protected:
+  // The nested feature functions, if any, in order of declaration in the
+  // feature descriptor.  Owned.
+  std::vector<NES *> nested_;
+};
+
+// Base class for a nested feature function that takes nested features with the
+// same signature as these features, i.e. a meta feature. For this class, we can
+// provide preprocessing of the nested features.
+template <class OBJ, class... ARGS>
+class MetaFeatureFunction
+    : public NestedFeatureFunction<FeatureFunction<OBJ, ARGS...>, OBJ,
+                                   ARGS...> {
+ public:
+  // Preprocesses using the nested features.
+  void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const override {
+    for (auto *function : this->nested_) {
+      function->Preprocess(workspaces, object);
+    }
+  }
+};
+
+// Template for a special type of locator: The locator of type
+// FeatureFunction<OBJ, ARGS...> calls nested functions of type
+// FeatureFunction<OBJ, IDX, ARGS...>, where the derived class DER is
+// responsible for translating by providing the following:
+//
+// // Gets the new additional focus.
+// IDX GetFocus(const WorkspaceSet &workspaces, const OBJ &object);
+//
+// This is useful to e.g. add a token focus to a parser state based on some
+// desired property of that state.
+template <class DER, class OBJ, class IDX, class... ARGS>
+class FeatureAddFocusLocator
+    : public NestedFeatureFunction<FeatureFunction<OBJ, IDX, ARGS...>, OBJ,
+                                   ARGS...> {
+ public:
+  void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const override {
+    for (auto *function : this->nested_) {
+      function->Preprocess(workspaces, object);
+    }
+  }
+
+  void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args,
+                FeatureVector *result) const override {
+    IDX focus =
+        static_cast<const DER *>(this)->GetFocus(workspaces, object, args...);
+    for (auto *function : this->nested()) {
+      function->Evaluate(workspaces, object, focus, args..., result);
+    }
+  }
+
+  // Returns the first nested feature's computed value.
+  FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
+                       ARGS... args) const override {
+    IDX focus =
+        static_cast<const DER *>(this)->GetFocus(workspaces, object, args...);
+    return this->nested()[0]->Compute(workspaces, object, focus, args...);
+  }
+};
+
+// CRTP feature locator class. This is a meta feature that modifies ARGS and
+// then calls the nested feature functions with the modified ARGS. Note that in
+// order for this template to work correctly, all of ARGS must be types for
+// which the reference operator & can be interpreted as a pointer to the
+// argument. The derived class DER must implement the UpdateArgs method which
+// takes pointers to the ARGS arguments:
+//
+// // Updates the current arguments.
+// void UpdateArgs(const OBJ &object, ARGS *...args) const;
+template <class DER, class OBJ, class... ARGS>
+class FeatureLocator : public MetaFeatureFunction<OBJ, ARGS...> {
+ public:
+  // Feature locators have an additional check that there is no intrinsic type.
+  void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
+    SAFTM_CHECK_EQ(this->feature_type(), nullptr)
+        << "FeatureLocators should not have an intrinsic type.";
+    MetaFeatureFunction<OBJ, ARGS...>::GetFeatureTypes(types);
+  }
+
+  // Evaluates the locator.
+  void Evaluate(const WorkspaceSet &workspaces, const OBJ &object, ARGS... args,
+                FeatureVector *result) const override {
+    static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
+    for (auto *function : this->nested()) {
+      function->Evaluate(workspaces, object, args..., result);
+    }
+  }
+
+  // Returns the first nested feature's computed value.
+  FeatureValue Compute(const WorkspaceSet &workspaces, const OBJ &object,
+                       ARGS... args) const override {
+    static_cast<const DER *>(this)->UpdateArgs(workspaces, object, &args...);
+    return this->nested()[0]->Compute(workspaces, object, args...);
+  }
+};
+
+// Feature extractor for extracting features from objects of a certain class.
+// Template type parameters are as defined for FeatureFunction.
+template <class OBJ, class... ARGS>
+class FeatureExtractor : public GenericFeatureExtractor {
+ public:
+  // Feature function type for top-level functions in the feature extractor.
+  typedef FeatureFunction<OBJ, ARGS...> Function;
+  typedef FeatureExtractor<OBJ, ARGS...> Self;
+
+  // Feature locator type for the feature extractor.
+  template <class DER>
+  using Locator = FeatureLocator<DER, OBJ, ARGS...>;
+
+  // Initializes feature extractor.
+  FeatureExtractor() {}
+
+  ~FeatureExtractor() override { utils::STLDeleteElements(&functions_); }
+
+  // Sets up the feature extractor. Note that only top-level functions exist
+  // until Setup() is called. This does not take ownership over the context,
+  // which must outlive this.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT bool Setup(TaskContext *context) {
+    for (Function *function : functions_) {
+      if (!function->Setup(context)) return false;
+    }
+    return true;
+  }
+
+  // Initializes the feature extractor.  Must be called after Setup().  This
+  // does not take ownership over the context, which must outlive this.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT bool Init(TaskContext *context) {
+    for (Function *function : functions_) {
+      if (!function->Init(context)) return false;
+    }
+    if (!this->InitializeFeatureTypes()) return false;
+    return true;
+  }
+
+  // Requests workspaces from the registry. Must be called after Init(), and
+  // before Preprocess(). Does not take ownership over registry. This should be
+  // the same registry used to initialize the WorkspaceSet used in Preprocess()
+  // and ExtractFeatures(). NB: This is a different ordering from that used in
+  // SentenceFeatureRepresentation style feature computation.
+  void RequestWorkspaces(WorkspaceRegistry *registry) {
+    for (auto *function : functions_) function->RequestWorkspaces(registry);
+  }
+
+  // Preprocesses the object using feature functions for the phase.  Must be
+  // called before any calls to ExtractFeatures() on that object and phase.
+  void Preprocess(WorkspaceSet *workspaces, const OBJ *object) const {
+    for (Function *function : functions_) {
+      function->Preprocess(workspaces, object);
+    }
+  }
+
+  // Extracts features from an object with a focus. This invokes all the
+  // top-level feature functions in the feature extractor. Only feature
+  // functions belonging to the specified phase are invoked.
+  void ExtractFeatures(const WorkspaceSet &workspaces, const OBJ &object,
+                       ARGS... args, FeatureVector *result) const {
+    result->reserve(this->feature_types());
+
+    // Extract features.
+    for (int i = 0; i < functions_.size(); ++i) {
+      functions_[i]->Evaluate(workspaces, object, args..., result);
+    }
+  }
+
+ private:
+  // Creates and initializes all feature functions in the feature extractor.
+  //
+  // Returns true on success, false otherwise.
+  SAFTM_MUST_USE_RESULT bool InitializeFeatureFunctions() override {
+    // Create all top-level feature functions.
+    for (int i = 0; i < descriptor().feature_size(); ++i) {
+      const FeatureFunctionDescriptor &fd = descriptor().feature(i);
+      Function *function = Function::Instantiate(this, &fd, "");
+      if (function == nullptr) return false;
+      functions_.push_back(function);
+    }
+    return true;
+  }
+
+  // Collect all feature types used in the feature extractor.
+  void GetFeatureTypes(std::vector<FeatureType *> *types) const override {
+    for (int i = 0; i < functions_.size(); ++i) {
+      functions_[i]->GetFeatureTypes(types);
+    }
+  }
+
+  // Top-level feature functions (and variables) in the feature extractor.
+  // Owned.
+  std::vector<Function *> functions_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_EXTRACTOR_H_
diff --git a/lang_id/common/fel/feature-types.h b/lang_id/common/fel/feature-types.h
new file mode 100644
index 0000000..18cf69a
--- /dev/null
+++ b/lang_id/common/fel/feature-types.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Common feature types for parser components.
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
+
+#include <algorithm>
+#include <map>
+#include <string>
+#include <utility>
+
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/str-cat.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// TODO(djweiss) Clean this up as well.
+// Use the same type for feature values as is used for predicates.
+typedef int64 Predicate;
+typedef Predicate FeatureValue;
+
+// Each feature value in a feature vector has a feature type. The feature type
+// is used for converting feature type and value pairs to predicate values. The
+// feature type can also return names for feature values and calculate the size
+// of the feature value domain. The FeatureType class is abstract and must be
+// specialized for the concrete feature types.
+class FeatureType {
+ public:
+  // Initializes a feature type.
+  explicit FeatureType(const string &name)
+      : name_(name), base_(0),
+        is_continuous_(name.find("continuous") != string::npos) {
+  }
+
+  virtual ~FeatureType() {}
+
+  // Converts a feature value to a name.
+  virtual string GetFeatureValueName(FeatureValue value) const = 0;
+
+  // Returns the size of the feature values domain.
+  virtual int64 GetDomainSize() const = 0;
+
+  // Returns the feature type name.
+  const string &name() const { return name_; }
+
+  Predicate base() const { return base_; }
+  void set_base(Predicate base) { base_ = base; }
+
+  // Returns true iff this feature is continuous; see FloatFeatureValue.
+  bool is_continuous() const { return is_continuous_; }
+
+ private:
+  // Feature type name.
+  string name_;
+
+  // "Base" feature value: i.e. a "slot" in a global ordering of features.
+  Predicate base_;
+
+  // See doc for is_continuous().
+  bool is_continuous_;
+};
+
+// Feature type that is defined using an explicit map from FeatureValue to
+// string values.  This can reduce some of the boilerplate when defining
+// features that generate enum values.  Example usage:
+//
+//   class BeverageSizeFeature : public FeatureFunction<Beverage>
+//     enum FeatureValue { SMALL, MEDIUM, LARGE };  // values for this feature
+//     void Init(TaskContext *context) override {
+//       set_feature_type(new EnumFeatureType("beverage_size",
+//           {{SMALL, "SMALL"}, {MEDIUM, "MEDIUM"}, {LARGE, "LARGE"}});
+//     }
+//     [...]
+//   };
+class EnumFeatureType : public FeatureType {
+ public:
+  EnumFeatureType(const string &name,
+                  const std::map<FeatureValue, string> &value_names)
+      : FeatureType(name), value_names_(value_names) {
+    for (const auto &pair : value_names) {
+      SAFTM_CHECK_GE(pair.first, 0)
+          << "Invalid feature value: " << pair.first << ", " << pair.second;
+      domain_size_ = std::max(domain_size_, pair.first + 1);
+    }
+  }
+
+  // Returns the feature name for a given feature value.
+  string GetFeatureValueName(FeatureValue value) const override {
+    auto it = value_names_.find(value);
+    if (it == value_names_.end()) {
+      SAFTM_LOG(ERROR) << "Invalid feature value " << value << " for "
+                       << name();
+      return "<INVALID>";
+    }
+    return it->second;
+  }
+
+  // Returns the number of possible values for this feature type. This is one
+  // greater than the largest value in the value_names map.
+  FeatureValue GetDomainSize() const override { return domain_size_; }
+
+ protected:
+  // Maximum possible value this feature could take.
+  FeatureValue domain_size_ = 0;
+
+  // Names of feature values.
+  std::map<FeatureValue, string> value_names_;
+};
+
+// Feature type for binary features.
+class BinaryFeatureType : public FeatureType {
+ public:
+  BinaryFeatureType(const string &name, const string &off, const string &on)
+      : FeatureType(name), off_(off), on_(on) {}
+
+  // Returns the feature name for a given feature value.
+  string GetFeatureValueName(FeatureValue value) const override {
+    if (value == 0) return off_;
+    if (value == 1) return on_;
+    return "";
+  }
+
+  // Binary features always have two feature values.
+  FeatureValue GetDomainSize() const override { return 2; }
+
+ private:
+  // Feature value names for on and off.
+  string off_;
+  string on_;
+};
+
+// Feature type for numeric features.
+class NumericFeatureType : public FeatureType {
+ public:
+  // Initializes numeric feature.
+  NumericFeatureType(const string &name, FeatureValue size)
+      : FeatureType(name), size_(size) {}
+
+  // Returns numeric feature value.
+  string GetFeatureValueName(FeatureValue value) const override {
+    if (value < 0) return "";
+    return LiteStrCat(value);
+  }
+
+  // Returns the number of feature values.
+  FeatureValue GetDomainSize() const override { return size_; }
+
+ private:
+  // The underlying size of the numeric feature.
+  FeatureValue size_;
+};
+
+// Feature type for byte features, including an "outside" value.
+class ByteFeatureType : public NumericFeatureType {
+ public:
+  explicit ByteFeatureType(const string &name)
+      : NumericFeatureType(name, 257) {}
+
+  string GetFeatureValueName(FeatureValue value) const override {
+    if (value == 256) {
+      return "<NULL>";
+    }
+    string result;
+    result += static_cast<char>(value);
+    return result;
+  }
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEATURE_TYPES_H_
diff --git a/lang_id/common/fel/fel-parser.cc b/lang_id/common/fel/fel-parser.cc
new file mode 100644
index 0000000..4346fb7
--- /dev/null
+++ b/lang_id/common/fel/fel-parser.cc
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/fel/fel-parser.h"
+
+#include <ctype.h>
+#include <string>
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/numbers.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+namespace {
+inline bool IsValidCharAtStartOfIdentifier(char c) {
+  return isalpha(c) || (c == '_') || (c == '/');
+}
+
+// Returns true iff character c can appear inside an identifier.
+inline bool IsValidCharInsideIdentifier(char c) {
+  return isalnum(c) || (c == '_') || (c == '-') || (c == '/');
+}
+
+// Returns true iff character c can appear at the beginning of a number.
+inline bool IsValidCharAtStartOfNumber(char c) {
+  return isdigit(c) || (c == '+') || (c == '-');
+}
+
+// Returns true iff character c can appear inside a number.
+inline bool IsValidCharInsideNumber(char c) {
+  return isdigit(c) || (c == '.');
+}
+}  // namespace
+
+bool FELParser::Initialize(const string &source) {
+  // Initialize parser state.
+  source_ = source;
+  current_ = source_.begin();
+  item_start_ = line_start_ = current_;
+  line_number_ = item_line_number_ = 1;
+
+  // Read first input item.
+  return NextItem();
+}
+
+void FELParser::ReportError(const string &error_message) {
+  const int position = item_start_ - line_start_ + 1;
+  const string line(line_start_, current_);
+
+  SAFTM_LOG(ERROR) << "Error in feature model, line " << item_line_number_
+                   << ", position " << position << ": " << error_message
+                   << "\n    " << line << " <--HERE";
+}
+
+void FELParser::Next() {
+  // Move to the next input character. If we are at a line break update line
+  // number and line start position.
+  if (CurrentChar() == '\n') {
+    ++line_number_;
+    ++current_;
+    line_start_ = current_;
+  } else {
+    ++current_;
+  }
+}
+
+bool FELParser::NextItem() {
+  // Skip white space and comments.
+  while (!eos()) {
+    if (CurrentChar() == '#') {
+      // Skip comment.
+      while (!eos() && CurrentChar() != '\n') Next();
+    } else if (isspace(CurrentChar())) {
+      // Skip whitespace.
+      while (!eos() && isspace(CurrentChar())) Next();
+    } else {
+      break;
+    }
+  }
+
+  // Record start position for next item.
+  item_start_ = current_;
+  item_line_number_ = line_number_;
+
+  // Check for end of input.
+  if (eos()) {
+    item_type_ = END;
+    return true;
+  }
+
+  // Parse number.
+  if (IsValidCharAtStartOfNumber(CurrentChar())) {
+    string::iterator start = current_;
+    Next();
+    while (!eos() && IsValidCharInsideNumber(CurrentChar())) Next();
+    item_text_.assign(start, current_);
+    item_type_ = NUMBER;
+    return true;
+  }
+
+  // Parse string.
+  if (CurrentChar() == '"') {
+    Next();
+    string::iterator start = current_;
+    while (CurrentChar() != '"') {
+      if (eos()) {
+        ReportError("Unterminated string");
+        return false;
+      }
+      Next();
+    }
+    item_text_.assign(start, current_);
+    item_type_ = STRING;
+    Next();
+    return true;
+  }
+
+  // Parse identifier name.
+  if (IsValidCharAtStartOfIdentifier(CurrentChar())) {
+    string::iterator start = current_;
+    while (!eos() && IsValidCharInsideIdentifier(CurrentChar())) {
+      Next();
+    }
+    item_text_.assign(start, current_);
+    item_type_ = NAME;
+    return true;
+  }
+
+  // Single character item.
+  item_type_ = CurrentChar();
+  Next();
+  return true;
+}
+
+bool FELParser::Parse(const string &source,
+                      FeatureExtractorDescriptor *result) {
+  // Initialize parser.
+  if (!Initialize(source)) {
+    return false;
+  }
+
+  while (item_type_ != END) {
+    // Current item should be a feature name.
+    if (item_type_ != NAME) {
+      ReportError("Feature type name expected");
+      return false;
+    }
+    string name = item_text_;
+    if (!NextItem()) {
+      return false;
+    }
+
+    if (item_type_ == '=') {
+      ReportError("Invalid syntax: feature expected");
+      return false;
+    } else {
+      // Parse feature.
+      FeatureFunctionDescriptor *descriptor = result->add_feature();
+      descriptor->set_type(name);
+      if (!ParseFeature(descriptor)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+bool FELParser::ParseFeature(FeatureFunctionDescriptor *result) {
+  // Parse argument and parameters.
+  if (item_type_ == '(') {
+    if (!NextItem()) return false;
+    if (!ParseParameter(result)) return false;
+    while (item_type_ == ',') {
+      if (!NextItem()) return false;
+      if (!ParseParameter(result)) return false;
+    }
+
+    if (item_type_ != ')') {
+      ReportError(") expected");
+      return false;
+    }
+    if (!NextItem()) return false;
+  }
+
+  // Parse feature name.
+  if (item_type_ == ':') {
+    if (!NextItem()) return false;
+    if (item_type_ != NAME && item_type_ != STRING) {
+      ReportError("Feature name expected");
+      return false;
+    }
+    string name = item_text_;
+    if (!NextItem()) return false;
+
+    // Set feature name.
+    result->set_name(name);
+  }
+
+  // Parse sub-features.
+  if (item_type_ == '.') {
+    // Parse dotted sub-feature.
+    if (!NextItem()) return false;
+    if (item_type_ != NAME) {
+      ReportError("Feature type name expected");
+      return false;
+    }
+    string type = item_text_;
+    if (!NextItem()) return false;
+
+    // Parse sub-feature.
+    FeatureFunctionDescriptor *subfeature = result->add_feature();
+    subfeature->set_type(type);
+    if (!ParseFeature(subfeature)) return false;
+  } else if (item_type_ == '{') {
+    // Parse sub-feature block.
+    if (!NextItem()) return false;
+    while (item_type_ != '}') {
+      if (item_type_ != NAME) {
+        ReportError("Feature type name expected");
+        return false;
+      }
+      string type = item_text_;
+      if (!NextItem()) return false;
+
+      // Parse sub-feature.
+      FeatureFunctionDescriptor *subfeature = result->add_feature();
+      subfeature->set_type(type);
+      if (!ParseFeature(subfeature)) return false;
+    }
+    if (!NextItem()) return false;
+  }
+  return true;
+}
+
+bool FELParser::ParseParameter(FeatureFunctionDescriptor *result) {
+  if (item_type_ == NUMBER) {
+    int argument;
+    if (!LiteAtoi(item_text_, &argument)) {
+      ReportError("Unable to parse number");
+      return false;
+    }
+    if (!NextItem()) return false;
+
+    // Set default argument for feature.
+    result->set_argument(argument);
+  } else if (item_type_ == NAME) {
+    string name = item_text_;
+    if (!NextItem()) return false;
+    if (item_type_ != '=') {
+      ReportError("= expected");
+      return false;
+    }
+    if (!NextItem()) return false;
+    if (item_type_ >= END) {
+      ReportError("Parameter value expected");
+      return false;
+    }
+    string value = item_text_;
+    if (!NextItem()) return false;
+
+    // Add parameter to feature.
+    Parameter *parameter;
+    parameter = result->add_parameter();
+    parameter->set_name(name);
+    parameter->set_value(value);
+  } else {
+    ReportError("Syntax error in parameter list");
+    return false;
+  }
+  return true;
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/fel/fel-parser.h b/lang_id/common/fel/fel-parser.h
new file mode 100644
index 0000000..eacb442
--- /dev/null
+++ b/lang_id/common/fel/fel-parser.h
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Feature extraction language (FEL) parser.
+//
+// BNF grammar for FEL:
+//
+// <feature model> ::= { <feature extractor> }
+//
+// <feature extractor> ::= <extractor spec> |
+//                         <extractor spec> '.' <feature extractor> |
+//                         <extractor spec> '{' { <feature extractor> } '}'
+//
+// <extractor spec> ::= <extractor type>
+//                      [ '(' <parameter list> ')' ]
+//                      [ ':' <extractor name> ]
+//
+// <parameter list> = ( <parameter> | <argument> ) { ',' <parameter> }
+//
+// <parameter> ::= <parameter name> '=' <parameter value>
+//
+// <extractor type> ::= NAME
+// <extractor name> ::= NAME | STRING
+// <argument> ::= NUMBER
+// <parameter name> ::= NAME
+// <parameter value> ::= NUMBER | STRING | NAME
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
+
+#include <string>
+
+#include "lang_id/common/fel/feature-descriptors.h"
+#include "lang_id/common/lite_base/logging.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+class FELParser {
+ public:
+  // Parses a FEL specification into a feature extractor descriptor.
+  // Returns true on success, false on error (e.g., syntax errors).
+  bool Parse(const string &source, FeatureExtractorDescriptor *result);
+
+ private:
+  // Initializes the parser with the source text.
+  // Returns true on success, false on syntax error.
+  bool Initialize(const string &source);
+
+  // Outputs an error message, with context info.
+  void ReportError(const string &error_message);
+
+  // Moves to the next input character.
+  void Next();
+
+  // Moves to the next input item.  Sets item_text_ and item_type_ accordingly.
+  // Returns true on success, false on syntax error.
+  bool NextItem();
+
+  // Parses a feature descriptor.
+  // Returns true on success, false on syntax error.
+  bool ParseFeature(FeatureFunctionDescriptor *result);
+
+  // Parses a parameter specification.
+  // Returns true on success, false on syntax error.
+  bool ParseParameter(FeatureFunctionDescriptor *result);
+
+  // Returns true if end of source input has been reached.
+  bool eos() const { return current_ >= source_.end(); }
+
+  // Returns current character.  Other methods should access the current
+  // character through this method (instead of using *current_ directly): this
+  // method performs extra safety checks.
+  //
+  // In case of an unsafe access, returns '\0'.
+  char CurrentChar() const {
+    if ((current_ >= source_.begin()) && (current_ < source_.end())) {
+      return *current_;
+    } else {
+      SAFTM_LOG(ERROR) << "Unsafe char read";
+      return '\0';
+    }
+  }
+
+  // Item types.
+  enum ItemTypes {
+    END = 0,
+    NAME = -1,
+    NUMBER = -2,
+    STRING = -3,
+  };
+
+  // Source text.
+  string source_;
+
+  // Current input position.
+  string::iterator current_;
+
+  // Line number for current input position.
+  int line_number_;
+
+  // Start position for current item.
+  string::iterator item_start_;
+
+  // Start position for current line.
+  string::iterator line_start_;
+
+  // Line number for current item.
+  int item_line_number_;
+
+  // Item type for current item. If this is positive it is interpreted as a
+  // character. If it is negative it is interpreted as an item type.
+  int item_type_;
+
+  // Text for current item.
+  string item_text_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_FEL_PARSER_H_
diff --git a/lang_id/common/fel/task-context.cc b/lang_id/common/fel/task-context.cc
new file mode 100644
index 0000000..f8b0701
--- /dev/null
+++ b/lang_id/common/fel/task-context.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/fel/task-context.h"
+
+#include <string>
+
+#include "lang_id/common/lite_strings/numbers.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+string TaskContext::GetInputPath(const string &name) const {
+  auto it = inputs_.find(name);
+  if (it != inputs_.end()) {
+    return it->second;
+  }
+  return "";
+}
+
+void TaskContext::SetInputPath(const string &name, const string &path) {
+  inputs_[name] = path;
+}
+
+string TaskContext::Get(const string &name, const char *defval) const {
+  auto it = parameters_.find(name);
+  if (it != parameters_.end()) {
+    return it->second;
+  }
+  return defval;
+}
+
+int TaskContext::Get(const string &name, int defval) const {
+  const string s = Get(name, "");
+  int value = defval;
+  if (LiteAtoi(s, &value)) {
+    return value;
+  }
+  return defval;
+}
+
+float TaskContext::Get(const string &name, float defval) const {
+  const string s = Get(name, "");
+  float value = defval;
+  if (LiteAtof(s, &value)) {
+    return value;
+  }
+  return defval;
+}
+
+bool TaskContext::Get(const string &name, bool defval) const {
+  string value = Get(name, "");
+  return value.empty() ? defval : value == "true";
+}
+
+void TaskContext::SetParameter(const string &name, const string &value) {
+  parameters_[name] = value;
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/fel/task-context.h b/lang_id/common/fel/task-context.h
new file mode 100644
index 0000000..ddc8cfe
--- /dev/null
+++ b/lang_id/common/fel/task-context.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC3_STD_STRING_IMPORT
+#define TC3_STD_STRING_IMPORT
+#include <string>
+
+namespace libtextclassifier3 {
+using string = std::string;
+template <class CharT, class Traits = std::char_traits<CharT>,
+          class Allocator = std::allocator<CharT> >
+using basic_string = std::basic_string<CharT, Traits, Allocator>;
+}  // namespace libtextclassifier3
+#endif
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_TASK_CONTEXT_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_TASK_CONTEXT_H_
+
+#include <map>
+#include <string>
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Class that provides access to model parameter and inputs.
+//
+// Note: This class is related to the servers-side nlp_saft::TaskContext, but it
+// has been simplified to reduce code dependencies.
+class TaskContext {
+ public:
+  // Returns path for the input named |name|.  Returns empty string ("") if
+  // there is no input with that name.  Note: this can be a standard file path,
+  // or a path in a more special file system.
+  string GetInputPath(const string &name) const;
+
+  // Sets path for input |name|.  Previous path, if any, is overwritten.
+  void SetInputPath(const string &name, const string &path);
+
+  // Returns parameter value.  If the parameter is not specified in this
+  // context, the default value is returned.
+  string Get(const string &name, const char *defval) const;
+  int Get(const string &name, int defval) const;
+  float Get(const string &name, float defval) const;
+  bool Get(const string &name, bool defval) const;
+
+  // Sets value of parameter |name| to |value|.
+  void SetParameter(const string &name, const string &value);
+
+ private:
+  // Maps input name -> path.
+  std::map<string, string> inputs_;
+
+  // Maps parameter name -> value.
+  std::map<string, string> parameters_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_TASK_CONTEXT_H_
diff --git a/lang_id/common/fel/workspace.cc b/lang_id/common/fel/workspace.cc
new file mode 100644
index 0000000..8cab281
--- /dev/null
+++ b/lang_id/common/fel/workspace.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/fel/workspace.h"
+
+#include <atomic>
+#include <string>
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Returns a new unique id on each call; thread-safe via the atomic counter.
+int GetFreshTypeId() {
+  // Static local below is initialized the first time this method is run.
+  static std::atomic<int> counter(0);
+  return counter++;
+}
+
+string WorkspaceRegistry::DebugString() const {
+  string str;
+  for (auto &it : workspace_names_) {
+    const string &type_name = workspace_types_.at(it.first);
+    for (size_t index = 0; index < it.second.size(); ++index) {
+      const string &workspace_name = it.second[index];
+      str.append("\n  ");
+      str.append(type_name);
+      str.append(" :: ");
+      str.append(workspace_name);
+    }
+  }
+  return str;
+}
+
+VectorIntWorkspace::VectorIntWorkspace(int size) : elements_(size) {}
+
+VectorIntWorkspace::VectorIntWorkspace(int size, int value)
+    : elements_(size, value) {}
+
+VectorIntWorkspace::VectorIntWorkspace(const std::vector<int> &elements)
+    : elements_(elements) {}
+
+string VectorIntWorkspace::TypeName() { return "Vector"; }
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/fel/workspace.h b/lang_id/common/fel/workspace.h
new file mode 100644
index 0000000..09095e4
--- /dev/null
+++ b/lang_id/common/fel/workspace.h
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Notes on thread-safety: All of the classes here are thread-compatible.  More
+// specifically, the registry machinery is thread-safe, as long as each thread
+// performs feature extraction on a different Sentence object.
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_WORKSPACE_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_WORKSPACE_H_
+
+#include <stddef.h>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/macros.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// A base class for shared workspaces. Derived classes implement a static member
+// function TypeName() which returns a human readable string name for the class.
+class Workspace {
+ public:
+  // Polymorphic destructor.
+  virtual ~Workspace() {}
+
+ protected:
+  // Create an empty workspace.
+  Workspace() {}
+
+ private:
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(Workspace);
+};
+
+// Returns a new, strictly increasing int every time it is invoked.
+int GetFreshTypeId();
+
+// Struct to simulate typeid, but without RTTI.
+template <typename T>
+struct TypeId {
+  static int type_id;
+};
+
+template <typename T>
+int TypeId<T>::type_id = GetFreshTypeId();
+
+// A registry that keeps track of workspaces.
+class WorkspaceRegistry {
+ public:
+  // Create an empty registry.
+  WorkspaceRegistry() {}
+
+  // Returns the index of a named workspace, adding it to the registry first
+  // if necessary.
+  template <class W>
+  int Request(const string &name) {
+    const int id = TypeId<W>::type_id;
+    max_workspace_id_ = std::max(id, max_workspace_id_);
+    workspace_types_[id] = W::TypeName();
+    std::vector<string> &names = workspace_names_[id];
+    for (int i = 0; i < names.size(); ++i) {
+      if (names[i] == name) return i;
+    }
+    names.push_back(name);
+    return names.size() - 1;
+  }
+
+  // Returns the maximum workspace id that has been registered.
+  int MaxId() const {
+    return max_workspace_id_;
+  }
+
+  const std::unordered_map<int, std::vector<string> > &WorkspaceNames()
+      const {
+    return workspace_names_;
+  }
+
+  // Returns a string describing the registered workspaces.
+  string DebugString() const;
+
+ private:
+  // Workspace type names, indexed as workspace_types_[typeid].
+  std::unordered_map<int, string> workspace_types_;
+
+  // Workspace names, indexed as workspace_names_[typeid][workspace].
+  std::unordered_map<int, std::vector<string> > workspace_names_;
+
+  // The maximum workspace id that has been registered.
+  int max_workspace_id_ = 0;
+
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(WorkspaceRegistry);
+};
+
+// A typed collected of workspaces. The workspaces are indexed according to an
+// external WorkspaceRegistry. If the WorkspaceSet is const, the contents are
+// also immutable.
+class WorkspaceSet {
+ public:
+  ~WorkspaceSet() { Reset(WorkspaceRegistry()); }
+
+  // Returns true if a workspace has been set.
+  template <class W>
+  bool Has(int index) const {
+    const int id = TypeId<W>::type_id;
+    SAFTM_DCHECK_GE(id, 0);
+    SAFTM_DCHECK_LT(id, workspaces_.size());
+    SAFTM_DCHECK_GE(index, 0);
+    SAFTM_DCHECK_LT(index, workspaces_[id].size());
+    if (id >= workspaces_.size()) return false;
+    return workspaces_[id][index] != nullptr;
+  }
+
+  // Returns an indexed workspace; the workspace must have been set.
+  template <class W>
+  const W &Get(int index) const {
+    SAFTM_DCHECK(Has<W>(index));
+    const int id = TypeId<W>::type_id;
+    const Workspace *w = workspaces_[id][index];
+    return reinterpret_cast<const W &>(*w);
+  }
+
+  // Sets an indexed workspace; this takes ownership of the workspace, which
+  // must have been new-allocated.  It is an error to set a workspace twice.
+  template <class W>
+  void Set(int index, W *workspace) {
+    const int id = TypeId<W>::type_id;
+    SAFTM_DCHECK_GE(id, 0);
+    SAFTM_DCHECK_LT(id, workspaces_.size());
+    SAFTM_DCHECK_GE(index, 0);
+    SAFTM_DCHECK_LT(index, workspaces_[id].size());
+    SAFTM_DCHECK(workspaces_[id][index] == nullptr);
+    SAFTM_DCHECK(workspace != nullptr);
+    workspaces_[id][index] = workspace;
+  }
+
+  void Reset(const WorkspaceRegistry &registry) {
+    // Deallocate current workspaces.
+    for (auto &it : workspaces_) {
+      for (size_t index = 0; index < it.size(); ++index) {
+        delete it[index];
+      }
+    }
+    workspaces_.clear();
+    workspaces_.resize(registry.MaxId() + 1, std::vector<Workspace *>());
+    for (auto &it : registry.WorkspaceNames()) {
+      workspaces_[it.first].resize(it.second.size());
+    }
+  }
+
+ private:
+  // The set of workspaces, indexed as workspaces_[typeid][index].
+  std::vector<std::vector<Workspace *> > workspaces_;
+};
+
+// A workspace that wraps around a vector of int.
+class VectorIntWorkspace : public Workspace {
+ public:
+  // Creates a vector of the given size.
+  explicit VectorIntWorkspace(int size);
+
+  // Creates a vector initialized with the given array.
+  explicit VectorIntWorkspace(const std::vector<int> &elements);
+
+  // Creates a vector of the given size, with each element initialized to the
+  // given value.
+  VectorIntWorkspace(int size, int value);
+
+  // Returns the name of this type of workspace.
+  static string TypeName();
+
+  // Returns the i'th element.
+  int element(int i) const { return elements_[i]; }
+
+  // Sets the i'th element.
+  void set_element(int i, int value) { elements_[i] = value; }
+
+  // Returns the size of the underlying vector.
+  int size() const { return elements_.size(); }
+
+ private:
+  // The enclosed vector.
+  std::vector<int> elements_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FEL_WORKSPACE_H_
diff --git a/lang_id/common/file/file-utils.cc b/lang_id/common/file/file-utils.cc
new file mode 100644
index 0000000..108c7d5
--- /dev/null
+++ b/lang_id/common/file/file-utils.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/file/file-utils.h"
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+namespace file_utils {
+
+bool GetFileContent(const string &filename, string *content) {
+  ScopedMmap scoped_mmap(filename);
+  const MmapHandle &handle = scoped_mmap.handle();
+  if (!handle.ok()) {
+    SAFTM_LOG(ERROR) << "Error opening " << filename;
+    return false;
+  }
+  StringPiece sp = handle.to_stringpiece();
+  content->assign(sp.data(), sp.size());
+  return true;
+}
+
+bool FileExists(const string &filename) {
+  struct stat s = {0};
+  if (!stat(filename.c_str(), &s)) {
+    return s.st_mode & S_IFREG;
+  } else {
+    return false;
+  }
+}
+
+bool DirectoryExists(const string &dirpath) {
+  struct stat s = {0};
+  if (!stat(dirpath.c_str(), &s)) {
+    return s.st_mode & S_IFDIR;
+  } else {
+    return false;
+  }
+}
+
+}  // namespace file_utils
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/file/file-utils.h b/lang_id/common/file/file-utils.h
new file mode 100644
index 0000000..6377d7a
--- /dev/null
+++ b/lang_id/common/file/file-utils.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FILE_FILE_UTILS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FILE_FILE_UTILS_H_
+
+#include <stddef.h>
+#include <string>
+
+#include "lang_id/common/file/mmap.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+namespace file_utils {
+
+// Reads the entire content of a file into a string.  Returns true on success,
+// false on error.
+bool GetFileContent(const string &filename, string *content);
+
+// Parses a proto from its serialized representation in memory.  That
+// representation starts at address |data| and should contain exactly
+// |num_bytes| bytes.  Returns true on success, false otherwise.
+template <class Proto>
+bool ParseProtoFromMemory(const char *data, size_t num_bytes, Proto *proto) {
+  if (data == nullptr) {
+    // Avoid passing a nullptr to ParseFromArray below.
+    return false;
+  }
+  return proto->ParseFromArray(data, num_bytes);
+}
+
+// Convenience StringPiece-based version of ParseProtoFromMemory.
+template <class Proto>
+inline bool ParseProtoFromMemory(StringPiece sp, Proto *proto) {
+  return ParseProtoFromMemory(sp.data(), sp.size(), proto);
+}
+
+// Parses a proto from a file.  Returns true on success, false otherwise.
+//
+// Note: the entire content of the file should be the binary (not
+// human-readable) serialization of a protocol buffer.
+//
+// Note: when we compile for Android, the proto parsing methods need to know the
+// type of the message they are parsing.  We use template polymorphism for that.
+template<class Proto>
+bool ReadProtoFromFile(const string &filename, Proto *proto) {
+  ScopedMmap scoped_mmap(filename);
+  const MmapHandle &handle = scoped_mmap.handle();
+  if (!handle.ok()) {
+    return false;
+  }
+  return ParseProtoFromMemory(handle.to_stringpiece(), proto);
+}
+
+// Returns true if filename is the name of an existing file, and false
+// otherwise.
+bool FileExists(const string &filename);
+
+// Returns true if dirpath is the path to an existing directory, and false
+// otherwise.
+bool DirectoryExists(const string &dirpath);
+
+}  // namespace file_utils
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FILE_FILE_UTILS_H_
diff --git a/lang_id/common/file/mmap.cc b/lang_id/common/file/mmap.cc
new file mode 100644
index 0000000..89efa99
--- /dev/null
+++ b/lang_id/common/file/mmap.cc
@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/file/mmap.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/macros.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+namespace {
+inline string GetLastSystemError() {
+  return string(strerror(errno));
+}
+
+inline MmapHandle GetErrorMmapHandle() {
+  return MmapHandle(nullptr, 0);
+}
+
+class FileCloser {
+ public:
+  explicit FileCloser(int fd) : fd_(fd) {}
+  ~FileCloser() {
+    int result = close(fd_);
+    if (result != 0) {
+      const string last_error = GetLastSystemError();
+      SAFTM_LOG(ERROR) << "Error closing file descriptor: " << last_error;
+    }
+  }
+ private:
+  const int fd_;
+
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(FileCloser);
+};
+}  // namespace
+
+MmapHandle MmapFile(const string &filename) {
+  int fd = open(filename.c_str(), O_RDONLY);
+
+  if (fd < 0) {
+    const string last_error = GetLastSystemError();
+    SAFTM_LOG(ERROR) << "Error opening " << filename << ": " << last_error;
+    return GetErrorMmapHandle();
+  }
+
+  // Make sure we close fd no matter how we exit this function.  As the man page
+  // for mmap clearly states: "closing the file descriptor does not unmap the
+  // region."  Hence, we can close fd as soon as we return from here.
+  FileCloser file_closer(fd);
+
+  return MmapFile(fd);
+}
+
+MmapHandle MmapFile(int fd) {
+  // Get file stats to obtain file size.
+  struct stat sb;
+  if (fstat(fd, &sb) != 0) {
+    const string last_error = GetLastSystemError();
+    SAFTM_LOG(ERROR) << "Unable to stat fd: " << last_error;
+    return GetErrorMmapHandle();
+  }
+  size_t file_size_in_bytes = static_cast<size_t>(sb.st_size);
+
+  // Perform actual mmap.
+  void *mmap_addr = mmap(
+
+      // Let system pick address for mmapped data.
+      nullptr,
+
+      // Mmap all bytes from the file.
+      file_size_in_bytes,
+
+      // One can read / write the mapped data (but see MAP_PRIVATE below).
+      // Normally, we expect only to read it, but in the future, we may want to
+      // write it, to fix e.g., endianness differences.
+      PROT_READ | PROT_WRITE,
+
+      // Updates to mmapped data are *not* propagated to actual file.
+      // AFAIK(salcianu) that's anyway not possible on Android.
+      MAP_PRIVATE,
+
+      // Descriptor of file to mmap.
+      fd,
+
+      // Map bytes right from the beginning of the file.  This, and
+      // file_size_in_bytes (2nd argument) means we map all bytes from the file.
+      0);
+  if (mmap_addr == MAP_FAILED) {
+    const string last_error = GetLastSystemError();
+    SAFTM_LOG(ERROR) << "Error while mmapping: " << last_error;
+    return GetErrorMmapHandle();
+  }
+
+  return MmapHandle(mmap_addr, file_size_in_bytes);
+}
+
+bool Unmap(MmapHandle mmap_handle) {
+  if (!mmap_handle.ok()) {
+    // Unmapping something that hasn't been mapped is trivially successful.
+    return true;
+  }
+  if (munmap(mmap_handle.start(), mmap_handle.num_bytes()) != 0) {
+    const string last_error = GetLastSystemError();
+    SAFTM_LOG(ERROR) << "Error during Unmap / munmap: " << last_error;
+    return false;
+  }
+  return true;
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/util/memory/mmap.h b/lang_id/common/file/mmap.h
similarity index 65%
copy from util/memory/mmap.h
copy to lang_id/common/file/mmap.h
index 7d28b64..6131803 100644
--- a/util/memory/mmap.h
+++ b/lang_id/common/file/mmap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_MEMORY_MMAP_H_
-#define LIBTEXTCLASSIFIER_UTIL_MEMORY_MMAP_H_
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FILE_MMAP_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FILE_MMAP_H_
 
 #include <stddef.h>
 
 #include <string>
 
-#include "util/base/integral_types.h"
-#include "util/strings/stringpiece.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 // Handle for a memory area where a file has been mmapped.
 //
@@ -39,22 +39,12 @@
 // are ok keeping that file in memory the whole time).
 class MmapHandle {
  public:
-  MmapHandle(void *start, size_t num_bytes, void *unmap_addr = nullptr)
-      : start_(start), num_bytes_(num_bytes), unmap_addr_(unmap_addr) {}
+  MmapHandle(void *start, size_t num_bytes)
+      : start_(start), num_bytes_(num_bytes) {}
 
   // Returns start address for the memory area where a file has been mmapped.
   void *start() const { return start_; }
 
-  // Returns address to use for munmap call. If unmap_addr was not specified
-  // the start address is used.
-  void *unmap_addr() const {
-    if (unmap_addr_ != nullptr) {
-      return unmap_addr_;
-    } else {
-      return start_;
-    }
-  }
-
   // Returns number of bytes of the memory area from start().
   size_t num_bytes() const { return num_bytes_; }
 
@@ -73,9 +63,6 @@
 
   // See doc for num_bytes().
   const size_t num_bytes_;
-
-  // Address to use for unmapping.
-  void *const unmap_addr_;
 };
 
 // Maps the full content of a file in memory (using mmap).
@@ -86,7 +73,7 @@
 // Sample usage:
 //
 // MmapHandle mmap_handle = MmapFile(filename);
-// TC_DCHECK(mmap_handle.ok()) << "Unable to mmap " << filename;
+// CHECK(mmap_handle.ok()) << "Unable to mmap " << filename;
 //
 // ... use data from addresses
 // ... [mmap_handle.start, mmap_handle.start + mmap_handle.num_bytes)
@@ -96,18 +83,11 @@
 // Note: one can read *and* write the num_bytes bytes from start, but those
 // writes are not propagated to the underlying file, nor to other processes that
 // may have mmapped that file (all changes are local to current process).
-MmapHandle MmapFile(const std::string &filename);
+MmapHandle MmapFile(const string &filename);
 
-// Like MmapFile(const std::string &filename), but uses a file descriptor.
+// Like MmapFile(const string &filename), but uses a file descriptor.
 MmapHandle MmapFile(int fd);
 
-// Maps a segment of a file to memory. File is given by a file descriptor, and
-// offset (relative to the beginning of the file) and size specify the segment
-// to be mapped. NOTE: Internally, we align the offset for the call to mmap
-// system call to be a multiple of page size, so offset does NOT have to be a
-// multiply of the page size.
-MmapHandle MmapFile(int fd, int64 segment_offset, int64 segment_size);
-
 // Unmaps a file mapped using MmapFile.  Returns true on success, false
 // otherwise.
 bool Unmap(MmapHandle mmap_handle);
@@ -116,13 +96,11 @@
 // destruction.
 class ScopedMmap {
  public:
-  explicit ScopedMmap(const std::string &filename)
+  explicit ScopedMmap(const string &filename)
       : handle_(MmapFile(filename)) {}
 
-  explicit ScopedMmap(int fd) : handle_(MmapFile(fd)) {}
-
-  ScopedMmap(int fd, int segment_offset, int segment_size)
-      : handle_(MmapFile(fd, segment_offset, segment_size)) {}
+  explicit ScopedMmap(int fd)
+      : handle_(MmapFile(fd)) {}
 
   ~ScopedMmap() {
     if (handle_.ok()) {
@@ -136,6 +114,7 @@
   MmapHandle handle_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_MEMORY_MMAP_H_
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FILE_MMAP_H_
diff --git a/lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.cc b/lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.cc
new file mode 100644
index 0000000..ee22420
--- /dev/null
+++ b/lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.cc
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.h"
+
+#include "lang_id/common/lite_base/endian.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/macros.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+namespace {
+// Returns true if and only if ptr points to a location inside allowed_range.
+bool IsPointerInRange(const char *ptr, StringPiece allowed_range) {
+  return (ptr >= allowed_range.data()) &&
+         (ptr < (allowed_range.data() + allowed_range.size()));
+}
+
+// Returns true if and only if the memory range [start, start +
+// range_size_in_bytes) is included inside allowed_range.
+//
+// Special case: if range_size_in_bytes == 0 (empty range) then we require that
+// start is nullptr or in the allowed_range.
+bool IsMemoryRangeValid(const void *start, int range_size_in_bytes,
+                        StringPiece allowed_range) {
+  const char *begin = reinterpret_cast<const char *>(start);
+  if (range_size_in_bytes < 0) {
+    return false;
+  }
+  if (range_size_in_bytes == 0) {
+    return (start == nullptr) || IsPointerInRange(begin, allowed_range);
+  }
+  const char *inclusive_end = begin + (range_size_in_bytes - 1);
+  return (begin <= inclusive_end) && IsPointerInRange(begin, allowed_range) &&
+         IsPointerInRange(inclusive_end, allowed_range);
+}
+
+bool VerifyQuantizationScales(EmbeddingNetworkParams::Matrix matrix,
+                              StringPiece bytes) {
+  if (matrix.quant_scales == nullptr) {
+    SAFTM_LOG(ERROR) << "Quantization type "
+                     << static_cast<int>(matrix.quant_type)
+                     << "; but no quantization scales";
+    return false;
+  }
+  bool valid_scales = IsMemoryRangeValid(matrix.quant_scales,
+                                         matrix.rows * sizeof(float16), bytes);
+  if (!valid_scales) {
+    SAFTM_LOG(ERROR) << "quantization scales not fully inside bytes";
+    return false;
+  }
+  return true;
+}
+
+// Returns false if we detect a problem with |matrix|, true otherwise.  E.g., we
+// check that the array that starts at pointer matrix.elements is fully inside
+// |bytes| (the range of bytes passed to the
+// EmbeddingNetworkParamsFromFlatbuffer constructor).
+bool VerifyMatrix(EmbeddingNetworkParams::Matrix matrix, StringPiece bytes) {
+  if ((matrix.rows < 0) || (matrix.cols < 0)) {
+    SAFTM_LOG(ERROR) << "Wrong matrix geometry: " << matrix.rows << " x "
+                     << matrix.cols;
+    return false;
+  }
+
+  const int num_elements = matrix.rows * matrix.cols;
+
+  // Number of bytes occupied by the num_elements elements that start at address
+  // matrix.elements.
+  int element_range_size_in_bytes = 0;
+  switch (matrix.quant_type) {
+    case QuantizationType::NONE:
+      element_range_size_in_bytes = num_elements * sizeof(float);
+      break;
+    case QuantizationType::UINT8: {
+      element_range_size_in_bytes = num_elements;
+      if (!VerifyQuantizationScales(matrix, bytes)) {
+        return false;
+      }
+      break;
+    }
+    case QuantizationType::UINT4: {
+      if (matrix.cols % 2 != 0) {
+        SAFTM_LOG(ERROR) << "UINT4 doesn't work with odd #cols" << matrix.cols;
+        return false;
+      }
+      element_range_size_in_bytes = num_elements / 2;
+      if (!VerifyQuantizationScales(matrix, bytes)) {
+        return false;
+      }
+      break;
+    }
+    case QuantizationType::FLOAT16: {
+      element_range_size_in_bytes = num_elements * sizeof(float16);
+
+      // No need to verify the scales: FLOAT16 quantization does not use scales.
+      break;
+    }
+    default:
+      SAFTM_LOG(ERROR) << "Unsupported quantization type "
+                       << static_cast<int>(matrix.quant_type);
+      return false;
+  }
+  if (matrix.elements == nullptr) {
+    SAFTM_LOG(ERROR) << "matrix.elements == nullptr";
+    return false;
+  }
+  bool valid =
+      IsMemoryRangeValid(matrix.elements, element_range_size_in_bytes, bytes);
+  if (!valid) {
+    SAFTM_LOG(ERROR) << "elements not fully inside bytes";
+    return false;
+  }
+  return true;
+}
+
+// Checks the geometry of the network layer represented by |weights| and |bias|,
+// assuming the input to this layer has size |input_size|.  Returns false if we
+// detect any problem, true otherwise.
+bool GoodLayerGeometry(int input_size,
+                       const EmbeddingNetworkParams::Matrix &weights,
+                       const EmbeddingNetworkParams::Matrix &bias) {
+  if (weights.rows != input_size) {
+    SAFTM_LOG(ERROR) << "#rows " << weights.rows << " != " << input_size;
+    return false;
+  }
+  if ((bias.rows != 1) && (bias.cols != 1)) {
+    SAFTM_LOG(ERROR) << "bad bias vector geometry: " << bias.rows << " x "
+                     << bias.cols;
+    return false;
+  }
+  int bias_dimension = bias.rows * bias.cols;
+  if (weights.cols != bias_dimension) {
+    SAFTM_LOG(ERROR) << "#cols " << weights.cols << " != " << bias_dimension;
+    return false;
+  }
+  return true;
+}
+}  // namespace
+
+EmbeddingNetworkParamsFromFlatbuffer::EmbeddingNetworkParamsFromFlatbuffer(
+    StringPiece bytes) {
+  // We expect valid_ to be initialized to false at this point.  We set it to
+  // true only if we successfully complete all initialization.  On error, we
+  // return early, leaving valid_ set to false.
+  SAFTM_DCHECK(!valid_);
+
+  // NOTE: current EmbeddingNetworkParams API works only on little-endian
+  // machines.  Fortunately, all modern devices are little-endian so, instead of
+  // a costly API change, we support only the little-endian case.
+  //
+  // Technical explanation: for each Matrix, our API provides a pointer to the
+  // matrix elements (see Matrix field |elements|).  For unquantized matrices,
+  // that's a const float *pointer; the client code (e.g., Neurosis) uses those
+  // floats directly.  That is correct if the EmbeddingNetworkParams come from a
+  // proto, where the proto parsing already handled the endianness differences.
+  // But in the flatbuffer case, that's a pointer to floats in little-endian
+  // format (flatbuffers always use little-endian).  If our API provided access
+  // to only one element at a time, the accessor method could swap the bytes "on
+  // the fly", using temporary variables.  Instead, our API provides a pointer
+  // to all elements: as their number is variable (and underlying data is
+  // immutable), we can't ensure the bytes of all those elements are swapped
+  // without extra memory allocation to store the swapped bytes (which is what
+  // using flatbuffers is supposed to prevent).
+  if (!LittleEndian::IsLittleEndian()) {
+    SAFTM_LOG(INFO) << "Not a little-endian machine";
+    return;
+  }
+
+  const uint8_t *start = reinterpret_cast<const uint8_t *>(bytes.data());
+  if (start == nullptr) {
+    // Note: as |bytes| is expected to be a valid EmbeddingNetwork flatbuffer,
+    // it should contain the 4-char identifier "NS00" (or a later version).  It
+    // can't be empty; hence StringPiece(nullptr, 0) is not legal here.
+    SAFTM_LOG(ERROR) << "nullptr bytes";
+    return;
+  }
+  flatbuffers::Verifier verifier(start, bytes.size());
+  if (!saft_fbs::VerifyEmbeddingNetworkBuffer(verifier)) {
+    SAFTM_LOG(ERROR) << "Not a valid EmbeddingNetwork flatbuffer";
+    return;
+  }
+  network_ = saft_fbs::GetEmbeddingNetwork(start);
+  if (network_ == nullptr) {
+    SAFTM_LOG(ERROR) << "Unable to interpret bytes as a flatbuffer";
+    return;
+  }
+
+  // Perform a few extra checks before declaring this object valid.
+  valid_ = ValidityChecking(bytes);
+}
+
+bool EmbeddingNetworkParamsFromFlatbuffer::ValidityChecking(
+    StringPiece bytes) const {
+  int input_size = 0;
+  for (int i = 0; i < embeddings_size(); ++i) {
+    Matrix embeddings = GetEmbeddingMatrix(i);
+    if (!VerifyMatrix(embeddings, bytes)) {
+      SAFTM_LOG(ERROR) << "Bad embedding matrix #" << i;
+      return false;
+    }
+    input_size += embedding_num_features(i) * embeddings.cols;
+  }
+  int current_size = input_size;
+  for (int i = 0; i < hidden_size(); ++i) {
+    Matrix weights = GetHiddenLayerMatrix(i);
+    if (!VerifyMatrix(weights, bytes)) {
+      SAFTM_LOG(ERROR) << "Bad weights matrix for hidden layer #" << i;
+      return false;
+    }
+    Matrix bias = GetHiddenLayerBias(i);
+    if (!VerifyMatrix(bias, bytes)) {
+      SAFTM_LOG(ERROR) << "Bad bias vector for hidden layer #" << i;
+      return false;
+    }
+    if (!GoodLayerGeometry(current_size, weights, bias)) {
+      SAFTM_LOG(ERROR) << "Bad geometry for hidden layer #" << i;
+      return false;
+    }
+    current_size = weights.cols;
+  }
+
+  if (HasSoftmax()) {
+    Matrix weights = GetSoftmaxMatrix();
+    if (!VerifyMatrix(weights, bytes)) {
+      SAFTM_LOG(ERROR) << "Bad weights matrix for softmax";
+      return false;
+    }
+    Matrix bias = GetSoftmaxBias();
+    if (!VerifyMatrix(bias, bytes)) {
+      SAFTM_LOG(ERROR) << "Bad bias vector for softmax";
+      return false;
+    }
+    if (!GoodLayerGeometry(current_size, weights, bias)) {
+      SAFTM_LOG(ERROR) << "Bad geometry for softmax layer";
+      return false;
+    }
+  }
+  return true;
+}
+
+// static
+bool EmbeddingNetworkParamsFromFlatbuffer::InRangeIndex(int index, int limit,
+                                                        const char *info) {
+  if ((index >= 0) && (index < limit)) {
+    return true;
+  } else {
+    SAFTM_LOG(ERROR) << info << " index " << index << " outside range [0, "
+                     << limit << ")";
+    return false;
+  }
+}
+
+int EmbeddingNetworkParamsFromFlatbuffer::SafeGetNumInputChunks() const {
+  const auto *input_chunks = network_->input_chunks();
+  if (input_chunks == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr input_chunks";
+    return 0;
+  }
+  return input_chunks->size();
+}
+
+const saft_fbs::InputChunk *
+EmbeddingNetworkParamsFromFlatbuffer::SafeGetInputChunk(int i) const {
+  if (!InRangeIndex(i, SafeGetNumInputChunks(), "input chunks")) {
+    return nullptr;
+  }
+  const auto *input_chunks = network_->input_chunks();
+  if (input_chunks == nullptr) {
+    // Execution should not reach this point, due to how SafeGetNumInputChunks()
+    // is implemented.  Still, just to be sure:
+    SAFTM_LOG(ERROR) << "nullptr input_chunks";
+    return nullptr;
+  }
+  const saft_fbs::InputChunk *input_chunk = input_chunks->Get(i);
+  if (input_chunk == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr input chunk #" << i;
+  }
+  return input_chunk;
+}
+
+const saft_fbs::Matrix *
+EmbeddingNetworkParamsFromFlatbuffer::SafeGetEmbeddingMatrix(int i) const {
+  const saft_fbs::InputChunk *input_chunk = SafeGetInputChunk(i);
+  if (input_chunk == nullptr) return nullptr;
+  const saft_fbs::Matrix *matrix = input_chunk->embedding();
+  if (matrix == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr embeding matrix #" << i;
+  }
+  return matrix;
+}
+
+int EmbeddingNetworkParamsFromFlatbuffer::SafeGetNumLayers() const {
+  const auto *layers = network_->layers();
+  if (layers == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr layers";
+    return 0;
+  }
+  return layers->size();
+}
+
+const saft_fbs::NeuralLayer *EmbeddingNetworkParamsFromFlatbuffer::SafeGetLayer(
+    int i) const {
+  if (!InRangeIndex(i, SafeGetNumLayers(), "layer")) {
+    return nullptr;
+  }
+  const auto *layers = network_->layers();
+  if (layers == nullptr) {
+    // Execution should not reach this point, due to how SafeGetNumLayers()
+    // is implemented.  Still, just to be sure:
+    SAFTM_LOG(ERROR) << "nullptr layers";
+    return nullptr;
+  }
+  const saft_fbs::NeuralLayer *layer = layers->Get(i);
+  if (layer == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr layer #" << i;
+  }
+  return layer;
+}
+
+const saft_fbs::Matrix *
+EmbeddingNetworkParamsFromFlatbuffer::SafeGetLayerWeights(int i) const {
+  const saft_fbs::NeuralLayer *layer = SafeGetLayer(i);
+  if (layer == nullptr) return nullptr;
+  const saft_fbs::Matrix *weights = layer->weights();
+  if (weights == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr weights for layer #" << i;
+  }
+  return weights;
+}
+
+const saft_fbs::Matrix *EmbeddingNetworkParamsFromFlatbuffer::SafeGetLayerBias(
+    int i) const {
+  const saft_fbs::NeuralLayer *layer = SafeGetLayer(i);
+  if (layer == nullptr) return nullptr;
+  const saft_fbs::Matrix *bias = layer->bias();
+  if (bias == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr bias for layer #" << i;
+  }
+  return bias;
+}
+
+// static
+const float *EmbeddingNetworkParamsFromFlatbuffer::SafeGetValues(
+    const saft_fbs::Matrix *matrix) {
+  if (matrix == nullptr) return nullptr;
+  const flatbuffers::Vector<float> *values = matrix->values();
+  if (values == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr values";
+  }
+  return values->data();
+}
+
+// static
+const uint8_t *EmbeddingNetworkParamsFromFlatbuffer::SafeGetQuantizedValues(
+    const saft_fbs::Matrix *matrix) {
+  if (matrix == nullptr) return nullptr;
+  const flatbuffers::Vector<uint8_t> *quantized_values =
+      matrix->quantized_values();
+  if (quantized_values == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr quantized_values";
+  }
+  return quantized_values->data();
+}
+
+// static
+const float16 *EmbeddingNetworkParamsFromFlatbuffer::SafeGetScales(
+    const saft_fbs::Matrix *matrix) {
+  if (matrix == nullptr) return nullptr;
+  const flatbuffers::Vector<uint16_t> *scales = matrix->scales();
+  if (scales == nullptr) {
+    SAFTM_LOG(ERROR) << "nullptr scales";
+  }
+  return scales->data();
+}
+
+const saft_fbs::NeuralLayer *
+EmbeddingNetworkParamsFromFlatbuffer::SafeGetSoftmaxLayer() const {
+  int num_layers = SafeGetNumLayers();
+  if (num_layers <= 0) {
+    SAFTM_LOG(ERROR) << "No softmax layer";
+    return nullptr;
+  }
+  return SafeGetLayer(num_layers - 1);
+}
+
+QuantizationType EmbeddingNetworkParamsFromFlatbuffer::SafeGetQuantizationType(
+    const saft_fbs::Matrix *matrix) const {
+  if (matrix == nullptr) {
+    return QuantizationType::NONE;
+  }
+  saft_fbs::QuantizationType quantization_type = matrix->quantization_type();
+
+  // Conversion from nlp_saft::saft_fbs::QuantizationType to
+  // nlp_saft::QuantizationType (due to legacy reasons, we have both).
+  switch (quantization_type) {
+    case saft_fbs::QuantizationType_NONE:
+      return QuantizationType::NONE;
+    case saft_fbs::QuantizationType_UINT8:
+      return QuantizationType::UINT8;
+    case saft_fbs::QuantizationType_UINT4:
+      return QuantizationType::UINT4;
+    case saft_fbs::QuantizationType_FLOAT16:
+      return QuantizationType::FLOAT16;
+    default:
+      SAFTM_LOG(ERROR) << "Unsupported quantization type "
+                       << static_cast<int>(quantization_type);
+      return QuantizationType::NONE;
+  }
+}
+
+const void *EmbeddingNetworkParamsFromFlatbuffer::SafeGetValuesOfMatrix(
+    const saft_fbs::Matrix *matrix) const {
+  if (matrix == nullptr) {
+    return nullptr;
+  }
+  saft_fbs::QuantizationType quantization_type = matrix->quantization_type();
+  switch (quantization_type) {
+    case saft_fbs::QuantizationType_NONE:
+      return SafeGetValues(matrix);
+    case saft_fbs::QuantizationType_UINT8:
+      SAFTM_FALLTHROUGH_INTENDED;
+    case saft_fbs::QuantizationType_UINT4:
+      SAFTM_FALLTHROUGH_INTENDED;
+    case saft_fbs::QuantizationType_FLOAT16:
+      return SafeGetQuantizedValues(matrix);
+    default:
+      SAFTM_LOG(ERROR) << "Unsupported quantization type "
+                       << static_cast<int>(quantization_type);
+      return nullptr;
+  }
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.h b/lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.h
new file mode 100644
index 0000000..57d59c5
--- /dev/null
+++ b/lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.h
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FLATBUFFERS_EMBEDDING_NETWORK_PARAMS_FROM_FLATBUFFER_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FLATBUFFERS_EMBEDDING_NETWORK_PARAMS_FROM_FLATBUFFER_H_
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "lang_id/common/embedding-network-params.h"
+#include "lang_id/common/flatbuffers/embedding-network_generated.h"
+#include "lang_id/common/lite_base/float16.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// EmbeddingNetworkParams implementation backed by a flatbuffer.
+//
+// For info on our flatbuffer schema, see embedding-network.fbs.
+class EmbeddingNetworkParamsFromFlatbuffer : public EmbeddingNetworkParams {
+ public:
+  // Constructs an EmbeddingNetworkParamsFromFlatbuffer instance, using the
+  // flatbuffer from |bytes|.
+  //
+  // IMPORTANT #1: caller should make sure |bytes| are alive during the lifetime
+  // of this EmbeddingNetworkParamsFromFlatbuffer instance.  To avoid overhead,
+  // this constructor does not copy |bytes|.
+  //
+  // IMPORTANT #2: immediately after this constructor returns, we suggest you
+  // call is_valid() on the newly-constructed object and do not call any other
+  // method if the answer is negative (false).
+  explicit EmbeddingNetworkParamsFromFlatbuffer(StringPiece bytes);
+
+  bool UpdateTaskContextParameters(mobile::TaskContext *task_context) override {
+    // This class does not provide access to the overall TaskContext.  It
+    // provides only parameters for the Neurosis neural network.
+    SAFTM_LOG(DFATAL) << "Not supported";
+    return false;
+  }
+
+  bool is_valid() const override { return valid_; }
+
+  int embeddings_size() const override { return SafeGetNumInputChunks(); }
+
+  int embeddings_num_rows(int i) const override {
+    const saft_fbs::Matrix *matrix = SafeGetEmbeddingMatrix(i);
+    return SafeGetNumRows(matrix);
+  }
+
+  int embeddings_num_cols(int i) const override {
+    const saft_fbs::Matrix *matrix = SafeGetEmbeddingMatrix(i);
+    return SafeGetNumCols(matrix);
+  }
+
+  const void *embeddings_weights(int i) const override {
+    const saft_fbs::Matrix *matrix = SafeGetEmbeddingMatrix(i);
+    return SafeGetValuesOfMatrix(matrix);
+  }
+
+  QuantizationType embeddings_quant_type(int i) const override {
+    const saft_fbs::Matrix *matrix = SafeGetEmbeddingMatrix(i);
+    return SafeGetQuantizationType(matrix);
+  }
+
+  const float16 *embeddings_quant_scales(int i) const override {
+    const saft_fbs::Matrix *matrix = SafeGetEmbeddingMatrix(i);
+    return SafeGetScales(matrix);
+  }
+
+  int hidden_size() const override {
+    // -1 because last layer is always the softmax layer.
+    return std::max(SafeGetNumLayers() - 1, 0);
+  }
+
+  int hidden_num_rows(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetLayerWeights(i);
+    return SafeGetNumRows(weights);
+  }
+
+  int hidden_num_cols(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetLayerWeights(i);
+    return SafeGetNumCols(weights);
+  }
+
+  QuantizationType hidden_weights_quant_type(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetLayerWeights(i);
+    return SafeGetQuantizationType(weights);
+  }
+
+  const void *hidden_weights(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetLayerWeights(i);
+    return SafeGetValuesOfMatrix(weights);
+  }
+
+  int hidden_bias_size() const override { return hidden_size(); }
+
+  int hidden_bias_num_rows(int i) const override {
+    const saft_fbs::Matrix *bias = SafeGetLayerBias(i);
+    return SafeGetNumRows(bias);
+  }
+
+  int hidden_bias_num_cols(int i) const override {
+    const saft_fbs::Matrix *bias = SafeGetLayerBias(i);
+    return SafeGetNumCols(bias);
+  }
+
+  const void *hidden_bias_weights(int i) const override {
+    const saft_fbs::Matrix *bias = SafeGetLayerBias(i);
+    return SafeGetValues(bias);
+  }
+
+  int softmax_size() const override { return (SafeGetNumLayers() > 0) ? 1 : 0; }
+
+  int softmax_num_rows(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetSoftmaxWeights();
+    return SafeGetNumRows(weights);
+  }
+
+  int softmax_num_cols(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetSoftmaxWeights();
+    return SafeGetNumCols(weights);
+  }
+
+  QuantizationType softmax_weights_quant_type(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetSoftmaxWeights();
+    return SafeGetQuantizationType(weights);
+  }
+
+  const void *softmax_weights(int i) const override {
+    const saft_fbs::Matrix *weights = SafeGetSoftmaxWeights();
+    return SafeGetValuesOfMatrix(weights);
+  }
+
+  int softmax_bias_size() const override { return softmax_size(); }
+
+  int softmax_bias_num_rows(int i) const override {
+    const saft_fbs::Matrix *bias = SafeGetSoftmaxBias();
+    return SafeGetNumRows(bias);
+  }
+
+  int softmax_bias_num_cols(int i) const override {
+    const saft_fbs::Matrix *bias = SafeGetSoftmaxBias();
+    return SafeGetNumCols(bias);
+  }
+
+  const void *softmax_bias_weights(int i) const override {
+    const saft_fbs::Matrix *bias = SafeGetSoftmaxBias();
+    return SafeGetValues(bias);
+  }
+
+  int embedding_num_features_size() const override {
+    return SafeGetNumInputChunks();
+  }
+
+  int embedding_num_features(int i) const override {
+    if (!InRangeIndex(i, embedding_num_features_size(),
+                      "embedding num features")) {
+      return 0;
+    }
+    const saft_fbs::InputChunk *input_chunk = SafeGetInputChunk(i);
+    if (input_chunk == nullptr) {
+      return 0;
+    }
+    return input_chunk->num_features();
+  }
+
+  bool has_is_precomputed() const override { return false; }
+  bool is_precomputed() const override { return false; }
+
+ private:
+  // Returns true if and only if index is in [0, limit).  info should be a
+  // pointer to a zero-terminated array of chars (ideally a literal string,
+  // e.g. "layer") indicating what the index refers to; info is used to make log
+  // messages more informative.
+  static bool InRangeIndex(int index, int limit, const char *info);
+
+  // Returns network_->input_chunks()->size(), if all dereferences are safe
+  // (i.e., no nullptr); otherwise, returns 0.
+  int SafeGetNumInputChunks() const;
+
+  // Returns network_->input_chunks()->Get(i), if all dereferences are safe
+  // (i.e., no nullptr) otherwise, returns nullptr.
+  const saft_fbs::InputChunk *SafeGetInputChunk(int i) const;
+
+  // Returns network_->input_chunks()->Get(i)->embedding(), if all dereferences
+  // are safe (i.e., no nullptr); otherwise, returns nullptr.
+  const saft_fbs::Matrix *SafeGetEmbeddingMatrix(int i) const;
+
+  // Returns network_->layers()->size(), if all dereferences are safe (i.e., no
+  // nullptr); otherwise, returns 0.
+  int SafeGetNumLayers() const;
+
+  // Returns network_->layers()->Get(i), if all dereferences are safe
+  // (i.e., no nullptr); otherwise, returns nullptr.
+  const saft_fbs::NeuralLayer *SafeGetLayer(int i) const;
+
+  // Returns network_->layers()->Get(i)->weights(), if all dereferences are safe
+  // (i.e., no nullptr); otherwise, returns nullptr.
+  const saft_fbs::Matrix *SafeGetLayerWeights(int i) const;
+
+  // Returns network_->layers()->Get(i)->bias(), if all dereferences are safe
+  // (i.e., no nullptr); otherwise, returns nullptr.
+  const saft_fbs::Matrix *SafeGetLayerBias(int i) const;
+
+  static int SafeGetNumRows(const saft_fbs::Matrix *matrix) {
+    return (matrix == nullptr) ? 0 : matrix->rows();
+  }
+
+  static int SafeGetNumCols(const saft_fbs::Matrix *matrix) {
+    return (matrix == nullptr) ? 0 : matrix->cols();
+  }
+
+  // Returns matrix->values()->data() if all dereferences are safe (i.e., no
+  // nullptr); otherwise, returns nullptr.
+  static const float *SafeGetValues(const saft_fbs::Matrix *matrix);
+
+  // Returns matrix->quantized_values()->data() if all dereferences are safe
+  // (i.e., no nullptr); otherwise, returns nullptr.
+  static const uint8_t *SafeGetQuantizedValues(const saft_fbs::Matrix *matrix);
+
+  // Returns matrix->scales()->data() if all dereferences are safe (i.e., no
+  // nullptr); otherwise, returns nullptr.
+  static const float16 *SafeGetScales(const saft_fbs::Matrix *matrix);
+
+  // Returns network_->layers()->Get(last_index) with last_index =
+  // SafeGetNumLayers() - 1, if all dereferences are safe (i.e., no nullptr) and
+  // there exists at least one layer; otherwise, returns nullptr.
+  const saft_fbs::NeuralLayer *SafeGetSoftmaxLayer() const;
+
+  const saft_fbs::Matrix *SafeGetSoftmaxWeights() const {
+    const saft_fbs::NeuralLayer *layer = SafeGetSoftmaxLayer();
+    return (layer == nullptr) ? nullptr : layer->weights();
+  }
+
+  const saft_fbs::Matrix *SafeGetSoftmaxBias() const {
+    const saft_fbs::NeuralLayer *layer = SafeGetSoftmaxLayer();
+    return (layer == nullptr) ? nullptr : layer->bias();
+  }
+
+  // Returns the quantization type for |matrix|.  Returns NONE in case of
+  // problems (e.g., matrix is nullptr or unknown quantization type).
+  QuantizationType SafeGetQuantizationType(
+      const saft_fbs::Matrix *matrix) const;
+
+  // Returns a pointer to the values (float, uint8, or float16, depending on
+  // quantization) from |matrix|, in row-major order.  Returns nullptr in case
+  // of a problem.
+  const void *SafeGetValuesOfMatrix(const saft_fbs::Matrix *matrix) const;
+
+  // Performs some validity checks.  E.g., check that dimensions of the network
+  // layers match.  Also checks that all pointers we return are inside the
+  // |bytes| passed to the constructor, such that client that reads from those
+  // pointers will not run into troubles.
+  bool ValidityChecking(StringPiece bytes) const;
+
+  // True if these params are valid.  May be false if the original proto was
+  // corrupted.  We prefer setting this to false over CHECK-failing.
+  bool valid_ = false;
+
+  // EmbeddingNetwork flatbuffer from the bytes passed as parameter to the
+  // constructor; see constructor doc.
+  const saft_fbs::EmbeddingNetwork *network_ = nullptr;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FLATBUFFERS_EMBEDDING_NETWORK_PARAMS_FROM_FLATBUFFER_H_
diff --git a/lang_id/common/flatbuffers/embedding-network.fbs b/lang_id/common/flatbuffers/embedding-network.fbs
new file mode 100644
index 0000000..1fde6a3
--- /dev/null
+++ b/lang_id/common/flatbuffers/embedding-network.fbs
@@ -0,0 +1,117 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Flatbuffer schema for Neurosis (FFNN with embeddings) parameters.
+//
+// Contains the same information as an EmbeddingNetworkProto.
+
+namespace libtextclassifier3.saft_fbs;
+
+// NS stands for NeurosiS.  The next two digits are meant to identify
+// incompatible versions.  Ideally, we'll never have to go beyond 00.
+file_identifier "NS00";
+
+// Should be kept in sync with the C++ enum nlp_saft::QuantizationType.
+enum QuantizationType : byte {
+  NONE = 0,
+  UINT8 = 1,
+  UINT4 = 2,
+  FLOAT16 = 3,
+}
+
+table Matrix {
+  // Number of rows of this matrix.
+  rows:int;
+
+  // Number of columns of this matrix.
+  cols:int;
+
+  // Type of quantization used for the values from this matrix.
+  //
+  // If this is QuantizationType_NONE, then the unquantized values should be
+  // stored in |values| below.  Otherwise, the bytes of the quantized values
+  // should be stored in |quantized_values| and the float16 quantization scales
+  // should be stored in |scales|.
+  quantization_type:QuantizationType = NONE;
+
+  // Non-quantized matrix elements, in row-major order.  See comments for
+  // |quantization_type|.
+  values:[float];
+
+  // Quantized matrix elements, in row-major order.  See comments for
+  // |quantization_type|.
+  quantized_values:[ubyte];
+
+  // Quantization factors (float16), one per matrix row.  There is no float16
+  // primitive type for flatbuffers, we just use another 16 bit type.  See
+  // comments for |quantization_type|.
+  scales:[ushort];
+}
+
+// The input layer for a Neurosis network is composed of several parts (named
+// "chunks" below, "embedding spaces" in some other parts, etc).  For each
+// chunk, we have |num_features| features that extract feature values in that
+// chunk.  All values extracted by a feature get projected via the embedding
+// matrix |embedding| and summed together, producing a vector of
+// |embedding.cols| elements.  The resulting vector gets concatenated with the
+// similar vectors for other |num_features| features, producing a "chunk" of
+// |num_features * embedding.cols| elements.  This chunk gets concatenated with
+// the other chunks.
+//
+// Note: the specification that indicates what those |num_features| features are
+// is stored elsewhere (usually in a ModelParameter, see model.fbs).  But we
+// need to know |num_features| here, in order to specify the geometry of the
+// Neurosis network.
+table InputChunk {
+  embedding:Matrix;
+  num_features:int;
+}
+
+// One layer of neurons from the Neurosis network.  This table can represent a
+// hidden layer or the final (output / softmax) layer.
+//
+// Our formalism is a bit different, but equivalent to the usual description
+// from the literature:
+//
+// Technically, in Neurosis, each layer takes an input (a vector of floats); if
+// this is not the first layer, we apply a nonlinear function (ReLU); for the
+// first layer, we skip ReLU.  Next, we multiply by |weights| and add |bias|,
+// get the input for the next level and so on.  The output from the last layer
+// is generally used for softmax classification.  That's why we say that the
+// last layer is the "softmax layer".
+table NeuralLayer {
+  // Weight matrix for this layer.  Geometry: num_inputs x num_neurons, where
+  // num_inputs is the number of values produced by previous layer (which can be
+  // the input layer, or another hidden layer) and num_neurons is the number of
+  // neurons from this layer.
+  weights:Matrix;
+
+  // Bias vector for this layer.
+  //
+  // NOTE: right now, we accept both 1 x num_neurons and num_neurons x 1
+  // geometries: the layout of the elements is the same in both cases.
+  bias:Matrix;
+}
+
+table EmbeddingNetwork {
+  // Specification of the chunks that compose the input layer.
+  input_chunks:[InputChunk];
+
+  // Hidden layers, followed by the final (softmax) layer.
+  layers:[NeuralLayer];
+}
+
+root_type EmbeddingNetwork;
diff --git a/lang_id/common/flatbuffers/model-utils.cc b/lang_id/common/flatbuffers/model-utils.cc
new file mode 100644
index 0000000..2c57aa2
--- /dev/null
+++ b/lang_id/common/flatbuffers/model-utils.cc
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/flatbuffers/model-utils.h"
+
+#include <string.h>
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/math/checksum.h"
+
+namespace libtextclassifier3 {
+namespace saft_fbs {
+
+namespace {
+
+// Returns true if we have clear evidence that |model| fails its checksum.
+//
+// E.g., if |model| has the crc32 field, and the value of that field does not
+// match the checksum, then this function returns true.  If there is no crc32
+// field, then we don't know what the original (at build time) checksum was, so
+// we don't know anything clear and this function returns false.
+bool ClearlyFailsChecksum(const Model &model) {
+  if (!flatbuffers::IsFieldPresent(&model, Model::VT_CRC32)) {
+    SAFTM_LOG(WARNING)
+        << "No CRC32, most likely an old model; skip CRC32 check";
+    return false;
+  }
+  const mobile::uint32 expected_crc32 = model.crc32();
+  const mobile::uint32 actual_crc32 = ComputeCrc2Checksum(&model);
+  if (actual_crc32 != expected_crc32) {
+    SAFTM_LOG(ERROR) << "Corrupt model: different CRC32: " << actual_crc32
+                     << " vs " << expected_crc32;
+    return true;
+  }
+  SAFTM_LOG(INFO) << "Successfully checked CRC32 " << actual_crc32;
+  return false;
+}
+}  // namespace
+
+const Model *GetVerifiedModelFromBytes(const char *data, size_t num_bytes) {
+  if ((data == nullptr) || (num_bytes == 0)) {
+    SAFTM_LOG(ERROR) << "GetModel called on an empty sequence of bytes";
+    return nullptr;
+  }
+  const uint8_t *start = reinterpret_cast<const uint8_t *>(data);
+  flatbuffers::Verifier verifier(start, num_bytes);
+  if (!VerifyModelBuffer(verifier)) {
+    SAFTM_LOG(ERROR) << "Not a valid Model flatbuffer";
+    return nullptr;
+  }
+  const Model *model = GetModel(start);
+  if (model == nullptr) {
+    return nullptr;
+  }
+  if (ClearlyFailsChecksum(*model)) {
+    return nullptr;
+  }
+  return model;
+}
+
+const ModelInput *GetInputByName(const Model *model, const string &name) {
+  if (model == nullptr) {
+    SAFTM_LOG(ERROR) << "GetInputByName called with model == nullptr";
+    return nullptr;
+  }
+  const auto *inputs = model->inputs();
+  if (inputs == nullptr) {
+    // We should always have a list of inputs; maybe an empty one, if no inputs,
+    // but the list should be there.
+    SAFTM_LOG(ERROR) << "null inputs";
+    return nullptr;
+  }
+  for (const ModelInput *input : *inputs) {
+    if (input != nullptr) {
+      const flatbuffers::String *input_name = input->name();
+      if (input_name && input_name->str() == name) {
+        return input;
+      }
+    }
+  }
+  return nullptr;
+}
+
+mobile::StringPiece GetInputBytes(const ModelInput *input) {
+  if ((input == nullptr) || (input->data() == nullptr)) {
+    SAFTM_LOG(ERROR) << "ModelInput has no content";
+    return mobile::StringPiece(nullptr, 0);
+  }
+  const flatbuffers::Vector<uint8_t> *input_data = input->data();
+  if (input_data == nullptr) {
+    SAFTM_LOG(ERROR) << "null input data";
+    return mobile::StringPiece(nullptr, 0);
+  }
+  return mobile::StringPiece(reinterpret_cast<const char *>(input_data->data()),
+                             input_data->size());
+}
+
+bool FillParameters(const Model &model, mobile::TaskContext *context) {
+  if (context == nullptr) {
+    SAFTM_LOG(ERROR) << "null context";
+    return false;
+  }
+  const auto *parameters = model.parameters();
+  if (parameters == nullptr) {
+    // We should always have a list of parameters; maybe an empty one, if no
+    // parameters, but the list should be there.
+    SAFTM_LOG(ERROR) << "null list of parameters";
+    return false;
+  }
+  for (const ModelParameter *p : *parameters) {
+    if (p == nullptr) {
+      SAFTM_LOG(ERROR) << "null parameter";
+      return false;
+    }
+    if (p->name() == nullptr) {
+      SAFTM_LOG(ERROR) << "null parameter name";
+      return false;
+    }
+    const string name = p->name()->str();
+    if (name.empty()) {
+      SAFTM_LOG(ERROR) << "empty parameter name";
+      return false;
+    }
+    if (p->value() == nullptr) {
+      SAFTM_LOG(ERROR) << "null parameter value";
+      return false;
+    }
+    context->SetParameter(name, p->value()->str());
+  }
+  return true;
+}
+
+namespace {
+// Updates |*crc| with the information from |s|.  Auxiliary for
+// ComputeCrc2Checksum.
+//
+// The bytes from |info| are also used to update the CRC32 checksum.  |info|
+// should be a brief tag that indicates what |s| represents.  The idea is to add
+// some structure to the information that goes into the CRC32 computation.
+template <typename T>
+void UpdateCrc(mobile::Crc32 *crc, const flatbuffers::Vector<T> *s,
+               mobile::StringPiece info) {
+  crc->Update("|");
+  crc->Update(info.data(), info.size());
+  crc->Update(":");
+  if (s == nullptr) {
+    crc->Update("empty");
+  } else {
+    crc->Update(reinterpret_cast<const char *>(s->data()),
+                s->size() * sizeof(T));
+  }
+}
+}  // namespace
+
+mobile::uint32 ComputeCrc2Checksum(const Model *model) {
+  // Implementation note: originally, I (salcianu@) thought we can just compute
+  // a CRC32 checksum of the model bytes.  Unfortunately, the expected checksum
+  // is there too (and because we don't control the flatbuffer format, we can't
+  // "arrange" for it to be placed at the head / tail of those bytes).  Instead,
+  // we traverse |model| and feed into the CRC32 computation those parts we are
+  // interested in (which excludes the crc32 field).
+  //
+  // Note: storing the checksum outside the Model would be too disruptive for
+  // the way we currently ship our models.
+  mobile::Crc32 crc;
+  if (model == nullptr) {
+    return crc.Get();
+  }
+  crc.Update("|Parameters:");
+  const auto *parameters = model->parameters();
+  if (parameters != nullptr) {
+    for (const ModelParameter *p : *parameters) {
+      if (p != nullptr) {
+        UpdateCrc(&crc, p->name(), "name");
+        UpdateCrc(&crc, p->value(), "value");
+      }
+    }
+  }
+  crc.Update("|Inputs:");
+  const auto *inputs = model->inputs();
+  if (inputs != nullptr) {
+    for (const ModelInput *input : *inputs) {
+      if (input != nullptr) {
+        UpdateCrc(&crc, input->name(), "name");
+        UpdateCrc(&crc, input->type(), "type");
+        UpdateCrc(&crc, input->sub_type(), "sub-type");
+        UpdateCrc(&crc, input->data(), "data");
+      }
+    }
+  }
+  return crc.Get();
+}
+
+}  // namespace saft_fbs
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/flatbuffers/model-utils.h b/lang_id/common/flatbuffers/model-utils.h
new file mode 100644
index 0000000..5427f70
--- /dev/null
+++ b/lang_id/common/flatbuffers/model-utils.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_FLATBUFFERS_MODEL_UTILS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_FLATBUFFERS_MODEL_UTILS_H_
+
+#include <stddef.h>
+
+#include <string>
+
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/flatbuffers/model_generated.h"
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace saft_fbs {
+
+// Verifies that the |num_bytes| bytes that start at |data| represent a valid
+// Model flatbuffer.  If so, returns that Model.  Otherwise, returns nullptr.
+//
+// Note: if the Model has the crc32 field, this method checks that the Model
+// checksum matches that field; if they don't match, the Model is considered
+// invalid, and this function returns nullptr.  The checksum test is in addition
+// to the standard flatbuffer validity checking.
+const Model *GetVerifiedModelFromBytes(const char *data, size_t num_bytes);
+
+// Convenience StringPiece version of GetVerifiedModelFromBytes.
+inline const Model *GetVerifiedModelFromBytes(mobile::StringPiece bytes) {
+  return GetVerifiedModelFromBytes(bytes.data(), bytes.size());
+}
+
+// Returns the |model| input with specified |name|.  Returns nullptr if no such
+// input exists.  If |model| contains multiple inputs with that |name|, returns
+// the first one (model builders should avoid building such models).
+const ModelInput *GetInputByName(const Model *model, const string &name);
+
+// Returns a StringPiece pointing to the bytes for the content of |input|.  In
+// case of errors, returns StringPiece(nullptr, 0).
+mobile::StringPiece GetInputBytes(const ModelInput *input);
+
+// Fills parameters from |context|, based on the parameters from |model|.
+// Returns false if any error is encountered, true otherwise.  In the case of an
+// error, some parameters may have been added to |context| (e.g., if we find a
+// problem with the 3rd parameter, the first 2 have been added).
+bool FillParameters(const Model &model, mobile::TaskContext *context);
+
+// Returns the CRC32 checksum of |model|.  This checksum is computed over the
+// entire information from the model (including the bytes of the inputs),
+// *except* the crc32 field.  Hence, when a model is built, one can store the
+// result of this function into that field; on the user side, one can check that
+// the result of this function matches the crc32 field, to guard against model
+// corruption.  GetVerifiedModelFromBytes performs this check.
+mobile::uint32 ComputeCrc2Checksum(const Model *model);
+
+}  // namespace saft_fbs
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_FLATBUFFERS_MODEL_UTILS_H_
diff --git a/lang_id/common/flatbuffers/model.fbs b/lang_id/common/flatbuffers/model.fbs
new file mode 100644
index 0000000..41251e1
--- /dev/null
+++ b/lang_id/common/flatbuffers/model.fbs
@@ -0,0 +1,79 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Flatbuffer schema for SAFT models.
+//
+// For info on flatbuffers, see http://go/flatbuffers and
+// http://google.github.io/flatbuffers/, including info on writing schemas:
+// http://google.github.io/flatbuffers/flatbuffers_guide_writing_schema.html
+
+namespace libtextclassifier3.saft_fbs;
+
+// SM stands for Saft Model.  The next two digits are meant to identify
+// incompatible versions.  Ideally, we'll never have to go beyond 00.
+file_identifier "SM00";
+
+// Extension stands for Saft Model in FlatBuffer format.
+file_extension "smfb";
+
+table ModelParameter {
+  // Parameter name.
+  name:string;
+
+  // Parameter value.
+  value:string;
+}
+
+// Input for a SAFT model.  Inputs usually provide extra resources: e.g., the
+// parameters for a Neurosis FFNN with embeddings, or a word cluster structure,
+// etc.
+table ModelInput {
+  // Name of this input.  Different input of the same model should have
+  // different names, such that we can non-ambiguously look them up.
+  name:string;
+
+  // General description of the type of this input.  Required to parse the
+  // content of this input (see |data| below).  If |data| is a flatbuffer, use
+  // "flatbuffer".  If |data| is a proto, use "proto".  Otherwise, use your best
+  // judgment: use something human-readable, and look around to make sure you
+  // don't invent a new name for something that already exists.
+  type:string;
+
+  // More specific information about the type of this input.  E.g., if |type| is
+  // "flatbuffer", this should be the name of the root_type we should parse from
+// the input bytes, e.g., "EmbeddingNetwork".  If |type| is "proto", this
+  // should be the name of the proto serialized as |data|, e.g.,
+  // "EmbeddingNetworkProto".
+  sub_type:string;
+
+  // The content of this input.  With a generous alignment, such that we can
+  // accommodate mmap-friendly data structures.  E.g., the word clusters used by
+  // the Translate team require 8-byte alignment.
+  data:[ubyte] (force_align: 16);
+}
+
+// A Saft model.  A list of parameters with model settings (e.g., the
+// specification of the features to use) and a list of inputs.
+table Model {
+  parameters:[ModelParameter];
+  inputs:[ModelInput];
+
+  // Crc32 checksum of all parameters and inputs (including the bytes of the
+  // inputs).  Used to check that the model has not been corrupted.
+  crc32:uint32;
+}
+
+root_type Model;
diff --git a/lang_id/common/lite_base/attributes.h b/lang_id/common/lite_base/attributes.h
new file mode 100644
index 0000000..f29e48f
--- /dev/null
+++ b/lang_id/common/lite_base/attributes.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Various macros related to function inlining.
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_ATTRIBUTES_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_ATTRIBUTES_H_
+
+// SAFTM_HAVE_ATTRIBUTE
+//
+// A function-like feature checking macro that is a wrapper around
+// `__has_attribute`, which is defined by GCC 5+ and Clang and evaluates to a
+// nonzero constant integer if the attribute is supported or 0 if not.
+//
+// It evaluates to zero if `__has_attribute` is not defined by the compiler.
+//
+// GCC: https://gcc.gnu.org/gcc-5/changes.html
+// Clang: https://clang.llvm.org/docs/LanguageExtensions.html
+#ifdef __has_attribute
+#define SAFTM_HAVE_ATTRIBUTE(x) __has_attribute(x)
+#else
+#define SAFTM_HAVE_ATTRIBUTE(x) 0
+#endif
+
+// SAFTM_MUST_USE_RESULT
+//
+// Tells the compiler to warn about unused return values for functions declared
+// with this macro. The macro must appear as the very first part of a function
+// declaration or definition:
+//
+// Example:
+//
+//   SAFTM_MUST_USE_RESULT Sprocket* AllocateSprocket();
+//
+// This placement has the broadest compatibility with GCC, Clang, and MSVC, with
+// both defs and decls, and with GCC-style attributes, MSVC declspec, C++11
+// and C++17 attributes.
+//
+// SAFTM_MUST_USE_RESULT allows using cast-to-void to suppress the unused result
+// warning. For that, warn_unused_result is used only for clang but not for gcc.
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66425
+#if SAFTM_HAVE_ATTRIBUTE(nodiscard)
+#define SAFTM_MUST_USE_RESULT [[nodiscard]]
+#elif defined(__clang__) && SAFTM_HAVE_ATTRIBUTE(warn_unused_result)
+#define SAFTM_MUST_USE_RESULT __attribute__((warn_unused_result))
+#else
+#define SAFTM_MUST_USE_RESULT
+#endif
+
+#if defined(__GNUC__) && \
+    (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
+
+// For functions we want to force inline.
+// Introduced in gcc 3.1.
+#define SAFTM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+
+// For functions we don't want to inline, e.g., to keep code size small.
+#define SAFTM_ATTRIBUTE_NOINLINE __attribute__((noinline))
+
+#elif defined(_MSC_VER)
+#define SAFTM_ATTRIBUTE_ALWAYS_INLINE __forceinline
+#else
+
+// Other compilers will have to figure it out for themselves.
+#define SAFTM_ATTRIBUTE_ALWAYS_INLINE
+#define SAFTM_ATTRIBUTE_NOINLINE
+#endif  // big condition on two lines.
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_ATTRIBUTES_H_
diff --git a/util/base/casts.h b/lang_id/common/lite_base/casts.h
similarity index 87%
copy from util/base/casts.h
copy to lang_id/common/lite_base/casts.h
index a1d2056..11a4ba2 100644
--- a/util/base/casts.h
+++ b/lang_id/common/lite_base/casts.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_CASTS_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_CASTS_H_
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_CASTS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_CASTS_H_
 
 #include <string.h>  // for memcpy
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 // bit_cast<Dest, Source> is a template function that implements the equivalent
 // of "*reinterpret_cast<Dest*>(&source)".  We need this in very low-level
@@ -69,14 +70,15 @@
 // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
 // compiles to two loads and two stores.
 //
-// Mike Chastain tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc
-// 7.1.
+// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
 //
 // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
 // is likely to surprise you.
 //
 // Props to Bill Gibbons for the compile time assertion technique and
 // Art Komninos and Igor Tandetnik for the msvc experiments.
+//
+// -- mec 2005-10-17
 
 template <class Dest, class Source>
 inline Dest bit_cast(const Source &source) {
@@ -87,6 +89,7 @@
   return dest;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace nlp_saft
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_CASTS_H_
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_CASTS_H_
diff --git a/lang_id/common/lite_base/compact-logging-levels.h b/lang_id/common/lite_base/compact-logging-levels.h
new file mode 100644
index 0000000..977f4da
--- /dev/null
+++ b/lang_id/common/lite_base/compact-logging-levels.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC3_STD_STRING_IMPORT
+#define TC3_STD_STRING_IMPORT
+#include <string>
+
+namespace libtextclassifier3 {
+using string = std::string;
+template <class CharT, class Traits = std::char_traits<CharT>,
+          class Allocator = std::allocator<CharT> >
+using basic_string = std::basic_string<CharT, Traits, Allocator>;
+}  // namespace libtextclassifier3
+#endif
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_LEVELS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_LEVELS_H_
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace internal_logging {
+
+enum LogSeverity {
+  FATAL = 0,
+  ERROR,
+  WARNING,
+  INFO,
+
+  // In debug mode, DFATAL has the same semantics as FATAL.  Otherwise, it
+  // behaves like ERROR.
+  DFATAL,
+};
+
+}  // namespace internal_logging
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_LEVELS_H_
diff --git a/util/base/logging_raw.cc b/lang_id/common/lite_base/compact-logging-raw.cc
similarity index 75%
copy from util/base/logging_raw.cc
copy to lang_id/common/lite_base/compact-logging-raw.cc
index 6d97852..53dfc8e 100644
--- a/util/base/logging_raw.cc
+++ b/lang_id/common/lite_base/compact-logging-raw.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "util/base/logging_raw.h"
+#include "lang_id/common/lite_base/compact-logging-raw.h"
 
 #include <stdio.h>
 #include <string>
@@ -26,8 +26,9 @@
 // Compiled as part of Android.
 #include <android/log.h>
 
-namespace libtextclassifier2 {
-namespace logging {
+namespace libtextclassifier3 {
+namespace mobile {
+namespace internal_logging {
 
 namespace {
 // Converts LogSeverity to level for __android_log_write.
@@ -47,10 +48,10 @@
 }
 }  // namespace
 
-void LowLevelLogging(LogSeverity severity, const std::string& tag,
-                     const std::string& message) {
+void LowLevelLogging(LogSeverity severity, const string &tag,
+                     const string &message) {
   const int android_log_level = GetAndroidLogLevel(severity);
-#if !defined(TC_DEBUG_LOGGING)
+#if !defined(SAFTM_DEBUG_LOGGING)
   if (android_log_level != ANDROID_LOG_ERROR &&
       android_log_level != ANDROID_LOG_FATAL) {
     return;
@@ -59,14 +60,16 @@
   __android_log_write(android_log_level, tag.c_str(), message.c_str());
 }
 
-}  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace internal_logging
+}  // namespace mobile
+}  // namespace nlp_saft
 
 #else  // if defined(__ANDROID__)
 
 // Not on Android: implement LowLevelLogging to print to stderr (see below).
-namespace libtextclassifier2 {
-namespace logging {
+namespace libtextclassifier3 {
+namespace mobile {
+namespace internal_logging {
 
 namespace {
 // Converts LogSeverity to human-readable text.
@@ -86,14 +89,15 @@
 }
 }  // namespace
 
-void LowLevelLogging(LogSeverity severity, const std::string &tag,
-                     const std::string &message) {
+void LowLevelLogging(LogSeverity severity, const string &tag,
+                     const string &message) {
   fprintf(stderr, "[%s] %s : %s\n", LogSeverityToString(severity), tag.c_str(),
           message.c_str());
   fflush(stderr);
 }
 
-}  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace internal_logging
+}  // namespace mobile
+}  // namespace nlp_saft
 
 #endif  // if defined(__ANDROID__)
diff --git a/lang_id/common/lite_base/compact-logging-raw.h b/lang_id/common/lite_base/compact-logging-raw.h
new file mode 100644
index 0000000..f67287c
--- /dev/null
+++ b/lang_id/common/lite_base/compact-logging-raw.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_RAW_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_RAW_H_
+
+#include <string>
+
+#include "lang_id/common/lite_base/compact-logging-levels.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace internal_logging {
+
+// Low-level logging primitive.  Logs a message, with the indicated log
+// severity.  From android/log.h: "the tag normally corresponds to the component
+// that emits the log message, and should be reasonably small".
+void LowLevelLogging(LogSeverity severity, const string &tag,
+                     const string &message);
+
+}  // namespace internal_logging
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_RAW_H_
diff --git a/util/base/logging.cc b/lang_id/common/lite_base/compact-logging.cc
similarity index 69%
copy from util/base/logging.cc
copy to lang_id/common/lite_base/compact-logging.cc
index 919bb36..99d60a3 100644
--- a/util/base/logging.cc
+++ b/lang_id/common/lite_base/compact-logging.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,24 @@
  * limitations under the License.
  */
 
-#include "util/base/logging.h"
+#include "lang_id/common/lite_base/compact-logging.h"
 
 #include <stdlib.h>
 
 #include <iostream>
 
-#include "util/base/logging_raw.h"
+#include "lang_id/common/lite_base/compact-logging-raw.h"
 
-namespace libtextclassifier2 {
-namespace logging {
+#ifndef SAFTM_LOGGING_TAG
+
+// Tag inserted in the prefix of the generated log messages.  The user can
+// override this by defining this macro on the blaze build command-line.
+#define SAFTM_LOGGING_TAG "saftm"
+#endif
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace internal_logging {
 
 namespace {
 // Returns pointer to beginning of last /-separated token from file_name.
@@ -57,11 +65,20 @@
 }
 
 LogMessage::~LogMessage() {
-  LowLevelLogging(severity_, /* tag = */ "txtClsf", stream_.message);
-  if (severity_ == FATAL) {
+  LogSeverity level = severity_;
+  if (level == DFATAL) {
+#ifdef NDEBUG
+    level = ERROR;
+#else
+    level = FATAL;
+#endif
+  }
+  LowLevelLogging(level, /* tag = */ SAFTM_LOGGING_TAG, stream_.message);
+  if (level == FATAL) {
     exit(1);
   }
 }
 
-}  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace internal_logging
+}  // namespace mobile
+}  // namespace nlp_saft
diff --git a/lang_id/common/lite_base/compact-logging.h b/lang_id/common/lite_base/compact-logging.h
new file mode 100644
index 0000000..eccb7d1
--- /dev/null
+++ b/lang_id/common/lite_base/compact-logging.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_H_
+
+#include <cassert>
+#include <string>
+
+#include "lang_id/common/lite_base/attributes.h"
+#include "lang_id/common/lite_base/compact-logging-levels.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace internal_logging {
+
+// A tiny code footprint string stream for assembling log messages.
+struct LoggingStringStream {
+  LoggingStringStream() {}
+  LoggingStringStream &stream() { return *this; }
+
+  // Needed for invocation in SAFTM_CHECK macro.
+  explicit operator bool() const { return true; }
+
+  string message;
+};
+
+template <typename T>
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const T &entry) {
+  stream.message.append(std::to_string(entry));
+  return stream;
+}
+
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const char *message) {
+  stream.message.append(message);
+  return stream;
+}
+
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const string &message) {
+  stream.message.append(message);
+  return stream;
+}
+
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       StringPiece sp) {
+  stream.message.append(sp.data(), sp.size());
+  return stream;
+}
+
+// The class that does all the work behind our SAFTM_LOG(severity) macros.  Each
+// SAFTM_LOG(severity) << obj1 << obj2 << ...; logging statement creates a
+// LogMessage temporary object containing a stringstream.  Each operator<< adds
+// info to that stringstream and the LogMessage destructor performs the actual
+// logging.  The reason this works is that in C++, "all temporary objects are
+// destroyed as the last step in evaluating the full-expression that (lexically)
+// contains the point where they were created."  For more info, see
+// http://en.cppreference.com/w/cpp/language/lifetime.  Hence, the destructor is
+// invoked after the last << from that logging statement.
+class LogMessage {
+ public:
+  LogMessage(LogSeverity severity, const char *file_name,
+             int line_number) SAFTM_ATTRIBUTE_NOINLINE;
+
+  ~LogMessage() SAFTM_ATTRIBUTE_NOINLINE;
+
+  // Returns the stream associated with the logger object.
+  LoggingStringStream &stream() { return stream_; }
+
+ private:
+  const LogSeverity severity_;
+
+  // Stream that "prints" all info into a string (not to a file).  We construct
+  // here the entire logging message and next print it in one operation.
+  LoggingStringStream stream_;
+};
+
+// Pseudo-stream that "eats" the tokens <<-pumped into it, without printing
+// anything.
+class NullStream {
+ public:
+  NullStream() {}
+  NullStream &stream() { return *this; }
+};
+template <typename T>
+inline NullStream &operator<<(NullStream &str, const T &) {
+  return str;
+}
+
+}  // namespace internal_logging
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#define SAFTM_LOG(severity)                                               \
+  ::libtextclassifier3::mobile::internal_logging::LogMessage(                       \
+      ::libtextclassifier3::mobile::internal_logging::severity, __FILE__, __LINE__) \
+      .stream()
+
+// If condition x is true, does nothing.  Otherwise, crashes the program (like
+// LOG(FATAL)) with an informative message.  Can be continued with extra
+// messages, via <<, like any logging macro, e.g.,
+//
+// SAFTM_CHECK(my_cond) << "I think we hit a problem";
+#define SAFTM_CHECK(x)                                                \
+  (x) || SAFTM_LOG(FATAL) << __FILE__ << ":" << __LINE__              \
+  << ": check failed: \"" << #x
+
+#define SAFTM_CHECK_EQ(x, y) SAFTM_CHECK((x) == (y))
+#define SAFTM_CHECK_LT(x, y) SAFTM_CHECK((x) < (y))
+#define SAFTM_CHECK_GT(x, y) SAFTM_CHECK((x) > (y))
+#define SAFTM_CHECK_LE(x, y) SAFTM_CHECK((x) <= (y))
+#define SAFTM_CHECK_GE(x, y) SAFTM_CHECK((x) >= (y))
+#define SAFTM_CHECK_NE(x, y) SAFTM_CHECK((x) != (y))
+
+#define SAFTM_NULLSTREAM \
+  ::libtextclassifier3::mobile::internal_logging::NullStream().stream()
+
+// Debug checks: a SAFTM_DCHECK<suffix> macro should behave like
+// SAFTM_CHECK<suffix> in debug mode and don't check / don't print anything in
+// non-debug mode.
+#ifdef NDEBUG
+
+#define SAFTM_DCHECK(x) SAFTM_NULLSTREAM
+#define SAFTM_DCHECK_EQ(x, y) SAFTM_NULLSTREAM
+#define SAFTM_DCHECK_LT(x, y) SAFTM_NULLSTREAM
+#define SAFTM_DCHECK_GT(x, y) SAFTM_NULLSTREAM
+#define SAFTM_DCHECK_LE(x, y) SAFTM_NULLSTREAM
+#define SAFTM_DCHECK_GE(x, y) SAFTM_NULLSTREAM
+#define SAFTM_DCHECK_NE(x, y) SAFTM_NULLSTREAM
+
+// In non-debug mode, SAFTM_DLOG statements do not generate any logging.
+#define SAFTM_DLOG(severity) SAFTM_NULLSTREAM
+
+#else  // NDEBUG
+
+// In debug mode, each SAFTM_DCHECK<suffix> is equivalent to
+// SAFTM_CHECK<suffix>, i.e., a real check that crashes when the condition is
+// not true.
+#define SAFTM_DCHECK(x) SAFTM_CHECK(x)
+#define SAFTM_DCHECK_EQ(x, y) SAFTM_CHECK_EQ(x, y)
+#define SAFTM_DCHECK_LT(x, y) SAFTM_CHECK_LT(x, y)
+#define SAFTM_DCHECK_GT(x, y) SAFTM_CHECK_GT(x, y)
+#define SAFTM_DCHECK_LE(x, y) SAFTM_CHECK_LE(x, y)
+#define SAFTM_DCHECK_GE(x, y) SAFTM_CHECK_GE(x, y)
+#define SAFTM_DCHECK_NE(x, y) SAFTM_CHECK_NE(x, y)
+
+// In debug mode, SAFTM_DLOG statements are like SAFTM_LOG.
+#define SAFTM_DLOG SAFTM_LOG
+
+#endif  // NDEBUG
+
+#ifdef LIBTEXTCLASSIFIER_VLOG
+#define SAFTM_VLOG(severity)                                              \
+  ::libtextclassifier3::mobile::internal_logging::LogMessage(                     \
+       ::libtextclassifier3::mobile::internal_logging::INFO, __FILE__, __LINE__)  \
+  .stream()
+#else
+#define SAFTM_VLOG(severity) SAFTM_NULLSTREAM
+#endif
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_COMPACT_LOGGING_H_
diff --git a/lang_id/common/lite_base/endian.h b/lang_id/common/lite_base/endian.h
new file mode 100644
index 0000000..16c2dca
--- /dev/null
+++ b/lang_id/common/lite_base/endian.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_ENDIAN_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_ENDIAN_H_
+
+#include "lang_id/common/lite_base/integral-types.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+#if defined OS_LINUX || defined OS_CYGWIN || defined OS_ANDROID || \
+    defined(__ANDROID__)
+#include <endian.h>
+#endif
+
+// The following guarantees declaration of the byte swap functions, and
+// defines __BYTE_ORDER for MSVC
+#if defined(__GLIBC__) || defined(__CYGWIN__)
+#include <byteswap.h>  // IWYU pragma: export
+
+#else
+#ifndef bswap_16
+static inline uint16 bswap_16(uint16 x) {
+  return (uint16)(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8));  // NOLINT
+}
+#define bswap_16(x) bswap_16(x)
+#endif  // bswap_16
+
+#ifndef bswap_32
+static inline uint32 bswap_32(uint32 x) {
+  return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) |
+          ((x & 0xFF000000) >> 24));
+}
+#define bswap_32(x) bswap_32(x)
+#endif  // bswap_32
+
+#ifndef bswap_64
+#define SAFTM_GG_ULONGLONG(x) x##ULL
+static inline uint64 bswap_64(uint64 x) {
+  return (((x & SAFTM_GG_ULONGLONG(0xFF)) << 56) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF00)) << 40) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF0000)) << 24) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF000000)) << 8) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF00000000)) >> 8) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF0000000000)) >> 24) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF000000000000)) >> 40) |
+          ((x & SAFTM_GG_ULONGLONG(0xFF00000000000000)) >> 56));
+}
+#define bswap_64(x) bswap_64(x)
+#endif  // bswap_64
+
+#endif
+
+// define the macros SAFTM_IS_LITTLE_ENDIAN or SAFTM_IS_BIG_ENDIAN using the
+// above endian definitions from endian.h if endian.h was included
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define SAFTM_IS_LITTLE_ENDIAN
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define SAFTM_IS_BIG_ENDIAN
+#endif
+
+#else  // __BYTE_ORDER
+
+#if defined(__LITTLE_ENDIAN__)
+#define SAFTM_IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define SAFTM_IS_BIG_ENDIAN
+#endif
+
+// there is also PDP endian ...
+
+#endif  // __BYTE_ORDER
+
+class LittleEndian {
+ public:
+// Conversion functions.
+#ifdef SAFTM_IS_LITTLE_ENDIAN
+
+  static uint16 FromHost16(uint16 x) { return x; }
+  static uint16 ToHost16(uint16 x) { return x; }
+
+  static uint32 FromHost32(uint32 x) { return x; }
+  static uint32 ToHost32(uint32 x) { return x; }
+
+  static uint64 FromHost64(uint64 x) { return x; }
+  static uint64 ToHost64(uint64 x) { return x; }
+
+  static bool IsLittleEndian() { return true; }
+
+#elif defined SAFTM_IS_BIG_ENDIAN
+
+  static uint16 FromHost16(uint16 x) { return gbswap_16(x); }
+  static uint16 ToHost16(uint16 x) { return gbswap_16(x); }
+
+  static uint32 FromHost32(uint32 x) { return gbswap_32(x); }
+  static uint32 ToHost32(uint32 x) { return gbswap_32(x); }
+
+  static uint64 FromHost64(uint64 x) { return gbswap_64(x); }
+  static uint64 ToHost64(uint64 x) { return gbswap_64(x); }
+
+  static bool IsLittleEndian() { return false; }
+
+#endif /* ENDIAN */
+};
+
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_ENDIAN_H_
diff --git a/lang_id/common/lite_base/float16.h b/lang_id/common/lite_base/float16.h
new file mode 100644
index 0000000..bc3fd21
--- /dev/null
+++ b/lang_id/common/lite_base/float16.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_FLOAT16_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_FLOAT16_H_
+
+#include "lang_id/common/lite_base/casts.h"
+#include "lang_id/common/lite_base/integral-types.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// 16 bit encoding of a float.  NOTE: can't be used directly for computation:
+// one first needs to convert it to a normal float, using Float16To32.
+//
+// Compact 16-bit encoding of floating point numbers. This
+// representation uses 1 bit for the sign, 8 bits for the exponent and
+// 7 bits for the mantissa.  It is assumed that floats are in IEEE 754
+// format so a float16 is just bits 16-31 of a single precision float.
+//
+// NOTE: The IEEE floating point standard defines a float16 format that
+// is different than this format (it has fewer bits of exponent and more
+// bits of mantissa).  We don't use that format here because conversion
+// to/from 32-bit floats is more complex for that format, and the
+// conversion for this format is very simple.
+//
+// <---------float16------------>
+// s e e e e e e e e f f f f f f f f f f f f f f f f f f f f f f f
+// <------------------------------float-------------------------->
+// 3 3             2 2             1 1                           0
+// 1 0             3 2             5 4                           0
+
+typedef uint16 float16;
+
+static inline float16 Float32To16(float f) {
+  // Note that we just truncate the mantissa bits: we make no effort to
+  // do any smarter rounding.
+  return (bit_cast<uint32>(f) >> 16) & 0xffff;
+}
+
+static inline float Float16To32(float16 f) {
+  // We fill in the new mantissa bits with 0, and don't do anything smarter.
+  return bit_cast<float>(f << 16);
+}
+
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_FLOAT16_H_
diff --git a/util/base/integral_types.h b/lang_id/common/lite_base/integral-types.h
similarity index 79%
copy from util/base/integral_types.h
copy to lang_id/common/lite_base/integral-types.h
index f82c9cd..4c3038c 100644
--- a/util/base/integral_types.h
+++ b/lang_id/common/lite_base/integral-types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,12 +16,11 @@
 
 // Basic integer type definitions.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_INTEGRAL_TYPES_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_INTEGRAL_TYPES_H_
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_INTEGRAL_TYPES_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_INTEGRAL_TYPES_H_
 
-#include "util/base/config.h"
-
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 typedef unsigned int uint32;
 typedef unsigned long long uint64;
@@ -45,8 +44,6 @@
 #endif  // COMPILER_MSVC
 
 // Some compile-time assertions that our new types have the intended size.
-// static_assert exists only since C++11, so we need an ifdef.
-#ifdef LANG_CXX11
 static_assert(sizeof(int) == 4, "Our typedefs depend on int being 32 bits");
 static_assert(sizeof(uint32) == 4, "wrong size");
 static_assert(sizeof(int32) == 4, "wrong size");
@@ -54,8 +51,8 @@
 static_assert(sizeof(uint16) == 2, "wrong size");
 static_assert(sizeof(char32) == 4, "wrong size");
 static_assert(sizeof(int64) == 8, "wrong size");
-#endif  // LANG_CXX11
 
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace nlp_saft
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_INTEGRAL_TYPES_H_
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_INTEGRAL_TYPES_H_
diff --git a/lang_id/common/lite_base/logging.h b/lang_id/common/lite_base/logging.h
new file mode 100644
index 0000000..88797cb
--- /dev/null
+++ b/lang_id/common/lite_base/logging.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_LOGGING_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_LOGGING_H_
+
+#ifdef SAFTM_COMPACT_LOGGING
+
+// One gets the compact logging only when one requests it explicitly, by passing
+// --define saftm_compact_logging=true on the blaze command-line.
+#include "lang_id/common/lite_base/compact-logging.h"
+
+#else
+
+// Otherwise, one gets the standard base/logging.h.  You should do so, unless you
+// have a really good reason to switch to the compact logging.
+#include "base/logging.h"
+
+#define SAFTM_LOG LOG
+#define SAFTM_CHECK CHECK
+#define SAFTM_CHECK_EQ CHECK_EQ
+#define SAFTM_CHECK_LT CHECK_LT
+#define SAFTM_CHECK_LE CHECK_LE
+#define SAFTM_CHECK_GT CHECK_GT
+#define SAFTM_CHECK_GE CHECK_GE
+#define SAFTM_CHECK_NE CHECK_NE
+
+#define SAFTM_DLOG DLOG
+#define SAFTM_DCHECK DCHECK
+#define SAFTM_DCHECK_EQ DCHECK_EQ
+#define SAFTM_DCHECK_LT DCHECK_LT
+#define SAFTM_DCHECK_LE DCHECK_LE
+#define SAFTM_DCHECK_GT DCHECK_GT
+#define SAFTM_DCHECK_GE DCHECK_GE
+#define SAFTM_DCHECK_NE DCHECK_NE
+
+#endif  // SAFTM_COMPACT_LOGGING
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_LOGGING_H_
diff --git a/lang_id/common/lite_base/macros.h b/lang_id/common/lite_base/macros.h
new file mode 100644
index 0000000..8fe5e8a
--- /dev/null
+++ b/lang_id/common/lite_base/macros.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_MACROS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_MACROS_H_
+
+#define SAFTM_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName &) = delete;         \
+  TypeName &operator=(const TypeName &) = delete
+
+// The SAFTM_FALLTHROUGH_INTENDED macro can be used to annotate implicit
+// fall-through between switch labels:
+//
+//  switch (x) {
+//    case 40:
+//    case 41:
+//      if (truth_is_out_there) {
+//        ++x;
+//        SAFTM_FALLTHROUGH_INTENDED;  // Use instead of/along with annotations
+//                                     // in comments.
+//      } else {
+//        return x;
+//      }
+//    case 42:
+//      ...
+//
+//  As shown in the example above, the SAFTM_FALLTHROUGH_INTENDED macro should
+//  be followed by a semicolon. It is designed to mimic control-flow statements
+//  like 'break;', so it can be placed in most places where 'break;' can, but
+//  only if there are no statements on the execution path between it and the
+//  next switch label.
+//
+//  When compiled with clang, the SAFTM_FALLTHROUGH_INTENDED macro is expanded
+//  to [[clang::fallthrough]] attribute, which is analysed when performing
+//  switch labels fall-through diagnostic ('-Wimplicit-fallthrough').  See clang
+//  documentation on language extensions for details:
+//  http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+//
+//  When used with unsupported compilers, the SAFTM_FALLTHROUGH_INTENDED macro
+//  has no effect on diagnostics.
+//
+//  In either case this macro has no effect on runtime behavior and performance
+//  of code.
+#if defined(__clang__) && defined(__has_warning)
+#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
+#define SAFTM_FALLTHROUGH_INTENDED [[clang::fallthrough]]  // NOLINT
+#endif
+#endif
+
+#ifndef SAFTM_FALLTHROUGH_INTENDED
+#define SAFTM_FALLTHROUGH_INTENDED \
+  do {                           \
+  } while (0)
+#endif
+
+// SAFTM_UNIQUE_ID(prefix) expands to a unique id that starts with prefix.
+//
+// The current implementation expands to prefix_<line_number>; hence, multiple
+// uses of this macro with the same prefix and on the same line will result in
+// the same identifier name.  In those cases, if you need different ids, we
+// suggest you use different prefixes.
+//
+// Implementation is tricky; for more info, see
+// https://stackoverflow.com/questions/1597007/creating-c-macro-with-and-line-token-concatenation-with-positioning-macr
+#define SAFTM_UNIQUE_ID_INTERNAL2(x, y)  x ## y
+#define SAFTM_UNIQUE_ID_INTERNAL(x, y)   SAFTM_UNIQUE_ID_INTERNAL2(x, y)
+#define SAFTM_UNIQUE_ID(prefix)  SAFTM_UNIQUE_ID_INTERNAL(prefix ## _, __LINE__)
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_BASE_MACROS_H_
diff --git a/lang_id/common/lite_strings/numbers.cc b/lang_id/common/lite_strings/numbers.cc
new file mode 100644
index 0000000..e0c66f3
--- /dev/null
+++ b/lang_id/common/lite_strings/numbers.cc
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/lite_strings/numbers.h"
+
+#include <ctype.h>
+#include <stdlib.h>
+#include <climits>
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Returns true if the characters that start at address ptr (inclusive) and stop
+// at the first '\0' consist of only whitespaces, as determined by isspace().
+// Note: this function returns false if ptr is nullptr.
+static bool OnlyWhitespaces(const char *ptr) {
+  if (ptr == nullptr) {
+    return false;
+  }
+  for (; *ptr != '\0'; ++ptr) {
+    if (!isspace(static_cast<unsigned char>(*ptr))) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool LiteAtoi(const char *c_str, int *value) {
+  if (c_str == nullptr) {
+    return false;
+  }
+
+  // Short version of man strtol:
+  //
+  // strtol parses some optional whitespaces, an optional +/- sign, and next a
+  // succession of digits.  If it finds some digits, it sets temp to point to
+  // the first character after that succession of digits and returns the parsed
+  // integer.
+  //
+  // If there were no digits at all, strtol() sets temp to be c_str (the start
+  // address) and returns 0.
+  char *temp = nullptr;
+  const long int parsed_value = strtol(c_str, &temp, 0);  // NOLINT
+
+  // Check for overflow.  Note: to simplify the code, we assume that LONG_MIN /
+  // LONG_MAX means that strtol encountered an overflow (normally, in that case,
+  // one should also inspect errno).  Hence, we maybe give up the possibility to
+  // parse one extreme value on each side (min/max).  That should be ok.
+  if ((parsed_value == LONG_MIN) || (parsed_value == LONG_MAX) ||
+      (parsed_value < INT_MIN) || (parsed_value > INT_MAX)) {
+    return false;
+  }
+  *value = static_cast<int>(parsed_value);
+
+  // First part of the expression below means that the input string contained at
+  // least one digit.  The other part checks that what remains after the number
+  // (if anything) consists only of whitespaces.
+  return (temp != c_str) && OnlyWhitespaces(temp);
+}
+
+bool LiteAtof(const char *c_str, float *value) {
+  if (c_str == nullptr) {
+    return false;
+  }
+
+  // strtof is similar to strtol, see more detailed comments inside LiteAtoi.
+  char *temp = nullptr;
+  *value = strtof(c_str, &temp);
+  return (temp != c_str) && OnlyWhitespaces(temp);
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/lite_strings/numbers.h b/lang_id/common/lite_strings/numbers.h
new file mode 100644
index 0000000..4b3c93c
--- /dev/null
+++ b/lang_id/common/lite_strings/numbers.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_NUMBERS_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_NUMBERS_H_
+
+#include <string>
+
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Parses an int from a C-style string; similar to absl::SimpleAtoi.
+//
+// c_str should point to a zero-terminated array of chars that contains the
+// number representation as (a) "<radix-10-number>" (e.g., "721"), (b)
+// "0x<radix-16-number>" (e.g., "0xa1"), or (c) "0<radix-8-number>" (e.g.,
+// "017201").  Whitespaces (as determined by isspace()) are allowed before and
+// after the number representation (but obviously not in the middle).
+//
+// Stores parsed number into *value.  Returns true on success, false on error.
+// Note: presence of extra non-whitespace characters after the number counts as
+// an error: e.g., parsing "123a" will return false due to the extra "a" (which
+// is not a valid radix-10 digit).  This function also returns false for strings
+// that do not contain any digit (e.g., ""), as well as for overflows /
+// underflows.
+bool LiteAtoi(const char *c_str, int *value);
+
+inline bool LiteAtoi(const string &s, int *value) {
+  return LiteAtoi(s.c_str(), value);
+}
+
+inline bool LiteAtoi(StringPiece sp, int *value) {
+  // Unfortunately, we can't directly call LiteAtoi(sp.data()): LiteAtoi(const
+  // char *) needs a zero-terminated string.
+  const string temp(sp.data(), sp.size());
+  return LiteAtoi(temp.c_str(), value);
+}
+
+// Like LiteAtoi, but for float; similar to absl::SimpleAtof.
+//
+// NOTE: currently, does not properly handle overflow / underflow.
+// TODO(salcianu): fix that.
+bool LiteAtof(const char *c_str, float *value);
+
+inline bool LiteAtof(const string &s, float *value) {
+  return LiteAtof(s.c_str(), value);
+}
+
+inline bool LiteAtof(StringPiece sp, float *value) {
+  // Unfortunately, we can't directly call LiteAtoi(sp.data()): LiteAtoi(const
+  // char *) needs a zero-terminated string.
+  const string temp(sp.data(), sp.size());
+  return LiteAtof(temp.c_str(), value);
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_NUMBERS_H_
diff --git a/lang_id/common/lite_strings/str-cat.h b/lang_id/common/lite_strings/str-cat.h
new file mode 100644
index 0000000..f24e6e6
--- /dev/null
+++ b/lang_id/common/lite_strings/str-cat.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STR_CAT_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STR_CAT_H_
+
+// Less efficient but more compact versions of several absl string utils.
+//
+// "More compact" means "pulls in fewer code dependencies".  That's useful if
+// one tries to minimize the code size.
+//
+// Note: the name and the signature of the functions from this header were
+// chosen to minimize the effort of converting code that uses absl::LiteStrCat &
+// co to our more compact functions.
+
+#include <string>
+
+#ifdef COMPILER_MSVC
+#include <sstream>
+#endif  // COMPILER_MSVC
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Less efficient but more compact version of absl::LiteStrCat().
+//
+// Given a value v (see supported types below) LiteStrCat(v) returns a new
+// string that contains the representation of v.  For examples, see
+// str-cat_test.cc.
+template <typename T>
+inline string LiteStrCat(T v) {
+#ifdef COMPILER_MSVC
+  std::stringstream stream;
+  stream << v;
+  return stream.str();
+#else
+  return std::to_string(v);
+#endif
+}
+
+template <>
+inline string LiteStrCat(const char *v) {
+  return string(v);
+}
+
+// TODO(salcianu): use a reference type (const string &).  For some reason, I
+// couldn't get that to work on a first try.
+template <>
+inline string LiteStrCat(string v) {
+  return v;
+}
+
+template <>
+inline string LiteStrCat(char v) {
+  return string(1, v);
+}
+
+// Less efficient but more compact version of absl::LiteStrAppend().
+template <typename T>
+inline void LiteStrAppend(string *dest, T v) {
+  dest->append(LiteStrCat(v));  // NOLINT
+}
+
+template <typename T1, typename T2>
+inline void LiteStrAppend(string *dest, T1 v1, T2 v2) {
+  dest->append(LiteStrCat(v1));  // NOLINT
+  dest->append(LiteStrCat(v2));  // NOLINT
+}
+
+template <typename T1, typename T2, typename T3>
+inline void LiteStrAppend(string *dest, T1 v1, T2 v2, T3 v3) {
+  LiteStrAppend(dest, v1, v2);
+  dest->append(LiteStrCat(v3));  // NOLINT
+}
+
+template <typename T1, typename T2, typename T3, typename T4>
+inline void LiteStrAppend(string *dest, T1 v1, T2 v2, T3 v3, T4 v4) {
+  LiteStrAppend(dest, v1, v2, v3);
+  dest->append(LiteStrCat(v4));  // NOLINT
+}
+
+template <typename T1, typename T2, typename T3, typename T4, typename T5>
+inline void LiteStrAppend(string *dest, T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) {
+  LiteStrAppend(dest, v1, v2, v3, v4);
+  dest->append(LiteStrCat(v5));  // NOLINT
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STR_CAT_H_
diff --git a/util/strings/split.cc b/lang_id/common/lite_strings/str-split.cc
similarity index 65%
copy from util/strings/split.cc
copy to lang_id/common/lite_strings/str-split.cc
index 2c610ba..199bb69 100644
--- a/util/strings/split.cc
+++ b/lang_id/common/lite_strings/str-split.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,19 +14,18 @@
  * limitations under the License.
  */
 
-#include "util/strings/split.h"
+#include "lang_id/common/lite_strings/str-split.h"
 
-namespace libtextclassifier2 {
-namespace strings {
+namespace libtextclassifier3 {
+namespace mobile {
 
-std::vector<StringPiece> Split(const StringPiece &text, char delim) {
+std::vector<StringPiece> LiteStrSplit(StringPiece text, char delim) {
   std::vector<StringPiece> result;
   int token_start = 0;
   if (!text.empty()) {
-    for (size_t i = 0; i < text.size() + 1; i++) {
+    for (size_t i = 0; i < text.size() + 1; ++i) {
       if ((i == text.size()) || (text[i] == delim)) {
-        result.push_back(
-            StringPiece(text.data() + token_start, i - token_start));
+        result.emplace_back(text.data() + token_start, i - token_start);
         token_start = i + 1;
       }
     }
@@ -34,5 +33,5 @@
   return result;
 }
 
-}  // namespace strings
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/lite_strings/str-split.h b/lang_id/common/lite_strings/str-split.h
new file mode 100644
index 0000000..300bc9f
--- /dev/null
+++ b/lang_id/common/lite_strings/str-split.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STR_SPLIT_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STR_SPLIT_H_
+
+#include <vector>
+
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Splits |text| on |delim|; similar to absl::StrSplit.
+//
+// Returns a list of tokens.  Each token is represented by a StringPiece that
+// indicates a range of chars from |text|.
+//
+// Example: StrSplit("apple,orange", ',') returns two tokens: a StringPiece that
+// points to "apple", and another one for "orange".
+//
+// If one concatenates all returned tokens with |delim| in between, one gets the
+// original |text|.  E.g., If we split "apple,orange," on ',', we get three
+// tokens: "apple", "orange" and "" (an empty token).  We do not filter out
+// empty tokens.  If necessary, the caller can do that.
+//
+// Note: if the input text is empty, we return an empty list of tokens.  In
+// general, the number of returned tokens is 1 + the number of occurences of
+// |delim| inside |text|.
+std::vector<StringPiece> LiteStrSplit(StringPiece text, char delim);
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STR_SPLIT_H_
diff --git a/lang_id/common/lite_strings/stringpiece.h b/lang_id/common/lite_strings/stringpiece.h
new file mode 100644
index 0000000..d19ea41
--- /dev/null
+++ b/lang_id/common/lite_strings/stringpiece.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC3_STD_STRING_IMPORT
+#define TC3_STD_STRING_IMPORT
+#include <string>
+
+namespace libtextclassifier3 {
+using string = std::string;
+template <class CharT, class Traits = std::char_traits<CharT>,
+          class Allocator = std::allocator<CharT> >
+using basic_string = std::basic_string<CharT, Traits, Allocator>;
+}  // namespace libtextclassifier3
+#endif
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STRINGPIECE_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STRINGPIECE_H_
+
+#include <stddef.h>
+#include <string.h>
+
+#include <ostream>
+#include <string>
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Read-only "view" of a piece of data.  Does not own the underlying data.
+class StringPiece {
+ public:
+  StringPiece() : StringPiece(nullptr, 0) {}
+
+  StringPiece(const char *str)  // NOLINT
+      : start_(str), size_(strlen(str)) {}
+
+  StringPiece(const char *start, size_t size) : start_(start), size_(size) {}
+
+  // Intentionally no "explicit" keyword: in function calls, we want strings to
+  // be converted to StringPiece implicitly.
+  StringPiece(const string &s)  // NOLINT
+      : StringPiece(s.data(), s.size()) {}
+
+  StringPiece(const string &s, int offset, int len)
+      : StringPiece(s.data() + offset, len) {}
+
+  char operator[](size_t i) const { return start_[i]; }
+
+  // Returns start address of underlying data.
+  const char *data() const { return start_; }
+
+  // Returns number of bytes of underlying data.
+  size_t size() const { return size_; }
+  size_t length() const { return size_; }
+
+  // Returns true if this StringPiece does not refer to any characters.
+  bool empty() const { return size() == 0; }
+
+  template <typename A>
+  explicit operator basic_string<char, std::char_traits<char>, A>() const {
+    if (!data()) return {};
+    return basic_string<char, std::char_traits<char>, A>(data(), size());
+  }
+
+ private:
+  const char *start_;  // Not owned.
+  size_t size_;
+};
+
+inline std::ostream &operator<<(std::ostream &out, StringPiece sp) {
+  return out.write(sp.data(), sp.size());
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_LITE_STRINGS_STRINGPIECE_H_
diff --git a/lang_id/common/math/algorithm.h b/lang_id/common/math/algorithm.h
new file mode 100644
index 0000000..a963807
--- /dev/null
+++ b/lang_id/common/math/algorithm.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generic utils similar to those from the C++ header <algorithm>.
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_ALGORITHM_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_ALGORITHM_H_
+
+#include <algorithm>
+#include <vector>
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Returns index of max element from the vector |elements|.  Returns 0 if
+// |elements| is empty.  T should be a type that can be compared by operator<.
+template<typename T>
+inline int GetArgMax(const std::vector<T> &elements) {
+  return std::distance(
+      elements.begin(),
+      std::max_element(elements.begin(), elements.end()));
+}
+
+// Returns index of min element from the vector |elements|.  Returns 0 if
+// |elements| is empty.  T should be a type that can be compared by operator<.
+template<typename T>
+inline int GetArgMin(const std::vector<T> &elements) {
+  return std::distance(
+      elements.begin(),
+      std::min_element(elements.begin(), elements.end()));
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_ALGORITHM_H_
diff --git a/lang_id/common/math/checksum.cc b/lang_id/common/math/checksum.cc
new file mode 100644
index 0000000..23d88bc
--- /dev/null
+++ b/lang_id/common/math/checksum.cc
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/math/checksum.h"
+
+// Though we use the same zlib header on all platforms, the implementation used
+// is from NDK on android and from third_party/zlib on iOS/linux.  See BUILD
+// rule.
+#include <zlib.h>
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// static
+uint32 Crc32::GetInitialCrc32() {
+  static const uint32 kCrcInitZero = crc32(0L, nullptr, 0);
+  return kCrcInitZero;
+}
+
+void Crc32::Update(const char *str, int len) {
+  if (str == nullptr || len == 0) {
+    return;
+  }
+  current_ = crc32(current_,
+                   reinterpret_cast<const unsigned char *>(str),
+                   len);
+}
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/math/checksum.h b/lang_id/common/math/checksum.h
new file mode 100644
index 0000000..d62893f
--- /dev/null
+++ b/lang_id/common/math/checksum.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_CHECKSUM_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_CHECKSUM_H_
+
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Class to compute a 32-bit Cyclic Redundancy Check (CRC) in a cumulative way.
+//
+// To use, create an instance of this class, repeatedly call Update() to "feed"
+// it your pieces of data, and, when done, call Get().
+class Crc32 {
+ public:
+  Crc32() : current_(GetInitialCrc32()) {}
+
+  // Updates current CRC32 code to also take into account the |len| bytes that
+  // start at address |str|.
+  void Update(const char *str, int len);
+
+  // Updates current CRC32 code to also take into account the bytes from |s|.
+  void Update(StringPiece s) { Update(s.data(), s.size()); }
+
+  // Returns the CRC32 code for the data so far.
+  uint32 Get() const { return current_; }
+
+ private:
+  // Returns the initial value for current_.
+  static uint32 GetInitialCrc32();
+
+  // CRC32 for the data so far.
+  uint32 current_;
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_CHECKSUM_H_
diff --git a/util/math/fastexp.cc b/lang_id/common/math/fastexp.cc
similarity index 91%
copy from util/math/fastexp.cc
copy to lang_id/common/math/fastexp.cc
index 4bf8592..44df91f 100644
--- a/util/math/fastexp.cc
+++ b/lang_id/common/math/fastexp.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
-#include "util/math/fastexp.h"
+#include "lang_id/common/math/fastexp.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 const int FastMathClass::kBits;
 const int FastMathClass::kMask1;
@@ -45,4 +46,5 @@
      7940441, 8029106, 8118253, 8207884, 8298001}
 };
 
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/util/math/fastexp.h b/lang_id/common/math/fastexp.h
similarity index 73%
copy from util/math/fastexp.h
copy to lang_id/common/math/fastexp.h
index af7a08c..05b654a 100644
--- a/util/math/fastexp.h
+++ b/lang_id/common/math/fastexp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,19 +15,21 @@
  */
 
 // Fast approximation for exp.
+//
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_MATH_FASTEXP_H_
-#define LIBTEXTCLASSIFIER_UTIL_MATH_FASTEXP_H_
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_FASTEXP_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_FASTEXP_H_
 
 #include <cassert>
 #include <cmath>
 #include <limits>
 
-#include "util/base/casts.h"
-#include "util/base/integral_types.h"
-#include "util/base/logging.h"
+#include "lang_id/common/lite_base/casts.h"
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/lite_base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 class FastMathClass {
  private:
@@ -42,7 +44,7 @@
 
  public:
   float VeryFastExp2(float f) const {
-    TC_DCHECK_LE(fabs(f), 126);
+    SAFTM_DCHECK_LE(fabs(f), 126);
     const float g = f + (127 + (1 << (23 - kBits)));
     const int32 x = bit_cast<int32>(g);
     int32 ret = ((x & kMask2) << (23 - kBits))
@@ -60,9 +62,9 @@
 
 extern FastMathClass FastMathInstance;
 
-inline float VeryFastExp2(float f) { return FastMathInstance.VeryFastExp2(f); }
 inline float VeryFastExp(float f) { return FastMathInstance.VeryFastExp(f); }
 
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_MATH_FASTEXP_H_
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_FASTEXP_H_
diff --git a/lang_id/common/math/hash.cc b/lang_id/common/math/hash.cc
new file mode 100644
index 0000000..d320428
--- /dev/null
+++ b/lang_id/common/math/hash.cc
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/math/hash.h"
+
+#include "lang_id/common/lite_base/macros.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace utils {
+
+namespace {
+// Lower-level versions of Get... that read directly from a character buffer
+// without any bounds checking.
+inline uint32 DecodeFixed32(const char *ptr) {
+  return ((static_cast<uint32>(static_cast<unsigned char>(ptr[0]))) |
+          (static_cast<uint32>(static_cast<unsigned char>(ptr[1])) << 8) |
+          (static_cast<uint32>(static_cast<unsigned char>(ptr[2])) << 16) |
+          (static_cast<uint32>(static_cast<unsigned char>(ptr[3])) << 24));
+}
+
+// 0xff is in case char is signed.
+static inline uint32 ByteAs32(char c) { return static_cast<uint32>(c) & 0xff; }
+}  // namespace
+
+uint32 Hash32(const char *data, size_t n, uint32 seed) {
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+  const uint32 m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+  uint32 h = seed ^ n;
+
+  // Mix 4 bytes at a time into the hash
+  while (n >= 4) {
+    uint32 k = DecodeFixed32(data);
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+    h *= m;
+    h ^= k;
+    data += 4;
+    n -= 4;
+  }
+
+  // Handle the last few bytes of the input array
+  switch (n) {
+    case 3:
+      h ^= ByteAs32(data[2]) << 16;
+      SAFTM_FALLTHROUGH_INTENDED;
+    case 2:
+      h ^= ByteAs32(data[1]) << 8;
+      SAFTM_FALLTHROUGH_INTENDED;
+    case 1:
+      h ^= ByteAs32(data[0]);
+      h *= m;
+  }
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+  return h;
+}
+
+}  // namespace utils
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/math/hash.h b/lang_id/common/math/hash.h
new file mode 100644
index 0000000..08c32be
--- /dev/null
+++ b/lang_id/common/math/hash.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC3_STD_STRING_IMPORT
+#define TC3_STD_STRING_IMPORT
+#include <string>
+
+namespace libtextclassifier3 {
+using string = std::string;
+template <class CharT, class Traits = std::char_traits<CharT>,
+          class Allocator = std::allocator<CharT> >
+using basic_string = std::basic_string<CharT, Traits, Allocator>;
+}  // namespace libtextclassifier3
+#endif
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_HASH_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_HASH_H_
+
+#include <string>
+
+#include "lang_id/common/lite_base/integral-types.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace utils {
+
+// Returns a 32 bit hash of the |n| bytes that start at |data|, using |seed| for
+// internal initialization.  By changing the seed, one effectively gets
+// different hash functions.
+//
+// NOTE: this function is guaranteed not to change in the future.
+//
+// IMPORTANT: for speed reasons, this method does not check its parameters
+// |data| and |n|.  The caller should ensure that n >= 0 and that one can read
+// from the memory area [data, data + n).
+uint32 Hash32(const char *data, size_t n, uint32 seed);
+
+static inline uint32 Hash32WithDefaultSeed(const char *data, size_t n) {
+  return Hash32(data, n, 0xBEEF);
+}
+
+static inline uint32 Hash32WithDefaultSeed(const string &input) {
+  return Hash32WithDefaultSeed(input.data(), input.size());
+}
+
+}  // namespace utils
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_HASH_H_
diff --git a/util/math/softmax.cc b/lang_id/common/math/softmax.cc
similarity index 76%
copy from util/math/softmax.cc
copy to lang_id/common/math/softmax.cc
index 986787f..c21f843 100644
--- a/util/math/softmax.cc
+++ b/lang_id/common/math/softmax.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,19 +14,20 @@
  * limitations under the License.
  */
 
-#include "util/math/softmax.h"
+#include "lang_id/common/math/softmax.h"
 
 #include <limits>
 
-#include "util/base/logging.h"
-#include "util/math/fastexp.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/math/fastexp.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 float ComputeSoftmaxProbability(const std::vector<float> &scores, int label) {
   if ((label < 0) || (label >= scores.size())) {
-    TC_LOG(ERROR) << "label " << label << " outside range "
-                  << "[0, " << scores.size() << ")";
+    SAFTM_LOG(ERROR) << "label " << label << " outside range "
+                     << "[0, " << scores.size() << ")";
     return 0.0f;
   }
 
@@ -69,36 +70,33 @@
   return 1.0f / denominator;
 }
 
-std::vector<float> ComputeSoftmax(const std::vector<float> &scores) {
-  return ComputeSoftmax(scores.data(), scores.size());
-}
-
-std::vector<float> ComputeSoftmax(const float *scores, int scores_size) {
+std::vector<float> ComputeSoftmax(const std::vector<float> &scores,
+                                  float alpha) {
   std::vector<float> softmax;
   std::vector<float> exp_scores;
-  exp_scores.reserve(scores_size);
-  softmax.reserve(scores_size);
+  exp_scores.reserve(scores.size());
+  softmax.reserve(scores.size());
 
   // Find max value in "scores" vector and rescale to avoid overflows.
-  float max = std::numeric_limits<float>::min();
-  for (int i = 0; i < scores_size; ++i) {
-    const float score = scores[i];
+  float max = std::numeric_limits<float>::lowest();
+  for (const auto &score : scores) {
     if (score > max) max = score;
   }
   float denominator = 0;
-  for (int i = 0; i < scores_size; ++i) {
-    const float score = scores[i];
+  for (auto &score : scores) {
     // See comments above in ComputeSoftmaxProbability for the reasoning behind
     // this approximation.
-    const float exp_score = score - max < -16.0f ? 0 : VeryFastExp(score - max);
+    const float delta_score = alpha * (score - max);
+    const float exp_score = delta_score < -16.0f ? 0 : VeryFastExp(delta_score);
     exp_scores.push_back(exp_score);
     denominator += exp_score;
   }
 
-  for (int i = 0; i < scores_size; ++i) {
+  for (int i = 0; i < scores.size(); ++i) {
     softmax.push_back(exp_scores[i] / denominator);
   }
   return softmax;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/util/math/softmax.h b/lang_id/common/math/softmax.h
similarity index 69%
copy from util/math/softmax.h
copy to lang_id/common/math/softmax.h
index f70a9ab..0100e59 100644
--- a/util/math/softmax.h
+++ b/lang_id/common/math/softmax.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,13 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_MATH_SOFTMAX_H_
-#define LIBTEXTCLASSIFIER_UTIL_MATH_SOFTMAX_H_
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_SOFTMAX_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_SOFTMAX_H_
 
 #include <vector>
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+namespace mobile {
 
 // Computes probability of a softmax label.  Parameter "scores" is the vector of
 // softmax logits.  Returns 0.0f if "label" is outside the range [0,
@@ -28,11 +29,12 @@
 
 // Computes and returns a softmax for a given vector of floats.  Parameter
 // "scores" is the vector of softmax logits.
-std::vector<float> ComputeSoftmax(const std::vector<float> &scores);
+//
+// The alpha parameter is a scaling factor on the logits.
+std::vector<float> ComputeSoftmax(const std::vector<float> &scores,
+                                  float alpha = 1.0f);
 
-// Same as above but operates on an array of floats.
-std::vector<float> ComputeSoftmax(const float *scores, int scores_size);
+}  // namespace mobile
+}  // namespace libtextclassifier3
 
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_MATH_SOFTMAX_H_
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_MATH_SOFTMAX_H_
diff --git a/lang_id/common/registry.h b/lang_id/common/registry.h
new file mode 100644
index 0000000..d2c5271
--- /dev/null
+++ b/lang_id/common/registry.h
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Mechanism to instantiate classes by name.
+//
+// This mechanism is useful if the concrete classes to be instantiated are not
+// statically known (e.g., if their names are read from a dynamically-provided
+// config).
+//
+// In that case, the first step is to define the API implemented by the
+// instantiated classes.  E.g.,
+//
+//  // In a header file function.h:
+//
+//  // Abstract function that takes a double and returns a double.
+//  class Function : public RegisterableClass<Function> {
+//   public:
+//    virtual ~Function() {}
+//    virtual double Evaluate(double x) = 0;
+//  };
+//
+//  // Should be inside namespace libtextclassifier3::mobile.
+//  SAFTM_DECLARE_CLASS_REGISTRY_NAME(Function);
+//
+// Notice the inheritance from RegisterableClass<Function>.  RegisterableClass
+// is defined by this file (registry.h).  Under the hood, this inheritance
+// defines a "registry" that maps names (zero-terminated arrays of chars) to
+// factory methods that create Functions.  You should give a human-readable name
+// to this registry.  To do that, use the following macro in a .cc file (it has
+// to be a .cc file, as it defines some static data):
+//
+//  // Inside function.cc
+//  // Should be inside namespace libtextclassifier3::mobile.
+//  SAFTM_DEFINE_CLASS_REGISTRY_NAME("function", Function);
+//
+// Now, let's define a few concrete Functions: e.g.,
+//
+//   class Cos : public Function {
+//    public:
+//     double Evaluate(double x) override { return cos(x); }
+//     SAFTM_DEFINE_REGISTRATION_METHOD("cos", Cos);
+//   };
+//
+//   class Exp : public Function {
+//    public:
+//     double Evaluate(double x) override { return exp(x); }
+//     SAFTM_DEFINE_REGISTRATION_METHOD("exp", Exp);
+//   };
+//
+// Each concrete Function implementation should have (in the public section) the
+// macro
+//
+//   SAFTM_DEFINE_REGISTRATION_METHOD("name", implementation_class);
+//
+// This defines a RegisterClass static method that, when invoked, associates
+// "name" with a factory method that creates instances of implementation_class.
+//
+// Before instantiating Functions by name, we need to tell our system which
+// Functions we may be interested in.  This is done by calling the
+// Foo::RegisterClass() for each relevant Foo implementation of Function.  It is
+// ok to call Foo::RegisterClass() multiple times (even in parallel): only the
+// first call will perform something, the others will return immediately.
+//
+//   Cos::RegisterClass();
+//   Exp::RegisterClass();
+//
+// Now, let's instantiate a Function based on its name.  This gets a lot more
+// interesting if the Function name is not statically known (i.e.,
+// read from an input proto):
+//
+//   std::unique_ptr<Function> f(Function::Create("cos"));
+//   double result = f->Evaluate(arg);
+//
+// NOTE: the same binary can use this mechanism for different APIs.  E.g., one
+// can also have (in the binary with Function, Exp, Cos, etc):
+//
+// class IntFunction : public RegisterableClass<IntFunction> {
+//  public:
+//   virtual ~IntFunction() {}
+//   virtual int Evaluate(int k) = 0;
+// };
+//
+// SAFTM_DECLARE_CLASS_REGISTRY_NAME(IntFunction);
+//
+// SAFTM_DEFINE_CLASS_REGISTRY_NAME("int function", IntFunction);
+//
+// class Inc : public IntFunction {
+//  public:
+//   int Evaluate(int k) override { return k + 1; }
+//   SAFTM_DEFINE_REGISTRATION_METHOD("inc", Inc);
+// };
+//
+// RegisterableClass<Function> and RegisterableClass<IntFunction> define their
+// own registries: each maps string names to implementation of the corresponding
+// API.
+//
+// NOTE: the mechanism described above requires you to explicitly call
+// RegisterClass() for all relevant classes before instantiating them.  You can
+// do this in the main() function or in any other function that is guaranteed to
+// run before the code that instantiates those classes.  Alternatively, you can
+// use the macro SAFTM_STATIC_REGISTRATION to perform this registration in a
+// decentralized fashion.  Just use that macro in a .cc file, outside any
+// function / class, e.g.,
+//
+// SAFTM_STATIC_REGISTRATION(Cos);
+//
+// and make sure you link in all symbols from that .cc file; e.g., in bazel, use
+// alwayslink = 1 for the corresponding cc_library.  Still, please be aware that
+// using alwayslink = 1 limits the ability of the linker to perform dead code
+// elimination.
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_REGISTRY_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_REGISTRY_H_
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <string>
+#include <vector>
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/macros.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+namespace internal {
+// Registry that associates keys (zero-terminated array of chars) with values.
+// Values are pointers to type T (the template parameter).  This is used to
+// store the association between component names and factory methods that
+// produce those components; the error messages are focused on that case.
+//
+// Internally, this registry uses a linked list of (key, value) pairs.  We do
+// not use an STL map, list, etc because we aim for small code size.
+template <class T>
+class ComponentRegistry {
+ public:
+  explicit ComponentRegistry(const char *name) : name_(name), head_(nullptr) {}
+
+  // Adds the (key, value) pair to this registry (if the key does not already
+  // exist in this registry) and returns true.  If the registry already has a
+  // mapping for key, returns false and does not modify the registry.  NOTE: the
+  // error (false) case happens even if the existing value for key is equal to
+  // the new one.
+  //
+  // This method does not take ownership of key, nor of value.
+  bool Add(const char *key, T *value) {
+    const Cell *old_cell = FindCell(key);
+    if (old_cell != nullptr) {
+      SAFTM_LOG(ERROR) << "Duplicate component: " << key;
+      return false;
+    }
+    Cell *new_cell = new Cell(key, value, head_);
+    head_ = new_cell;
+    return true;
+  }
+
+  // Returns the value attached to a key in this registry.  Returns nullptr on
+  // error (e.g., unknown key).
+  T *Lookup(const char *key) const {
+    const Cell *cell = FindCell(key);
+    if (cell == nullptr) {
+      SAFTM_LOG(ERROR) << "Unknown " << name() << " component: " << key;
+    }
+    return (cell == nullptr) ? nullptr : cell->value();
+  }
+
+  T *Lookup(const string &key) const { return Lookup(key.c_str()); }
+
+  // Returns name of this ComponentRegistry.
+  const char *name() const { return name_; }
+
+  // Fills *names with names of all components registered in this
+  // ComponentRegistry.  Previous content of *names is cleared out.
+  void GetComponentNames(std::vector<string> *names) {
+    names->clear();
+    for (const Cell *c = head_; c!= nullptr; c = c->next()) {
+      names->emplace_back(c->key());
+    }
+  }
+
+ private:
+  // Cell for the singly-linked list underlying this ComponentRegistry.  Each
+  // cell contains a key, the value for that key, as well as a pointer to the
+  // next Cell from the list.
+  class Cell {
+   public:
+    // Constructs a new Cell.
+    Cell(const char *key, T *value, Cell *next)
+        : key_(key), value_(value), next_(next) {}
+
+    const char *key() const { return key_; }
+    T *value() const { return value_; }
+    Cell *next() const { return next_; }
+
+   private:
+    const char *const key_;
+    T *const value_;
+    Cell *const next_;
+  };
+
+  // Finds Cell for indicated key in the singly-linked list pointed to by head_.
+  // Returns pointer to that first Cell with that key, or nullptr if no such
+  // Cell (i.e., unknown key).
+  //
+  // Caller does NOT own the returned pointer.
+  const Cell *FindCell(const char *key) const {
+    const Cell *c = head_;
+    while (c != nullptr && strcmp(key, c->key()) != 0) {
+      c = c->next();
+    }
+    return c;
+  }
+
+  // Human-readable description for this ComponentRegistry.  For debug purposes.
+  const char *const name_;
+
+  // Pointer to the first Cell from the underlying list of (key, value) pairs.
+  Cell *head_;
+};
+}  // namespace internal
+
+// Base class for registerable classes.
+template <class T>
+class RegisterableClass {
+ public:
+  // Factory function type.
+  typedef T *(Factory)();
+
+  // Registry type.
+  typedef internal::ComponentRegistry<Factory> Registry;
+
+  // Creates a new instance of T.  Returns pointer to new instance or nullptr in
+  // case of errors (e.g., unknown component).
+  //
+  // Passes ownership of the returned pointer to the caller.
+  static T *Create(const string &name) {  // NOLINT
+    auto *factory = registry()->Lookup(name);
+    if (factory == nullptr) {
+      SAFTM_LOG(ERROR) << "Unknown RegisterableClass " << name;
+      return nullptr;
+    }
+    return factory();
+  }
+
+  // Returns registry for class.
+  static Registry *registry() {
+    static Registry *registry_for_type_t = new Registry(kRegistryName);
+    return registry_for_type_t;
+  }
+
+ protected:
+  // Factory method for subclass ComponentClass.  Used internally by the static
+  // method RegisterClass() defined by SAFTM_DEFINE_REGISTRATION_METHOD.
+  template <class ComponentClass>
+  static T *_internal_component_factory() {
+    return new ComponentClass();
+  }
+
+ private:
+  // Human-readable name for the registry for this class.
+  static const char kRegistryName[];
+};
+
+// Defines the static method component_class::RegisterClass() that should be
+// called before trying to instantiate component_class by name.  Should be used
+// inside the public section of the declaration of component_class.  See
+// comments at the top-level of this file.
+#define SAFTM_DEFINE_REGISTRATION_METHOD(component_name, component_class) \
+  static void RegisterClass() {                                         \
+    static bool once = registry()->Add(                                 \
+        component_name, &_internal_component_factory<component_class>); \
+    if (!once) {                                                        \
+      SAFTM_LOG(ERROR) << "Problem registering " << component_name;     \
+    }                                                                   \
+    SAFTM_DCHECK(once);                                                 \
+  }
+
+// Defines the human-readable name of the registry associated with base_class.
+#define SAFTM_DECLARE_CLASS_REGISTRY_NAME(base_class)                   \
+  template <>                                                           \
+  const char ::libtextclassifier3::mobile::RegisterableClass<base_class>::kRegistryName[]
+
+// Defines the human-readable name of the registry associated with base_class.
+#define SAFTM_DEFINE_CLASS_REGISTRY_NAME(registry_name, base_class)     \
+  template <>                                                           \
+  const char                                                            \
+  ::libtextclassifier3::mobile::RegisterableClass<base_class>::kRegistryName[]    \
+      = registry_name
+
+// Register component_name, by calling component_class::RegisterClass() on
+// program start-up, before main.  NOTE: this macro should be used in
+// conjunction with something like alwayslink = 1 from bazel.  That is
+// discouraged, as it prevents the linker from doing dead code elimination, so
+// please use this macro only in special cases.  Instead, if you care about code
+// size, then you should aim to explicitly call RegisterClass from your code
+// (e.g., from the main method, or from the constructor of the class that may
+// need those registered components).
+#define SAFTM_STATIC_REGISTRATION(component_class)                  \
+  static bool SAFTM_UNIQUE_ID(_kRegistrationDummy) = [] {           \
+    component_class::RegisterClass();                               \
+    return true;                                                    \
+  }()
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_REGISTRY_H_
diff --git a/lang_id/common/stl-util.h b/lang_id/common/stl-util.h
new file mode 100644
index 0000000..95d8d3b
--- /dev/null
+++ b/lang_id/common/stl-util.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_STL_UTIL_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_STL_UTIL_H_
+
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace utils {
+
+// Deletes all the elements in an STL container and clears the container.  This
+// function is suitable for use with a vector, set, hash_set, or any other STL
+// container which defines sensible begin(), end(), and clear() methods.  If
+// container is NULL, this function is a no-op.
+template <typename T>
+void STLDeleteElements(T *container) {
+  if (!container) return;
+  auto it = container->begin();
+  while (it != container->end()) {
+    auto temp = it;
+    ++it;
+    delete *temp;
+  }
+  container->clear();
+}
+
+}  // namespace utils
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_STL_UTIL_H_
diff --git a/lang_id/common/utf8.cc b/lang_id/common/utf8.cc
new file mode 100644
index 0000000..ef00145
--- /dev/null
+++ b/lang_id/common/utf8.cc
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/common/utf8.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace utils {
+
+const char *GetSafeEndOfUtf8String(const char *data, size_t size) {
+  const char *const hard_end = data + size;
+  const char *curr = data;
+  while (curr < hard_end && *curr) {
+    int num_bytes = utils::OneCharLen(curr);
+    const char *new_curr = curr + num_bytes;
+    if (new_curr > hard_end) {
+      return curr;
+    }
+    curr = new_curr;
+  }
+  return curr;
+}
+
+}  // namespace utils
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/common/utf8.h b/lang_id/common/utf8.h
new file mode 100644
index 0000000..2365429
--- /dev/null
+++ b/lang_id/common/utf8.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TC3_STD_STRING_IMPORT
+#define TC3_STD_STRING_IMPORT
+#include <string>
+
+namespace libtextclassifier3 {
+using string = std::string;
+template <class CharT, class Traits = std::char_traits<CharT>,
+          class Allocator = std::allocator<CharT> >
+using basic_string = std::basic_string<CharT, Traits, Allocator>;
+}  // namespace libtextclassifier3
+#endif
+#ifndef NLP_SAFT_COMPONENTS_COMMON_MOBILE_UTF8_H_
+#define NLP_SAFT_COMPONENTS_COMMON_MOBILE_UTF8_H_
+
+#include <stddef.h>
+
+#include <string>
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace utils {
+
+// Returns the length (number of bytes) of the UTF8 code point starting at src,
+// by reading only the byte from address src.
+//
+// The result is a number from the set {1, 2, 3, 4}.
+static inline int OneCharLen(const char *src) {
+  // On most platforms, char is unsigned by default, but iOS is an exception.
+  // The cast below makes sure we always interpret *src as an unsigned char.
+  return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
+      [(*(reinterpret_cast<const unsigned char *>(src)) & 0xFF) >> 4];
+}
+
+// Returns a pointer "end" inside [data, data + size) such that the prefix from
+// [data, end) is the largest one that does not contain '\0' and offers the
+// following guarantee: if one starts with
+//
+//   curr = text.data()
+//
+// and keeps executing
+//
+//   curr += OneCharLen(curr)
+//
+// one would eventually reach curr == end (the pointer returned by this
+// function) without accessing data outside the string.  This guards against
+// scenarios like a broken UTF8 string which has only e.g., the first 2 bytes
+// from a 3-byte UTF8 sequence.
+//
+// Preconditions: data != nullptr.
+const char *GetSafeEndOfUtf8String(const char *data, size_t size);
+
+static inline const char *GetSafeEndOfUtf8String(const string &text) {
+  return GetSafeEndOfUtf8String(text.data(), text.size());
+}
+
+}  // namespace utils
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_COMMON_MOBILE_UTF8_H_
diff --git a/lang_id/custom-tokenizer.cc b/lang_id/custom-tokenizer.cc
new file mode 100644
index 0000000..5a6b997
--- /dev/null
+++ b/lang_id/custom-tokenizer.cc
@@ -0,0 +1,159 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/custom-tokenizer.h"
+
+#include <ctype.h>
+
+#include <string>
+
+#include "lang_id/common/lite_base/attributes.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/utf8.h"
+#include "utf.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+namespace {
+inline bool IsTokenSeparator(int num_bytes, const char *curr) {
+  if (num_bytes != 1) {
+    return false;
+  }
+  return !isalpha(*curr);
+}
+
+// Appends to *word the UTF8 encoding for the lowercase version of the UTF8
+// character that starts at |curr| and has |num_bytes| bytes.
+//
+// NOTE: if the current UTF8 character does not have a lowercase version, then
+// we append the original UTF8 character.
+inline SAFTM_ATTRIBUTE_ALWAYS_INLINE void AppendLowerCase(const char *curr,
+                                                          int num_bytes,
+                                                          string *word) {
+  if (num_bytes == 1) {
+    // Optimize the ASCII case.
+    word->push_back(tolower(*curr));
+    return;
+  }
+
+  // Harder, general case.
+  //
+  // NOTE: for lowercasing, we use the utils from utf.h:
+  // charntorune + tolowerrune + runetochar.  Unfortunately, that library does
+  // not contain any fast util for determining the number of bytes for the UTF8
+  // character that starts at a given address *without* converting to a full
+  // codepoint (like our utils::OneCharLen, which is used intensively by the
+  // rest of our code, including by the performance-critical char ngram
+  // feature).  Hence, the rest of our code continues to use utils::OneCharLen,
+  // and here, when we append the bytes to *word, we make sure that's consistent
+  // with utils::OneCharLen.
+
+  // charntorune() below reads the UTF8 character that starts at curr (using at
+  // most num_bytes bytes) and stores the corresponding codepoint into rune.
+  Rune rune;
+  charntorune(&rune, curr, num_bytes);
+  if (rune != Runeerror) {
+    Rune lower = tolowerrune(rune);
+    char lower_buf[UTFmax];
+    runetochar(lower_buf, &lower);
+
+    // When appending the UTF8 bytes to word, we do not use the number of bytes
+    // returned by runetochar(); instead, we use utils::OneCharLen(), the same
+    // method used by the char ngram feature.  We expect them to be equal, but
+    // just in case.
+    int lower_num_bytes = utils::OneCharLen(lower_buf);
+
+    // Using lower_num_bytes below is safe, because, by definition of UTFmax,
+    SAFTM_DCHECK_GE(UTFmax, 4);
+
+    // And, by implementation of utils::OneCharLen():
+    SAFTM_DCHECK_GT(lower_num_bytes, 0);
+    SAFTM_DCHECK_LE(lower_num_bytes, 4);
+    word->append(lower_buf, lower_num_bytes);
+  } else {
+    // There are sequences of bytes that charntorune() can't convert into a
+    // valid Rune (a special case is [0xEF, 0xBF, 0xBD], the UTF8 encoding for
+    // the U+FFFD special Unicode character, which is also the value of
+    // Runeerror).  We keep those bytes unchanged.
+    word->append(curr, num_bytes);
+  }
+}
+}  // namespace
+
+void TokenizerForLangId::Setup(TaskContext *context) {
+  lowercase_input_ = context->Get("lang_id_lowercase_input", false);
+}
+
+void TokenizerForLangId::Tokenize(StringPiece text,
+                                  LightSentence *sentence) const {
+  const char *const start = text.data();
+  const char *curr = start;
+  const char *end = utils::GetSafeEndOfUtf8String(start, text.size());
+
+  // Corner case: the safe part of the text is empty ("").
+  if (curr >= end) {
+    return;
+  }
+
+  // Number of bytes for UTF8 character starting at *curr.  Note: the loop below
+  // is guaranteed to terminate because in each iteration, we move curr by at
+  // least num_bytes, and num_bytes is guaranteed to be > 0.
+  int num_bytes = utils::OneCharLen(curr);
+  while (curr < end) {
+    // Jump over consecutive token separators.
+    while (IsTokenSeparator(num_bytes, curr)) {
+      curr += num_bytes;
+      if (curr >= end) {
+        return;
+      }
+      num_bytes = utils::OneCharLen(curr);
+    }
+
+    // If control reaches this point, we are at beginning of a non-empty token.
+    sentence->emplace_back();
+    string *word = &(sentence->back());
+
+    // Add special token-start character.
+    word->push_back('^');
+
+    // Add UTF8 characters to word, until we hit the end of the safe text or a
+    // token separator.
+    while (true) {
+      if (lowercase_input_) {
+        AppendLowerCase(curr, num_bytes, word);
+      } else {
+        word->append(curr, num_bytes);
+      }
+      curr += num_bytes;
+      if (curr >= end) {
+        break;
+      }
+      num_bytes = utils::OneCharLen(curr);
+      if (IsTokenSeparator(num_bytes, curr)) {
+        curr += num_bytes;
+        num_bytes = utils::OneCharLen(curr);
+        break;
+      }
+    }
+    word->push_back('$');
+  }
+}
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/custom-tokenizer.h b/lang_id/custom-tokenizer.h
new file mode 100644
index 0000000..6fab796
--- /dev/null
+++ b/lang_id/custom-tokenizer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_CUSTOM_TOKENIZER_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_CUSTOM_TOKENIZER_H_
+
+#include <string>
+
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+#include "lang_id/light-sentence.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Custom tokenizer for the LangId model.
+class TokenizerForLangId {
+ public:
+  void Setup(TaskContext *context);
+
+  // Tokenizes |text|, placing the tokens into |sentence|.  Customized for
+  // LangId.  Currently (Sep 15, 2016) we tokenize on space, newline, tab, and
+  // any other 1-byte UTF8 character which is not a letter, ignore all empty
+  // tokens, and (for each of the remaining tokens) prepend "^" (special token
+  // begin marker) and append "$" (special token end marker).
+  //
+  // Tokens are stored into the "repeated Token token;" field of *sentence.
+  void Tokenize(StringPiece text, LightSentence *sentence) const;
+
+ private:
+  // If true, during tokenization, we use the lowercase version of each Unicode
+  // character from the text to tokenize.  E.g., if this is true, the text "Foo
+  // bar" is tokenized as ["foo", "bar"]; otherwise, we get ["Foo", "bar"].
+  bool lowercase_input_ = false;
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_CUSTOM_TOKENIZER_H_
diff --git a/lang_id/fb_model/lang-id-from-fb.cc b/lang_id/fb_model/lang-id-from-fb.cc
new file mode 100644
index 0000000..f8e39d7
--- /dev/null
+++ b/lang_id/fb_model/lang-id-from-fb.cc
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/fb_model/lang-id-from-fb.h"
+
+#include "lang_id/fb_model/model-provider-from-fb.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+std::unique_ptr<LangId> GetLangIdFromFlatbufferFile(const string &filename) {
+  std::unique_ptr<ModelProvider> model_provider(
+      new ModelProviderFromFlatbuffer(filename));
+
+  // NOTE: we avoid absl (including absl::make_unique), due to b/113350902
+  return std::unique_ptr<LangId>(  // NOLINT
+      new LangId(std::move(model_provider)));
+}
+
+std::unique_ptr<LangId> GetLangIdFromFlatbufferFileDescriptor(int fd) {
+  std::unique_ptr<ModelProvider> model_provider(
+      new ModelProviderFromFlatbuffer(fd));
+
+  // NOTE: we avoid absl (including absl::make_unique), due to b/113350902
+  return std::unique_ptr<LangId>(  // NOLINT
+      new LangId(std::move(model_provider)));
+}
+
+std::unique_ptr<LangId> GetLangIdFromFlatbufferBytes(const char *data,
+                                                     size_t num_bytes) {
+  std::unique_ptr<ModelProvider> model_provider(
+      new ModelProviderFromFlatbuffer(data, num_bytes));
+
+  // NOTE: we avoid absl (including absl::make_unique), due to b/113350902
+  return std::unique_ptr<LangId>(  // NOLINT
+      new LangId(std::move(model_provider)));
+}
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/fb_model/lang-id-from-fb.h b/lang_id/fb_model/lang-id-from-fb.h
new file mode 100644
index 0000000..51bcffe
--- /dev/null
+++ b/lang_id/fb_model/lang-id-from-fb.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FB_MODEL_LANG_ID_FROM_FB_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FB_MODEL_LANG_ID_FROM_FB_H_
+
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+
+#include "lang_id/lang-id.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Returns a LangId built using the SAFT model in flatbuffer format from
+// |filename|.
+std::unique_ptr<LangId> GetLangIdFromFlatbufferFile(const string &filename);
+
+// Returns a LangId built using the SAFT model in flatbuffer format from
+// given file descriptor.
+std::unique_ptr<LangId> GetLangIdFromFlatbufferFileDescriptor(int fd);
+
+// Returns a LangId built using the SAFT model in flatbuffer format from
+// the |num_bytes| bytes that start at address |data|.
+//
+// IMPORTANT: the model bytes must be alive during the lifetime of the returned
+// LangId.  To avoid overhead (e.g., heap allocation), this method does not make
+// a private copy of the model bytes.  Avoiding overhead is the main reason we
+// use flatbuffers.
+std::unique_ptr<LangId> GetLangIdFromFlatbufferBytes(const char *data,
+                                                     size_t num_bytes);
+
+// Convenience string-based version of GetLangIdFromFlatbufferBytes.
+//
+// IMPORTANT: |bytes| must be alive during the lifetime of the returned LangId.
+inline std::unique_ptr<LangId> GetLangIdFromFlatbufferBytes(
+    const string &bytes) {
+  // Thin forwarding overload; |bytes| must outlive the returned LangId.
+  return GetLangIdFromFlatbufferBytes(bytes.data(), bytes.size());
+}
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FB_MODEL_LANG_ID_FROM_FB_H_
diff --git a/lang_id/fb_model/model-provider-from-fb.cc b/lang_id/fb_model/model-provider-from-fb.cc
new file mode 100644
index 0000000..3357963
--- /dev/null
+++ b/lang_id/fb_model/model-provider-from-fb.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/fb_model/model-provider-from-fb.h"
+
+#include "lang_id/common/file/file-utils.h"
+#include "lang_id/common/flatbuffers/embedding-network-params-from-flatbuffer.h"
+#include "lang_id/common/flatbuffers/model-utils.h"
+#include "lang_id/common/lite_strings/str-split.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+ModelProviderFromFlatbuffer::ModelProviderFromFlatbuffer(const string &filename)
+
+    // Using mmap as a fast way to read the model bytes.  As the file is
+    // unmapped only when the field scoped_mmap_ is destructed, the model bytes
+    // stay alive for the entire lifetime of this object.
+    : scoped_mmap_(new ScopedMmap(filename)) {
+  // Initialize() verifies the mapped bytes and sets valid_ only on success.
+  Initialize(scoped_mmap_->handle().to_stringpiece());
+}
+
+ModelProviderFromFlatbuffer::ModelProviderFromFlatbuffer(int fd)
+
+    // Using mmap as a fast way to read the model bytes.  As the file is
+    // unmapped only when the field scoped_mmap_ is destructed, the model bytes
+    // stay alive for the entire lifetime of this object.
+    : scoped_mmap_(new ScopedMmap(fd)) {
+  // Initialize() verifies the mapped bytes and sets valid_ only on success.
+  Initialize(scoped_mmap_->handle().to_stringpiece());
+}
+
+// Parses and verifies the Model flatbuffer from |model_bytes|, fills
+// context_, languages_, and nn_params_, and sets valid_ on full success.
+void ModelProviderFromFlatbuffer::Initialize(StringPiece model_bytes) {
+  // Note: valid_ was initialized to false.  In the code below, we set valid_ to
+  // true only if all initialization steps completed successfully.  Otherwise,
+  // we return early, leaving valid_ to its default value false.
+  model_ = saft_fbs::GetVerifiedModelFromBytes(model_bytes);
+  if (model_ == nullptr) {
+    SAFTM_LOG(ERROR) << "Unable to initialize ModelProviderFromFlatbuffer";
+    return;
+  }
+
+  // Initialize context_ parameters.
+  if (!saft_fbs::FillParameters(*model_, &context_)) {
+    // FillParameters already performs error logging.
+    return;
+  }
+
+  // Init languages_: the "supported_languages" parameter is expected to be a
+  // comma-separated list of language codes.
+  const string known_languages_str = context_.Get("supported_languages", "");
+  for (StringPiece sp : LiteStrSplit(known_languages_str, ',')) {
+    languages_.emplace_back(sp);
+  }
+  if (languages_.empty()) {
+    SAFTM_LOG(ERROR) << "Unable to find list of supported_languages";
+    return;
+  }
+
+  // Init nn_params_.
+  if (!InitNetworkParams()) {
+    // InitNetworkParams already performs error logging.
+    return;
+  }
+
+  // Everything looks fine.
+  valid_ = true;
+}
+
+// Loads nn_params_ from the ModelInput named "language-identifier-network".
+// Returns false (after logging an error) if the input is missing, empty, or
+// does not parse as valid embedding network parameters.
+bool ModelProviderFromFlatbuffer::InitNetworkParams() {
+  const string kInputName = "language-identifier-network";
+  StringPiece bytes =
+      saft_fbs::GetInputBytes(saft_fbs::GetInputByName(model_, kInputName));
+  if ((bytes.data() == nullptr) || bytes.empty()) {
+    SAFTM_LOG(ERROR) << "Unable to get bytes for model input " << kInputName;
+    return false;
+  }
+  std::unique_ptr<EmbeddingNetworkParamsFromFlatbuffer> nn_params_from_fb(
+      new EmbeddingNetworkParamsFromFlatbuffer(bytes));
+  if (!nn_params_from_fb->is_valid()) {
+    SAFTM_LOG(ERROR) << "EmbeddingNetworkParamsFromFlatbuffer not valid";
+    return false;
+  }
+  nn_params_ = std::move(nn_params_from_fb);
+  return true;
+}
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/fb_model/model-provider-from-fb.h b/lang_id/fb_model/model-provider-from-fb.h
new file mode 100644
index 0000000..d25c903
--- /dev/null
+++ b/lang_id/fb_model/model-provider-from-fb.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FB_MODEL_MODEL_PROVIDER_FROM_FB_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FB_MODEL_MODEL_PROVIDER_FROM_FB_H_
+
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/file/mmap.h"
+#include "lang_id/common/flatbuffers/model_generated.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+#include "lang_id/model-provider.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// ModelProvider for LangId, based on a SAFT model in flatbuffer format.
+class ModelProviderFromFlatbuffer : public ModelProvider {
+ public:
+  // Constructs a model provider based on a flatbuffer-format SAFT model from
+  // |filename|.
+  explicit ModelProviderFromFlatbuffer(const string &filename);
+
+  // Constructs a model provider based on a flatbuffer-format SAFT model from
+  // file descriptor |fd|.
+  explicit ModelProviderFromFlatbuffer(int fd);
+
+  // Constructs a model provider from a flatbuffer-format SAFT model the bytes
+  // of which are already in RAM (size bytes starting from address data).
+  // Useful if you "transport" these bytes otherwise than via a normal file
+  // (e.g., if you embed them somehow in your binary).
+  //
+  // IMPORTANT: |data| should be alive during the lifetime of the
+  // newly-constructed ModelProviderFromFlatbuffer.  This is trivial to ensure
+  // for data that's statically embedded in your binary, but more complex in
+  // other cases.  To avoid overhead (e.g., heap allocation), this method does
+  // not make a private copy of the data.  In general, the ownership of the
+  // newly-constructed ModelProviderFromFlatbuffer is immediately passed to a
+  // LangId object (which doesn't pass it further); hence, one needs to make
+  // sure |data| is alive during the lifetime of that LangId object.
+  ModelProviderFromFlatbuffer(const char *data, std::size_t size) {
+    StringPiece model_bytes(data, size);
+    Initialize(model_bytes);
+  }
+
+  ~ModelProviderFromFlatbuffer() override = default;
+
+  const TaskContext *GetTaskContext() const override {
+    return &context_;
+  }
+
+  const EmbeddingNetworkParams *GetNnParams() const override {
+    return nn_params_.get();
+  }
+
+  // Returns the supported languages by value: callers get their own copy.
+  std::vector<string> GetLanguages() const override {
+    return languages_;
+  }
+
+ private:
+  // Initializes the fields of this class based on the flatbuffer from
+  // |model_bytes|.  These bytes are supposed to be the representation of a
+  // Model flatbuffer and should be alive during the lifetime of this object.
+  void Initialize(StringPiece model_bytes);
+
+  // Initializes nn_params_ based on model_.
+  bool InitNetworkParams();
+
+  // If filename-based constructor is used, scoped_mmap_ keeps the file mmapped
+  // during the lifetime of this object, such that references inside the Model
+  // flatbuffer from those bytes remain valid.
+  const std::unique_ptr<ScopedMmap> scoped_mmap_;
+
+  // Pointer to the flatbuffer from
+  //
+  // (a) [if filename constructor was used:] the bytes mmapped by scoped_mmap_
+  // (for safety considerations, see comment for that field), or
+  //
+  // (b) [if the (data, size) constructor was used:] the bytes from [data,
+  // data+size).  Please read carefully the doc for that constructor.
+  const saft_fbs::Model *model_;
+
+  // Context returned by this model provider.  We set its parameters based on
+  // model_, at construction time.
+  TaskContext context_;
+
+  // List of supported languages, see GetLanguages().  We expect this list to be
+  // specified by the ModelParameter named "supported_languages" from model_.
+  std::vector<string> languages_;
+
+  // EmbeddingNetworkParams, see GetNnParams().  Set based on the ModelInput
+  // named "language-identifier-network" from model_.
+  std::unique_ptr<EmbeddingNetworkParams> nn_params_;
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FB_MODEL_MODEL_PROVIDER_FROM_FB_H_
diff --git a/lang_id/features/char-ngram-feature.cc b/lang_id/features/char-ngram-feature.cc
new file mode 100644
index 0000000..e52b2f2
--- /dev/null
+++ b/lang_id/features/char-ngram-feature.cc
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/features/char-ngram-feature.h"
+
+#include <utility>
+#include <vector>
+
+#include "lang_id/common/fel/feature-types.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/math/hash.h"
+#include "lang_id/common/utf8.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Reads the feature-function descriptor parameters.  Per the header doc, only
+// the combination include_terminators=true, include_spaces=false,
+// use_equal_weight=false is currently supported; anything else is rejected
+// with an explicit error.
+bool ContinuousBagOfNgramsFunction::Setup(TaskContext *context) {
+  // Parameters in the feature function descriptor.
+  bool include_terminators = GetBoolParameter("include_terminators", false);
+  if (!include_terminators) {
+    // Bug fix: this branch fires when the parameter is *false* (the
+    // unsupported value), so the message must name "false", not "true".
+    SAFTM_LOG(ERROR) << "No support for include_terminators=false";
+    return false;
+  }
+
+  bool include_spaces = GetBoolParameter("include_spaces", false);
+  if (include_spaces) {
+    SAFTM_LOG(ERROR) << "No support for include_spaces=true";
+    return false;
+  }
+
+  bool use_equal_ngram_weight = GetBoolParameter("use_equal_weight", false);
+  if (use_equal_ngram_weight) {
+    SAFTM_LOG(ERROR) << "No support for use_equal_weight=true";
+    return false;
+  }
+
+  // Dimension of the ngram-id space and the ngram length, respectively.
+  ngram_id_dimension_ = GetIntParameter("id_dim", 10000);
+  ngram_size_ = GetIntParameter("size", 3);
+
+  // Pre-size the scratch count vector so ComputeNgramCounts() can index it.
+  counts_.assign(ngram_id_dimension_, 0);
+  return true;
+}
+
+bool ContinuousBagOfNgramsFunction::Init(TaskContext *context) {
+  // One numeric feature dimension per possible ngram id.
+  set_feature_type(new NumericFeatureType(name(), ngram_id_dimension_));
+  return true;
+}
+
+// Counts every ngram of ngram_size_ UTF8 characters in |sentence|, filling
+// counts_ (per-ngram-id occurrence counts) and non_zero_count_indices_ (ids
+// with a non-zero count).  Returns the total number of ngrams seen.
+int ContinuousBagOfNgramsFunction::ComputeNgramCounts(
+    const LightSentence &sentence) const {
+  SAFTM_CHECK_EQ(counts_.size(), ngram_id_dimension_);
+  SAFTM_CHECK_EQ(non_zero_count_indices_.size(), 0);
+
+  int total_count = 0;
+
+  for (const string &word : sentence) {
+    const char *const word_end = word.data() + word.size();
+
+    // Set ngram_start at the start of the current token (word).
+    const char *ngram_start = word.data();
+
+    // Set ngram_end ngram_size UTF8 characters after ngram_start.  Note: each
+    // UTF8 character contains between 1 and 4 bytes.
+    const char *ngram_end = ngram_start;
+    int num_utf8_chars = 0;
+    do {
+      ngram_end += utils::OneCharLen(ngram_end);
+      num_utf8_chars++;
+    } while ((num_utf8_chars < ngram_size_) && (ngram_end < word_end));
+
+    if (num_utf8_chars < ngram_size_) {
+      // Current token is so small, it does not contain a single ngram of
+      // ngram_size UTF8 characters.  Not much we can do in this case ...
+      continue;
+    }
+
+    // At this point, [ngram_start, ngram_end) is the first ngram of ngram_size
+    // UTF8 characters from current token.
+    while (true) {
+      // Compute ngram id: hash(ngram) % ngram_id_dimension
+      int ngram_id = (
+          utils::Hash32WithDefaultSeed(ngram_start, ngram_end - ngram_start)
+          % ngram_id_dimension_);
+
+      // Use a reference to the actual count, such that we can both test whether
+      // the count was 0 and increment it without performing two lookups.
+      int &ref_to_count_for_ngram = counts_[ngram_id];
+      if (ref_to_count_for_ngram == 0) {
+        non_zero_count_indices_.push_back(ngram_id);
+      }
+      ref_to_count_for_ngram++;
+      total_count++;
+      if (ngram_end >= word_end) {
+        break;
+      }
+
+      // Advance both ngram_start and ngram_end by one UTF8 character.  This
+      // way, the number of UTF8 characters between them remains constant
+      // (ngram_size).
+      ngram_start += utils::OneCharLen(ngram_start);
+      ngram_end += utils::OneCharLen(ngram_end);
+    }
+  }  // end of loop over tokens.
+
+  return total_count;
+}
+
+void ContinuousBagOfNgramsFunction::Evaluate(const WorkspaceSet &workspaces,
+                                             const LightSentence &sentence,
+                                             FeatureVector *result) const {
+  // Find the char ngram counts.
+  int total_count = ComputeNgramCounts(sentence);
+
+  // Populate the feature vector.
+  const float norm = static_cast<float>(total_count);
+
+  // TODO(salcianu): explore treating dense vectors (i.e., many non-zero
+  // elements) separately.
+  //
+  // Note: if total_count is 0, non_zero_count_indices_ is empty, so the loop
+  // below does not execute and norm is never used as a divisor.
+  for (int ngram_id : non_zero_count_indices_) {
+    // Each feature encodes (ngram_id, relative frequency), packed into one
+    // discrete value via FloatFeatureValue.
+    const float weight = counts_[ngram_id] / norm;
+    FloatFeatureValue value(ngram_id, weight);
+    result->add(feature_type(), value.discrete_value);
+
+    // Clear up counts_, for the next invocation of Evaluate().
+    counts_[ngram_id] = 0;
+  }
+
+  // Clear up non_zero_count_indices_, for the next invocation of Evaluate().
+  non_zero_count_indices_.clear();
+}
+
+SAFTM_STATIC_REGISTRATION(ContinuousBagOfNgramsFunction);
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/features/char-ngram-feature.h b/lang_id/features/char-ngram-feature.h
new file mode 100644
index 0000000..8280bca
--- /dev/null
+++ b/lang_id/features/char-ngram-feature.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_CHAR_NGRAM_FEATURE_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_CHAR_NGRAM_FEATURE_H_
+
+#include <string>
+
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/fel/workspace.h"
+#include "lang_id/features/light-sentence-features.h"
+#include "lang_id/light-sentence.h"
+
+// TODO(abakalov): Add a test.
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Class for computing continuous char ngram features.
+//
+// Feature function descriptor parameters:
+//   include_terminators(bool, false):
+//     If 'true', then splits the text based on spaces to get tokens, adds "^"
+//     to the beginning of each token, and adds "$" to the end of each token.
+//     NOTE: currently, we support only include_terminators=true.
+//   include_spaces(bool, false):
+//     If 'true', then includes char ngrams containing spaces.
+//     NOTE: currently, we support only include_spaces=false.
+//   use_equal_weight(bool, false):
+//     If 'true', then weighs each unique ngram by 1.0 / (number of unique
+//     ngrams in the input). Otherwise, weighs each unique ngram by (ngram
+//     count) / (total number of ngrams).
+//     NOTE: currently, we support only use_equal_weight=false.
+//   id_dim(int, 10000):
+//     The integer id of each char ngram is computed as follows:
+//     Hash32WithDefault(char ngram) % id_dim.
+//   size(int, 3):
+//     Only ngrams of this size will be extracted.
+//
+// NOTE: this class is not thread-safe.  TODO(salcianu): make it thread-safe.
+class ContinuousBagOfNgramsFunction : public LightSentenceFeature {
+ public:
+  bool Setup(TaskContext *context) override;
+  bool Init(TaskContext *context) override;
+
+  // Appends the features computed from the sentence to the feature vector.
+  void Evaluate(const WorkspaceSet &workspaces, const LightSentence &sentence,
+                FeatureVector *result) const override;
+
+  // Registers this feature function under the descriptor name
+  // "continuous-bag-of-ngrams".
+  SAFTM_DEFINE_REGISTRATION_METHOD("continuous-bag-of-ngrams",
+                                   ContinuousBagOfNgramsFunction);
+
+ private:
+  // Auxiliary for Evaluate().  Fills counts_ and non_zero_count_indices_ (see
+  // below), and returns the total ngram count.
+  int ComputeNgramCounts(const LightSentence &sentence) const;
+
+  // counts_[i] is the count of all ngrams with id i.  Work data for Evaluate().
+  // NOTE: we declare this vector as a field, such that its underlying capacity
+  // stays allocated in between calls to Evaluate().
+  // NOTE(review): this header uses std::vector but includes only <string>;
+  // it likely compiles via a transitive include — consider adding <vector>.
+  mutable std::vector<int> counts_;
+
+  // Indices of non-zero elements of counts_.  See comments for counts_.
+  mutable std::vector<int> non_zero_count_indices_;
+
+  // The integer id of each char ngram is computed as follows:
+  // Hash32WithDefaultSeed(char_ngram) % ngram_id_dimension_.
+  int ngram_id_dimension_;
+
+  // Only ngrams of size ngram_size_ will be extracted.
+  int ngram_size_;
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_CHAR_NGRAM_FEATURE_H_
diff --git a/lang_id/features/light-sentence-features.cc b/lang_id/features/light-sentence-features.cc
new file mode 100644
index 0000000..7f1d878
--- /dev/null
+++ b/lang_id/features/light-sentence-features.cc
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/features/light-sentence-features.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Registry for the features on whole light sentences.
+SAFTM_DEFINE_CLASS_REGISTRY_NAME("light sentence feature function",
+                                 lang_id::LightSentenceFeature);
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/features/light-sentence-features.h b/lang_id/features/light-sentence-features.h
new file mode 100644
index 0000000..cc85878
--- /dev/null
+++ b/lang_id/features/light-sentence-features.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_LIGHT_SENTENCE_FEATURES_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_LIGHT_SENTENCE_FEATURES_H_
+
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/registry.h"
+#include "lang_id/light-sentence.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Feature function that extracts features from LightSentences.
+typedef FeatureFunction<LightSentence> LightSentenceFeature;
+
+// Feature extractor for LightSentences.
+typedef FeatureExtractor<LightSentence> LightSentenceExtractor;
+
+}  // namespace lang_id
+
+SAFTM_DECLARE_CLASS_REGISTRY_NAME(lang_id::LightSentenceFeature);
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_LIGHT_SENTENCE_FEATURES_H_
diff --git a/lang_id/features/relevant-script-feature.cc b/lang_id/features/relevant-script-feature.cc
new file mode 100644
index 0000000..0fde87b
--- /dev/null
+++ b/lang_id/features/relevant-script-feature.cc
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/features/relevant-script-feature.h"
+
+#include <string>
+
+#include "lang_id/common/fel/feature-types.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/fel/workspace.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/utf8.h"
+#include "lang_id/script/script-detector.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Instantiates the per-character script detector named by the
+// "script_detector_name" descriptor parameter and reads the number of scripts
+// the model supports.  Returns false if the detector cannot be created.
+bool RelevantScriptFeature::Setup(TaskContext *context) {
+  string script_detector_name = GetParameter(
+      "script_detector_name", /* default_value = */ "tiny-script-detector");
+
+  // We don't use absl::WrapUnique, nor the rest of absl, see http://b/71873194
+  script_detector_.reset(ScriptDetector::Create(script_detector_name));
+  if (script_detector_ == nullptr) {
+    // This means ScriptDetector::Create() could not find the requested
+    // script_detector_name.  In that case, Create() already logged an error
+    // message.
+    return false;
+  }
+
+  // We use default value 172 because this is the number of scripts supported by
+  // the first model we trained with this feature.  See http://b/70617713.
+  // Newer models may support more scripts.
+  num_supported_scripts_ = GetIntParameter("num_supported_scripts", 172);
+  return true;
+}
+
+bool RelevantScriptFeature::Init(TaskContext *context) {
+  // One numeric feature dimension per supported script id.
+  set_feature_type(new NumericFeatureType(name(), num_supported_scripts_));
+  return true;
+}
+
+void RelevantScriptFeature::Evaluate(
+    const WorkspaceSet &workspaces, const LightSentence &sentence,
+    FeatureVector *result) const {
+  // counts[s] is the number of characters with script s.
+  // NOTE(review): this file uses std::vector but includes only <string>;
+  // it likely compiles via a transitive include — consider adding <vector>.
+  std::vector<int> counts(num_supported_scripts_);
+  int total_count = 0;
+  for (const string &word : sentence) {
+    const char *const word_end = word.data() + word.size();
+    const char *curr = word.data();
+
+    // Skip over token start '^'.
+    SAFTM_DCHECK_EQ(*curr, '^');
+    curr += utils::OneCharLen(curr);
+    while (true) {
+      const int num_bytes = utils::OneCharLen(curr);
+
+      int script = script_detector_->GetScript(curr, num_bytes);
+
+      // We do this update and the if (...) break below *before* incrementing
+      // counts[script] in order to skip the token end '$'.
+      curr += num_bytes;
+      if (curr >= word_end) {
+        SAFTM_DCHECK_EQ(*(curr - num_bytes), '$');
+        break;
+      }
+      SAFTM_DCHECK_GE(script, 0);
+
+      if (script < num_supported_scripts_) {
+        counts[script]++;
+        total_count++;
+      } else {
+        // Unsupported script: this usually indicates a script that is
+        // recognized by newer versions of the code, after the model was
+        // trained.  E.g., new code running with old model.
+      }
+    }
+  }
+
+  // Emit one feature per script actually seen: (script_id, fraction of
+  // counted characters in that script).  If total_count is 0, every count is
+  // 0 and the guard below prevents any division.
+  for (int script_id = 0; script_id < num_supported_scripts_; ++script_id) {
+    int count = counts[script_id];
+    if (count > 0) {
+      const float weight = static_cast<float>(count) / total_count;
+      FloatFeatureValue value(script_id, weight);
+      result->add(feature_type(), value.discrete_value);
+    }
+  }
+}
+
+SAFTM_STATIC_REGISTRATION(RelevantScriptFeature);
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/features/relevant-script-feature.h b/lang_id/features/relevant-script-feature.h
new file mode 100644
index 0000000..57c5a1f
--- /dev/null
+++ b/lang_id/features/relevant-script-feature.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_RELEVANT_SCRIPT_FEATURE_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_RELEVANT_SCRIPT_FEATURE_H_
+
+#include <memory>
+
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/fel/task-context.h"
+#include "lang_id/common/fel/workspace.h"
+#include "lang_id/features/light-sentence-features.h"
+#include "lang_id/light-sentence.h"
+#include "lang_id/script/script-detector.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Given a sentence, generates one FloatFeatureValue for each "relevant" Unicode
+// script (see below): each such feature indicates the script and the ratio of
+// UTF8 characters in that script, in the given sentence.
+//
+// What is a relevant script?  Recognizing all 100+ Unicode scripts would
+// require too much code size and runtime.  Instead, we focus only on a few
+// scripts that communicate a lot of language information: e.g., the use of
+// Hiragana characters almost always indicates Japanese, so Hiragana is a
+// "relevant" script for us.  The Latin script is used by dozens of language, so
+// Latin is not relevant in this context.
+class RelevantScriptFeature : public LightSentenceFeature {
+ public:
+  bool Setup(TaskContext *context) override;
+  bool Init(TaskContext *context) override;
+
+  // Appends the features computed from the sentence to the feature vector.
+  void Evaluate(const WorkspaceSet &workspaces,
+                const LightSentence &sentence,
+                FeatureVector *result) const override;
+
+  // Registers this feature function under the descriptor name
+  // "continuous-bag-of-relevant-scripts".
+  SAFTM_DEFINE_REGISTRATION_METHOD("continuous-bag-of-relevant-scripts",
+                                   RelevantScriptFeature);
+
+ private:
+  // Detects script of individual UTF8 characters.
+  std::unique_ptr<ScriptDetector> script_detector_;
+
+  // Current model supports scripts in [0, num_supported_scripts_).
+  int num_supported_scripts_ = 0;
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_FEATURES_RELEVANT_SCRIPT_FEATURE_H_
diff --git a/lang_id/lang-id.cc b/lang_id/lang-id.cc
new file mode 100644
index 0000000..ebc88ec
--- /dev/null
+++ b/lang_id/lang-id.cc
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/lang-id.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "lang_id/common/embedding-feature-interface.h"
+#include "lang_id/common/embedding-network-params.h"
+#include "lang_id/common/embedding-network.h"
+#include "lang_id/common/fel/feature-extractor.h"
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_strings/numbers.h"
+#include "lang_id/common/lite_strings/str-split.h"
+#include "lang_id/common/lite_strings/stringpiece.h"
+#include "lang_id/common/math/algorithm.h"
+#include "lang_id/common/math/softmax.h"
+#include "lang_id/custom-tokenizer.h"
+#include "lang_id/features/light-sentence-features.h"
+#include "lang_id/light-sentence.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+namespace {
+// Default value for the confidence threshold.  If the confidence of the top
+// prediction is below this threshold, then FindLanguage() returns
+// LangId::kUnknownLanguageCode.  Note: this is just a default value; if the
+// TaskSpec from the model specifies a "reliability_thresh" parameter, then we
+// use that value instead.  Note: for legacy reasons, our code and comments use
+// the terms "confidence", "probability" and "reliability" equivalently.
+static const float kDefaultConfidenceThreshold = 0.50f;
+}  // namespace
+
+// Class that performs all work behind LangId.
+class LangIdImpl {
+ public:
+  explicit LangIdImpl(std::unique_ptr<ModelProvider> model_provider)
+      : model_provider_(std::move(model_provider)),
+        lang_id_brain_interface_("language_identifier") {
+    // Note: in the code below, we set valid_ to true only if all initialization
+    // steps completed successfully.  Otherwise, we return early, leaving valid_
+    // to its default value false.
+    if (!model_provider_ || !model_provider_->is_valid()) {
+      SAFTM_LOG(ERROR) << "Invalid model provider";
+      return;
+    }
+
+    auto *nn_params = model_provider_->GetNnParams();
+    if (!nn_params) {
+      SAFTM_LOG(ERROR) << "No NN params";
+      return;
+    }
+    network_.reset(new EmbeddingNetwork(nn_params));
+
+    languages_ = model_provider_->GetLanguages();
+    if (languages_.empty()) {
+      SAFTM_LOG(ERROR) << "No known languages";
+      return;
+    }
+
+    TaskContext context = *model_provider_->GetTaskContext();
+    if (!Setup(&context)) {
+      SAFTM_LOG(ERROR) << "Unable to Setup() LangId";
+      return;
+    }
+    if (!Init(&context)) {
+      SAFTM_LOG(ERROR) << "Unable to Init() LangId";
+      return;
+    }
+    valid_ = true;
+  }
+
+  string FindLanguage(StringPiece text) const {
+    // NOTE: it would be wasteful to implement this method in terms of
+    // FindLanguages().  We just need the most likely language and its
+    // probability; no need to compute (and allocate) a vector of pairs for all
+    // languages, nor to compute probabilities for all non-top languages.
+    if (!is_valid()) {
+      return LangId::kUnknownLanguageCode;
+    }
+
+    std::vector<float> scores;
+    ComputeScores(text, &scores);
+
+    int prediction_id = GetArgMax(scores);
+    const string language = GetLanguageForSoftmaxLabel(prediction_id);
+    float probability = ComputeSoftmaxProbability(scores, prediction_id);
+    SAFTM_DLOG(INFO) << "Predicted " << language
+                     << " with prob: " << probability << " for \"" << text
+                     << "\"";
+
+    // Find confidence threshold for language.
+    float threshold = default_threshold_;
+    auto it = per_lang_thresholds_.find(language);
+    if (it != per_lang_thresholds_.end()) {
+      threshold = it->second;
+    }
+    if (probability < threshold) {
+      SAFTM_DLOG(INFO) << "  below threshold => "
+                       << LangId::kUnknownLanguageCode;
+      return LangId::kUnknownLanguageCode;
+    }
+    return language;
+  }
+
+  void FindLanguages(StringPiece text, LangIdResult *result) const {
+    if (result == nullptr) return;
+
+    result->predictions.clear();
+    if (!is_valid()) {
+      result->predictions.emplace_back(LangId::kUnknownLanguageCode, 1);
+      return;
+    }
+
+    std::vector<float> scores;
+    ComputeScores(text, &scores);
+
+    // Compute and sort softmax in descending order by probability and convert
+    // IDs to language code strings.  When probabilities are equal, we sort by
+    // language code string in ascending order.
+    std::vector<float> softmax = ComputeSoftmax(scores);
+
+    for (int i = 0; i < softmax.size(); ++i) {
+      result->predictions.emplace_back(GetLanguageForSoftmaxLabel(i),
+                                       softmax[i]);
+    }
+
+    // Sort the resulting language predictions by probability in descending
+    // order.
+    std::sort(result->predictions.begin(), result->predictions.end(),
+              [](const std::pair<string, float> &a,
+                 const std::pair<string, float> &b) {
+      if (a.second == b.second) {
+        return a.first.compare(b.first) < 0;
+      } else {
+        return a.second > b.second;
+      }
+    });
+  }
+
+  bool is_valid() const { return valid_; }
+
+  int GetModelVersion() const { return model_version_; }
+
+ private:
+  bool Setup(TaskContext *context) {
+    tokenizer_.Setup(context);
+    if (!lang_id_brain_interface_.SetupForProcessing(context)) return false;
+    default_threshold_ = context->Get(
+        "reliability_thresh", kDefaultConfidenceThreshold);
+
+    // Parse task parameter "per_lang_reliability_thresholds", fill
+    // per_lang_thresholds_.
+    const string thresholds_str =
+        context->Get("per_lang_reliability_thresholds", "");
+    std::vector<StringPiece> tokens = LiteStrSplit(thresholds_str, ',');
+    for (const auto &token : tokens) {
+      if (token.empty()) continue;
+      std::vector<StringPiece> parts = LiteStrSplit(token, '=');
+      float threshold = 0.0f;
+      if ((parts.size() == 2) && LiteAtof(parts[1], &threshold)) {
+        per_lang_thresholds_[string(parts[0])] = threshold;
+      } else {
+        SAFTM_LOG(ERROR) << "Broken token: \"" << token << "\"";
+      }
+    }
+    model_version_ = context->Get("model_version", model_version_);
+    return true;
+  }
+
+  bool Init(TaskContext *context) {
+    return lang_id_brain_interface_.InitForProcessing(context);
+  }
+
+  // Extracts features for |text|, runs them through the feed-forward neural
+  // network, and computes the output scores (activations from the last layer).
+  // These scores can be used to compute the softmax probabilities for our
+  // labels (in this case, the languages).
+  void ComputeScores(StringPiece text, std::vector<float> *scores) const {
+    // Create a Sentence storing the input text.
+    LightSentence sentence;
+    tokenizer_.Tokenize(text, &sentence);
+
+    std::vector<FeatureVector> features =
+        lang_id_brain_interface_.GetFeaturesNoCaching(&sentence);
+
+    // Run feed-forward neural network to compute scores.
+    network_->ComputeFinalScores(features, scores);
+  }
+
+  // Returns language code for a softmax label.  See comments for languages_
+  // field.  If label is out of range, returns LangId::kUnknownLanguageCode.
+  string GetLanguageForSoftmaxLabel(int label) const {
+    if ((label >= 0) && (label < languages_.size())) {
+      return languages_[label];
+    } else {
+      SAFTM_LOG(ERROR) << "Softmax label " << label << " outside range [0, "
+                       << languages_.size() << ")";
+      return LangId::kUnknownLanguageCode;
+    }
+  }
+
+  std::unique_ptr<ModelProvider> model_provider_;
+
+  TokenizerForLangId tokenizer_;
+
+  EmbeddingFeatureInterface<LightSentenceExtractor, LightSentence>
+      lang_id_brain_interface_;
+
+  // Neural network to use for scoring.
+  std::unique_ptr<EmbeddingNetwork> network_;
+
+  // True if this object is ready to perform language predictions.
+  bool valid_ = false;
+
+  // Only predictions with a probability (confidence) above this threshold are
+  // reported.  Otherwise, we report LangId::kUnknownLanguageCode.
+  float default_threshold_ = kDefaultConfidenceThreshold;
+
+  std::unordered_map<string, float> per_lang_thresholds_;
+
+  // Recognized languages: softmax label i means languages_[i] (something like
+  // "en", "fr", "ru", etc).
+  std::vector<string> languages_;
+
+  // Version of the model used by this LangIdImpl object.  Zero means that the
+  // model version could not be determined.
+  int model_version_ = 0;
+};
+
+const char LangId::kUnknownLanguageCode[] = "und";
+
+LangId::LangId(std::unique_ptr<ModelProvider> model_provider)
+    : pimpl_(new LangIdImpl(std::move(model_provider))) {
+}
+
+LangId::~LangId() = default;
+
+string LangId::FindLanguage(const char *data, size_t num_bytes) const {
+  StringPiece text(data, num_bytes);
+  return pimpl_->FindLanguage(text);
+}
+
+void LangId::FindLanguages(const char *data, size_t num_bytes,
+                           LangIdResult *result) const {
+  SAFTM_DCHECK(result) << "LangIdResult must not be null.";
+  StringPiece text(data, num_bytes);
+  pimpl_->FindLanguages(text, result);
+}
+
+bool LangId::is_valid() const {
+  return pimpl_->is_valid();
+}
+
+int LangId::GetModelVersion() const { return pimpl_->GetModelVersion(); }
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/lang-id.h b/lang_id/lang-id.h
new file mode 100644
index 0000000..3f656f2
--- /dev/null
+++ b/lang_id/lang-id.h
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_LANG_ID_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_LANG_ID_H_
+
+
+#include <stddef.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "lang_id/common/lite_base/macros.h"
+#include "lang_id/model-provider.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Forward-declaration of the class that performs all underlying work.
+class LangIdImpl;
+
+struct LangIdResult {
+  // An n-best list of possible language codes for a given input sorted in
+  // descending order according to each code's respective probability.
+  //
+  // This list is guaranteed to be non-empty after calling
+  // LangId::FindLanguages.  The most likely language code is always the first
+  // item in this array.
+  //
+  // If the model cannot make a prediction, this array contains a single result:
+  // a language code LangId::kUnknownLanguageCode with probability 1.
+  std::vector<std::pair<string, float>> predictions;
+};
+
+// Class for detecting the language of a document.
+//
+// Note: this class does not handle the details of loading the actual model.
+// Those details have been "outsourced" to the ModelProvider class.
+//
+// Note: this class is thread-unsafe.
+class LangId {
+ public:
+  // Standard BCP-47 language code for Unknown/Undetermined language.
+  static const char kUnknownLanguageCode[];
+
+  // Constructs a LangId object, based on |model_provider|.
+  //
+  // Note: we don't crash if we detect a problem at construction time (e.g., the
+  // model provider can't read an underlying file).  Instead, we mark the
+  // newly-constructed object as invalid; clients can invoke FindLanguage() on
+  // an invalid object: nothing crashes, but accuracy will be bad.
+  explicit LangId(std::unique_ptr<ModelProvider> model_provider);
+
+  virtual ~LangId();
+
+  // Computes an n-best list of language codes and probabilities
+  // corresponding to the most likely languages the given input text is written
+  // in. The list is sorted in descending order by language probability.
+  //
+  // The input text consists of the |num_bytes| bytes that starts at |data|.
+  //
+  // Note: If this LangId object is not valid (see is_valid()) or if this LangId
+  // object can't make a prediction, this method sets the LangIdResult to
+  // contain a single entry with kUnknownLanguageCode with probability 1.
+  void FindLanguages(const char *data, size_t num_bytes,
+                     LangIdResult *result) const;
+
+  // Convenience version of FindLanguages(const char *, size_t, LangIdResult *).
+  void FindLanguages(const string &text, LangIdResult *result) const {
+    FindLanguages(text.data(), text.size(), result);
+  }
+
+  // Returns language code for the most likely language for a piece of text.
+  //
+  // The input text consists of the |num_bytes| bytes that start at |data|.
+  //
+  // Note: this method reports the most likely (1-best) language only if its
+  // probability is high enough; otherwise, it returns
+  // LangId::kUnknownLanguageCode.  The specific probability threshold is tuned
+  // to the needs of an early client.  If you need a different threshold, you
+  // can use FindLanguages (plural) to get the full LangIdResult, and apply your
+  // own threshold.
+  //
+  // Note: if this LangId object is not valid (see is_valid()) or if this LangId
+  // object can't make a prediction, then this method returns
+  // LangId::kUnknownLanguageCode.
+  //
+  string FindLanguage(const char *data, size_t num_bytes) const;
+
+  // Convenience version of FindLanguage(const char *, size_t).
+  string FindLanguage(const string &text) const {
+    return FindLanguage(text.data(), text.size());
+  }
+
+  // Returns true if this object has been correctly initialized and is ready to
+  // perform predictions.  For more info, see doc for LangId
+  // constructor above.
+  bool is_valid() const;
+
+  // Returns the version of the model used by this LangId object.  On success,
+  // the returned version number is a strictly positive integer.  Returns 0 if
+  // the model version can not be determined (e.g., for old models that do not
+  // specify a version number).
+  int GetModelVersion() const;
+
+ private:
+  // Pimpl ("pointer to implementation") pattern, to hide all internals from our
+  // clients.
+  std::unique_ptr<LangIdImpl> pimpl_;
+
+  SAFTM_DISALLOW_COPY_AND_ASSIGN(LangId);
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_LANG_ID_H_
diff --git a/lang_id/lang-id_jni.cc b/lang_id/lang-id_jni.cc
new file mode 100644
index 0000000..7026417
--- /dev/null
+++ b/lang_id/lang-id_jni.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/lang-id_jni.h"
+
+#include <jni.h>
+#include <type_traits>
+#include <vector>
+
+#include "utils/base/logging.h"
+#include "utils/java/scoped_local_ref.h"
+#include "lang_id/fb_model/lang-id-from-fb.h"
+#include "lang_id/lang-id.h"
+
+using libtextclassifier3::ScopedLocalRef;
+using libtextclassifier3::ToStlString;
+using libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFile;
+using libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFileDescriptor;
+using libtextclassifier3::mobile::lang_id::LangId;
+using libtextclassifier3::mobile::lang_id::LangIdResult;
+
+namespace {
+jobjectArray LangIdResultToJObjectArray(JNIEnv* env,
+                                        const LangIdResult& lang_id_result) {
+  const ScopedLocalRef<jclass> result_class(
+      env->FindClass(TC3_PACKAGE_PATH TC3_LANG_ID_CLASS_NAME_STR
+                     "$LanguageResult"),
+      env);
+  if (!result_class) {
+    TC3_LOG(ERROR) << "Couldn't find LanguageResult class.";
+    return nullptr;
+  }
+
+  // clang-format off
+  const std::vector<std::pair<std::string, float>>& predictions =
+      lang_id_result.predictions;
+  // clang-format on
+  const jmethodID result_class_constructor =
+      env->GetMethodID(result_class.get(), "<init>", "(Ljava/lang/String;F)V");
+  const jobjectArray results =
+      env->NewObjectArray(predictions.size(), result_class.get(), nullptr);
+  for (int i = 0; i < predictions.size(); i++) {
+    ScopedLocalRef<jobject> result(
+        env->NewObject(result_class.get(), result_class_constructor,
+                       env->NewStringUTF(predictions[i].first.c_str()),
+                       static_cast<jfloat>(predictions[i].second)));
+    env->SetObjectArrayElement(results, i, result.get());
+  }
+  return results;
+}
+}  // namespace
+
+TC3_JNI_METHOD(jlong, TC3_LANG_ID_CLASS_NAME, nativeNew)
+(JNIEnv* env, jobject thiz, jint fd) {
+  std::unique_ptr<LangId> lang_id = GetLangIdFromFlatbufferFileDescriptor(fd);
+  if (!lang_id->is_valid()) {
+    return reinterpret_cast<jlong>(nullptr);
+  }
+  return reinterpret_cast<jlong>(lang_id.release());
+}
+
+TC3_JNI_METHOD(jlong, TC3_LANG_ID_CLASS_NAME, nativeNewFromPath)
+(JNIEnv* env, jobject thiz, jstring path) {
+  const std::string path_str = ToStlString(env, path);
+  std::unique_ptr<LangId> lang_id = GetLangIdFromFlatbufferFile(path_str);
+  if (!lang_id->is_valid()) {
+    return reinterpret_cast<jlong>(nullptr);
+  }
+  return reinterpret_cast<jlong>(lang_id.release());
+}
+
+TC3_JNI_METHOD(jobjectArray, TC3_LANG_ID_CLASS_NAME, nativeDetectLanguages)
+(JNIEnv* env, jobject clazz, jlong ptr, jstring text) {
+  LangId* model = reinterpret_cast<LangId*>(ptr);
+  if (!model) {
+    return nullptr;
+  }
+
+  const std::string text_str = ToStlString(env, text);
+  LangIdResult result;
+  model->FindLanguages(text_str, &result);
+
+  return LangIdResultToJObjectArray(env, result);
+}
+
+TC3_JNI_METHOD(void, TC3_LANG_ID_CLASS_NAME, nativeClose)
+(JNIEnv* env, jobject clazz, jlong ptr) {
+  if (!ptr) {
+    TC3_LOG(ERROR) << "Trying to close null LangId.";
+    return;
+  }
+  LangId* model = reinterpret_cast<LangId*>(ptr);
+  delete model;
+}
+
+TC3_JNI_METHOD(jint, TC3_LANG_ID_CLASS_NAME, nativeGetVersion)
+(JNIEnv* env, jobject clazz, jlong ptr) {
+  if (!ptr) {
+    return -1;
+  }
+  LangId* model = reinterpret_cast<LangId*>(ptr);
+  return model->GetModelVersion();
+}
+
+TC3_JNI_METHOD(jint, TC3_LANG_ID_CLASS_NAME, nativeGetVersionFromFd)
+(JNIEnv* env, jobject clazz, jint fd) {
+  std::unique_ptr<LangId> lang_id = GetLangIdFromFlatbufferFileDescriptor(fd);
+  if (!lang_id->is_valid()) {
+    return -1;
+  }
+  return lang_id->GetModelVersion();
+}
diff --git a/lang_id/lang-id_jni.h b/lang_id/lang-id_jni.h
new file mode 100644
index 0000000..74a7e2d
--- /dev/null
+++ b/lang_id/lang-id_jni.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// JNI wrapper for LangId.
+
+#ifndef LIBTEXTCLASSIFIER_LANG_ID_LANG_ID_JNI_H_
+#define LIBTEXTCLASSIFIER_LANG_ID_LANG_ID_JNI_H_
+
+#include <jni.h>
+#include <string>
+#include "utils/java/jni-base.h"
+
+#ifndef TC3_LANG_ID_CLASS_NAME
+#define TC3_LANG_ID_CLASS_NAME LangIdModel
+#endif
+
+#define TC3_LANG_ID_CLASS_NAME_STR TC3_ADD_QUOTES(TC3_LANG_ID_CLASS_NAME)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+TC3_JNI_METHOD(jlong, TC3_LANG_ID_CLASS_NAME, nativeNew)
+(JNIEnv* env, jobject clazz, jint fd);
+
+TC3_JNI_METHOD(jlong, TC3_LANG_ID_CLASS_NAME, nativeNewFromPath)
+(JNIEnv* env, jobject clazz, jstring path);
+
+TC3_JNI_METHOD(jobjectArray, TC3_LANG_ID_CLASS_NAME, nativeDetectLanguages)
+(JNIEnv* env, jobject clazz, jlong ptr, jstring text);
+
+TC3_JNI_METHOD(void, TC3_LANG_ID_CLASS_NAME, nativeClose)
+(JNIEnv* env, jobject clazz, jlong ptr);
+
+TC3_JNI_METHOD(jint, TC3_LANG_ID_CLASS_NAME, nativeGetVersion)
+(JNIEnv* env, jobject clazz, jlong ptr);
+
+TC3_JNI_METHOD(jint, TC3_LANG_ID_CLASS_NAME, nativeGetVersionFromFd)
+(JNIEnv* env, jobject clazz, jint fd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // LIBTEXTCLASSIFIER_LANG_ID_LANG_ID_JNI_H_
diff --git a/lang_id/light-sentence.h b/lang_id/light-sentence.h
new file mode 100644
index 0000000..2937549
--- /dev/null
+++ b/lang_id/light-sentence.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_LIGHT_SENTENCE_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_LIGHT_SENTENCE_H_
+
+#include <string>
+#include <vector>
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Very simplified alternative to heavy sentence.proto, for the purpose of
+// LangId.  It turns out that in this case, all we need is a vector of strings,
+// which uses a lot less code size than a Sentence proto.
+using LightSentence = std::vector<string>;
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_LIGHT_SENTENCE_H_
diff --git a/lang_id/model-provider.h b/lang_id/model-provider.h
new file mode 100644
index 0000000..a076871
--- /dev/null
+++ b/lang_id/model-provider.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_MODEL_PROVIDER_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_MODEL_PROVIDER_H_
+
+#include <string>
+#include <vector>
+
+#include "lang_id/common/embedding-network-params.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Interface for accessing parameters for the LangId model.
+//
+// Note: some clients prefer to include the model parameters in the binary,
+// others prefer loading them from a separate file.  This file provides a common
+// interface for these alternative mechanisms.
+class ModelProvider {
+ public:
+  virtual ~ModelProvider() = default;
+
+  // Returns true if this ModelProvider has been successfully constructed (e.g.,
+  // can return false if an underlying model file could not be read).  Clients
+  // should not use invalid ModelProviders.
+  bool is_valid() { return valid_; }
+
+  // Returns the TaskContext with parameters for the LangId model.  E.g., one
+  // important parameter specifies the features to use.
+  virtual const TaskContext *GetTaskContext() const = 0;
+
+  // Returns parameters for the underlying Neurosis feed-forward neural network.
+  virtual const EmbeddingNetworkParams *GetNnParams() const = 0;
+
+  // Returns list of languages recognized by the model.  Each element of the
+  // returned vector should be a BCP-47 language code (e.g., "en", "ro", etc).
+  // Language at index i from the returned vector corresponds to softmax label
+  // i.
+  virtual std::vector<string> GetLanguages() const = 0;
+
+ protected:
+  bool valid_ = false;
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_MODEL_PROVIDER_H_
diff --git a/lang_id/script/approx-script-data.cc b/lang_id/script/approx-script-data.cc
new file mode 100755
index 0000000..1ac5cb6
--- /dev/null
+++ b/lang_id/script/approx-script-data.cc
@@ -0,0 +1,1122 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Internal data for approx-script.cc; see approx-script-data.h
+//
+// DO NOT EDIT BY HAND
+//
+// Generated by
+// lang_id/script/update-script-data.sh
+
+#include "lang_id/script/approx-script-data.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace approx_script_internal {
+
+const int kNumRanges = 359;
+
+const uint32 kRangeFirst[] = {
+  65,  // Range #0: [65, 90, Latin]
+  97,  // Range #1: [97, 122, Latin]
+  170,  // Range #2: [170, 170, Latin]
+  186,  // Range #3: [186, 186, Latin]
+  192,  // Range #4: [192, 214, Latin]
+  216,  // Range #5: [216, 246, Latin]
+  248,  // Range #6: [248, 696, Latin]
+  736,  // Range #7: [736, 740, Latin]
+  746,  // Range #8: [746, 747, Bopomofo]
+  880,  // Range #9: [880, 883, Greek]
+  885,  // Range #10: [885, 893, Greek]
+  895,  // Range #11: [895, 900, Greek]
+  902,  // Range #12: [902, 902, Greek]
+  904,  // Range #13: [904, 993, Greek]
+  994,  // Range #14: [994, 1007, Coptic]
+  1008,  // Range #15: [1008, 1023, Greek]
+  1024,  // Range #16: [1024, 1156, Cyrillic]
+  1159,  // Range #17: [1159, 1327, Cyrillic]
+  1329,  // Range #18: [1329, 1416, Armenian]
+  1418,  // Range #19: [1418, 1423, Armenian]
+  1425,  // Range #20: [1425, 1479, Hebrew]
+  1488,  // Range #21: [1488, 1524, Hebrew]
+  1536,  // Range #22: [1536, 1540, Arabic]
+  1542,  // Range #23: [1542, 1547, Arabic]
+  1549,  // Range #24: [1549, 1562, Arabic]
+  1564,  // Range #25: [1564, 1566, Arabic]
+  1568,  // Range #26: [1568, 1599, Arabic]
+  1601,  // Range #27: [1601, 1610, Arabic]
+  1622,  // Range #28: [1622, 1647, Arabic]
+  1649,  // Range #29: [1649, 1756, Arabic]
+  1758,  // Range #30: [1758, 1791, Arabic]
+  1792,  // Range #31: [1792, 1871, Syriac]
+  1872,  // Range #32: [1872, 1919, Arabic]
+  1920,  // Range #33: [1920, 1969, Thaana]
+  1984,  // Range #34: [1984, 2047, Nko]
+  2048,  // Range #35: [2048, 2110, Samaritan]
+  2112,  // Range #36: [2112, 2142, Mandaic]
+  2144,  // Range #37: [2144, 2154, Syriac]
+  2208,  // Range #38: [2208, 2237, Arabic]
+  2259,  // Range #39: [2259, 2273, Arabic]
+  2275,  // Range #40: [2275, 2303, Arabic]
+  2304,  // Range #41: [2304, 2384, Devanagari]
+  2387,  // Range #42: [2387, 2403, Devanagari]
+  2406,  // Range #43: [2406, 2431, Devanagari]
+  2432,  // Range #44: [2432, 2510, Bengali]
+  2519,  // Range #45: [2519, 2558, Bengali]
+  2561,  // Range #46: [2561, 2641, Gurmukhi]
+  2649,  // Range #47: [2649, 2654, Gurmukhi]
+  2662,  // Range #48: [2662, 2678, Gurmukhi]
+  2689,  // Range #49: [2689, 2768, Gujarati]
+  2784,  // Range #50: [2784, 2801, Gujarati]
+  2809,  // Range #51: [2809, 2815, Gujarati]
+  2817,  // Range #52: [2817, 2893, Oriya]
+  2902,  // Range #53: [2902, 2935, Oriya]
+  2946,  // Range #54: [2946, 3024, Tamil]
+  3031,  // Range #55: [3031, 3031, Tamil]
+  3046,  // Range #56: [3046, 3066, Tamil]
+  3072,  // Range #57: [3072, 3149, Telugu]
+  3157,  // Range #58: [3157, 3162, Telugu]
+  3168,  // Range #59: [3168, 3183, Telugu]
+  3192,  // Range #60: [3192, 3199, Telugu]
+  3200,  // Range #61: [3200, 3277, Kannada]
+  3285,  // Range #62: [3285, 3286, Kannada]
+  3294,  // Range #63: [3294, 3314, Kannada]
+  3328,  // Range #64: [3328, 3455, Malayalam]
+  3458,  // Range #65: [3458, 3551, Sinhala]
+  3558,  // Range #66: [3558, 3572, Sinhala]
+  3585,  // Range #67: [3585, 3642, Thai]
+  3648,  // Range #68: [3648, 3675, Thai]
+  3713,  // Range #69: [3713, 3725, Lao]
+  3732,  // Range #70: [3732, 3807, Lao]
+  3840,  // Range #71: [3840, 4052, Tibetan]
+  4057,  // Range #72: [4057, 4058, Tibetan]
+  4096,  // Range #73: [4096, 4255, Myanmar]
+  4256,  // Range #74: [4256, 4295, Georgian]
+  4301,  // Range #75: [4301, 4346, Georgian]
+  4348,  // Range #76: [4348, 4351, Georgian]
+  4352,  // Range #77: [4352, 4607, Hangul]
+  4608,  // Range #78: [4608, 5017, Ethiopic]
+  5024,  // Range #79: [5024, 5117, Cherokee]
+  5120,  // Range #80: [5120, 5759, Canadian_Aboriginal]
+  5760,  // Range #81: [5760, 5788, Ogham]
+  5792,  // Range #82: [5792, 5866, Runic]
+  5870,  // Range #83: [5870, 5880, Runic]
+  5888,  // Range #84: [5888, 5908, Tagalog]
+  5920,  // Range #85: [5920, 5940, Hanunoo]
+  5952,  // Range #86: [5952, 5971, Buhid]
+  5984,  // Range #87: [5984, 6003, Tagbanwa]
+  6016,  // Range #88: [6016, 6121, Khmer]
+  6128,  // Range #89: [6128, 6137, Khmer]
+  6144,  // Range #90: [6144, 6145, Mongolian]
+  6148,  // Range #91: [6148, 6148, Mongolian]
+  6150,  // Range #92: [6150, 6169, Mongolian]
+  6176,  // Range #93: [6176, 6264, Mongolian]
+  6272,  // Range #94: [6272, 6314, Mongolian]
+  6320,  // Range #95: [6320, 6389, Canadian_Aboriginal]
+  6400,  // Range #96: [6400, 6479, Limbu]
+  6480,  // Range #97: [6480, 6516, Tai_Le]
+  6528,  // Range #98: [6528, 6601, New_Tai_Lue]
+  6608,  // Range #99: [6608, 6623, New_Tai_Lue]
+  6624,  // Range #100: [6624, 6655, Khmer]
+  6656,  // Range #101: [6656, 6687, Buginese]
+  6688,  // Range #102: [6688, 6793, Tai_Tham]
+  6800,  // Range #103: [6800, 6809, Tai_Tham]
+  6816,  // Range #104: [6816, 6829, Tai_Tham]
+  6912,  // Range #105: [6912, 7036, Balinese]
+  7040,  // Range #106: [7040, 7103, Sundanese]
+  7104,  // Range #107: [7104, 7155, Batak]
+  7164,  // Range #108: [7164, 7167, Batak]
+  7168,  // Range #109: [7168, 7247, Lepcha]
+  7248,  // Range #110: [7248, 7295, Ol_Chiki]
+  7296,  // Range #111: [7296, 7304, Cyrillic]
+  7312,  // Range #112: [7312, 7359, Georgian]
+  7360,  // Range #113: [7360, 7367, Sundanese]
+  7424,  // Range #114: [7424, 7461, Latin]
+  7462,  // Range #115: [7462, 7466, Greek]
+  7467,  // Range #116: [7467, 7467, Cyrillic]
+  7468,  // Range #117: [7468, 7516, Latin]
+  7517,  // Range #118: [7517, 7521, Greek]
+  7522,  // Range #119: [7522, 7525, Latin]
+  7526,  // Range #120: [7526, 7530, Greek]
+  7531,  // Range #121: [7531, 7543, Latin]
+  7544,  // Range #122: [7544, 7544, Cyrillic]
+  7545,  // Range #123: [7545, 7614, Latin]
+  7615,  // Range #124: [7615, 7615, Greek]
+  7680,  // Range #125: [7680, 7935, Latin]
+  7936,  // Range #126: [7936, 8190, Greek]
+  8305,  // Range #127: [8305, 8305, Latin]
+  8319,  // Range #128: [8319, 8319, Latin]
+  8336,  // Range #129: [8336, 8348, Latin]
+  8486,  // Range #130: [8486, 8486, Greek]
+  8490,  // Range #131: [8490, 8491, Latin]
+  8498,  // Range #132: [8498, 8498, Latin]
+  8526,  // Range #133: [8526, 8526, Latin]
+  8544,  // Range #134: [8544, 8584, Latin]
+  10240,  // Range #135: [10240, 10495, Braille]
+  11264,  // Range #136: [11264, 11358, Glagolitic]
+  11360,  // Range #137: [11360, 11391, Latin]
+  11392,  // Range #138: [11392, 11507, Coptic]
+  11513,  // Range #139: [11513, 11519, Coptic]
+  11520,  // Range #140: [11520, 11559, Georgian]
+  11565,  // Range #141: [11565, 11565, Georgian]
+  11568,  // Range #142: [11568, 11623, Tifinagh]
+  11631,  // Range #143: [11631, 11632, Tifinagh]
+  11647,  // Range #144: [11647, 11647, Tifinagh]
+  11648,  // Range #145: [11648, 11670, Ethiopic]
+  11680,  // Range #146: [11680, 11742, Ethiopic]
+  11744,  // Range #147: [11744, 11775, Cyrillic]
+  11904,  // Range #148: [11904, 12019, Han]
+  12032,  // Range #149: [12032, 12245, Han]
+  12293,  // Range #150: [12293, 12293, Han]
+  12295,  // Range #151: [12295, 12295, Han]
+  12321,  // Range #152: [12321, 12329, Han]
+  12334,  // Range #153: [12334, 12335, Hangul]
+  12344,  // Range #154: [12344, 12347, Han]
+  12353,  // Range #155: [12353, 12438, Hiragana]
+  12445,  // Range #156: [12445, 12447, Hiragana]
+  12449,  // Range #157: [12449, 12538, Katakana]
+  12541,  // Range #158: [12541, 12543, Katakana]
+  12549,  // Range #159: [12549, 12591, Bopomofo]
+  12593,  // Range #160: [12593, 12686, Hangul]
+  12704,  // Range #161: [12704, 12730, Bopomofo]
+  12784,  // Range #162: [12784, 12799, Katakana]
+  12800,  // Range #163: [12800, 12830, Hangul]
+  12896,  // Range #164: [12896, 12926, Hangul]
+  13008,  // Range #165: [13008, 13143, Katakana]
+  13312,  // Range #166: [13312, 19893, Han]
+  19968,  // Range #167: [19968, 40943, Han]
+  40960,  // Range #168: [40960, 42182, Yi]
+  42192,  // Range #169: [42192, 42239, Lisu]
+  42240,  // Range #170: [42240, 42539, Vai]
+  42560,  // Range #171: [42560, 42655, Cyrillic]
+  42656,  // Range #172: [42656, 42743, Bamum]
+  42786,  // Range #173: [42786, 42887, Latin]
+  42891,  // Range #174: [42891, 42937, Latin]
+  42999,  // Range #175: [42999, 43007, Latin]
+  43008,  // Range #176: [43008, 43051, Syloti_Nagri]
+  43072,  // Range #177: [43072, 43127, Phags_Pa]
+  43136,  // Range #178: [43136, 43205, Saurashtra]
+  43214,  // Range #179: [43214, 43225, Saurashtra]
+  43232,  // Range #180: [43232, 43263, Devanagari]
+  43264,  // Range #181: [43264, 43309, Kayah_Li]
+  43311,  // Range #182: [43311, 43311, Kayah_Li]
+  43312,  // Range #183: [43312, 43347, Rejang]
+  43359,  // Range #184: [43359, 43359, Rejang]
+  43360,  // Range #185: [43360, 43388, Hangul]
+  43392,  // Range #186: [43392, 43469, Javanese]
+  43472,  // Range #187: [43472, 43487, Javanese]
+  43488,  // Range #188: [43488, 43518, Myanmar]
+  43520,  // Range #189: [43520, 43574, Cham]
+  43584,  // Range #190: [43584, 43615, Cham]
+  43616,  // Range #191: [43616, 43647, Myanmar]
+  43648,  // Range #192: [43648, 43714, Tai_Viet]
+  43739,  // Range #193: [43739, 43743, Tai_Viet]
+  43744,  // Range #194: [43744, 43766, Meetei_Mayek]
+  43777,  // Range #195: [43777, 43798, Ethiopic]
+  43808,  // Range #196: [43808, 43822, Ethiopic]
+  43824,  // Range #197: [43824, 43866, Latin]
+  43868,  // Range #198: [43868, 43876, Latin]
+  43877,  // Range #199: [43877, 43877, Greek]
+  43888,  // Range #200: [43888, 43967, Cherokee]
+  43968,  // Range #201: [43968, 44025, Meetei_Mayek]
+  44032,  // Range #202: [44032, 55203, Hangul]
+  55216,  // Range #203: [55216, 55291, Hangul]
+  63744,  // Range #204: [63744, 64217, Han]
+  64256,  // Range #205: [64256, 64262, Latin]
+  64275,  // Range #206: [64275, 64279, Armenian]
+  64285,  // Range #207: [64285, 64335, Hebrew]
+  64336,  // Range #208: [64336, 64449, Arabic]
+  64467,  // Range #209: [64467, 64829, Arabic]
+  64848,  // Range #210: [64848, 64967, Arabic]
+  65008,  // Range #211: [65008, 65021, Arabic]
+  65070,  // Range #212: [65070, 65071, Cyrillic]
+  65136,  // Range #213: [65136, 65276, Arabic]
+  65313,  // Range #214: [65313, 65338, Latin]
+  65345,  // Range #215: [65345, 65370, Latin]
+  65382,  // Range #216: [65382, 65391, Katakana]
+  65393,  // Range #217: [65393, 65437, Katakana]
+  65440,  // Range #218: [65440, 65500, Hangul]
+  65536,  // Range #219: [65536, 65629, Linear_B]
+  65664,  // Range #220: [65664, 65786, Linear_B]
+  65856,  // Range #221: [65856, 65934, Greek]
+  65952,  // Range #222: [65952, 65952, Greek]
+  66176,  // Range #223: [66176, 66204, Lycian]
+  66208,  // Range #224: [66208, 66256, Carian]
+  66304,  // Range #225: [66304, 66339, Old_Italic]
+  66349,  // Range #226: [66349, 66351, Old_Italic]
+  66352,  // Range #227: [66352, 66378, Gothic]
+  66384,  // Range #228: [66384, 66426, Old_Permic]
+  66432,  // Range #229: [66432, 66463, Ugaritic]
+  66464,  // Range #230: [66464, 66517, Old_Persian]
+  66560,  // Range #231: [66560, 66639, Deseret]
+  66640,  // Range #232: [66640, 66687, Shavian]
+  66688,  // Range #233: [66688, 66729, Osmanya]
+  66736,  // Range #234: [66736, 66811, Osage]
+  66816,  // Range #235: [66816, 66855, Elbasan]
+  66864,  // Range #236: [66864, 66915, Caucasian_Albanian]
+  66927,  // Range #237: [66927, 66927, Caucasian_Albanian]
+  67072,  // Range #238: [67072, 67382, Linear_A]
+  67392,  // Range #239: [67392, 67413, Linear_A]
+  67424,  // Range #240: [67424, 67431, Linear_A]
+  67584,  // Range #241: [67584, 67647, Cypriot]
+  67648,  // Range #242: [67648, 67679, Imperial_Aramaic]
+  67680,  // Range #243: [67680, 67711, Palmyrene]
+  67712,  // Range #244: [67712, 67742, Nabataean]
+  67751,  // Range #245: [67751, 67759, Nabataean]
+  67808,  // Range #246: [67808, 67829, Hatran]
+  67835,  // Range #247: [67835, 67839, Hatran]
+  67840,  // Range #248: [67840, 67871, Phoenician]
+  67872,  // Range #249: [67872, 67897, Lydian]
+  67903,  // Range #250: [67903, 67903, Lydian]
+  67968,  // Range #251: [67968, 67999, Meroitic_Hieroglyphs]
+  68000,  // Range #252: [68000, 68095, Meroitic_Cursive]
+  68096,  // Range #253: [68096, 68102, Kharoshthi]
+  68108,  // Range #254: [68108, 68168, Kharoshthi]
+  68176,  // Range #255: [68176, 68184, Kharoshthi]
+  68192,  // Range #256: [68192, 68223, Old_South_Arabian]
+  68224,  // Range #257: [68224, 68255, Old_North_Arabian]
+  68288,  // Range #258: [68288, 68342, Manichaean]
+  68352,  // Range #259: [68352, 68415, Avestan]
+  68416,  // Range #260: [68416, 68447, Inscriptional_Parthian]
+  68448,  // Range #261: [68448, 68466, Inscriptional_Pahlavi]
+  68472,  // Range #262: [68472, 68479, Inscriptional_Pahlavi]
+  68480,  // Range #263: [68480, 68497, Psalter_Pahlavi]
+  68505,  // Range #264: [68505, 68508, Psalter_Pahlavi]
+  68521,  // Range #265: [68521, 68527, Psalter_Pahlavi]
+  68608,  // Range #266: [68608, 68680, Old_Turkic]
+  68736,  // Range #267: [68736, 68786, Old_Hungarian]
+  68800,  // Range #268: [68800, 68850, Old_Hungarian]
+  68858,  // Range #269: [68858, 68863, Old_Hungarian]
+  68864,  // Range #270: [68864, 68903, Hanifi_Rohingya]
+  68912,  // Range #271: [68912, 68921, Hanifi_Rohingya]
+  69216,  // Range #272: [69216, 69246, Arabic]
+  69376,  // Range #273: [69376, 69415, Old_Sogdian]
+  69424,  // Range #274: [69424, 69465, Sogdian]
+  69632,  // Range #275: [69632, 69743, Brahmi]
+  69759,  // Range #276: [69759, 69759, Brahmi]
+  69760,  // Range #277: [69760, 69825, Kaithi]
+  69837,  // Range #278: [69837, 69837, Kaithi]
+  69840,  // Range #279: [69840, 69864, Sora_Sompeng]
+  69872,  // Range #280: [69872, 69881, Sora_Sompeng]
+  69888,  // Range #281: [69888, 69958, Chakma]
+  69968,  // Range #282: [69968, 70006, Mahajani]
+  70016,  // Range #283: [70016, 70111, Sharada]
+  70113,  // Range #284: [70113, 70132, Sinhala]
+  70144,  // Range #285: [70144, 70206, Khojki]
+  70272,  // Range #286: [70272, 70313, Multani]
+  70320,  // Range #287: [70320, 70378, Khudawadi]
+  70384,  // Range #288: [70384, 70393, Khudawadi]
+  70400,  // Range #289: [70400, 70457, Grantha]
+  70460,  // Range #290: [70460, 70480, Grantha]
+  70487,  // Range #291: [70487, 70487, Grantha]
+  70493,  // Range #292: [70493, 70516, Grantha]
+  70656,  // Range #293: [70656, 70750, Newa]
+  70784,  // Range #294: [70784, 70855, Tirhuta]
+  70864,  // Range #295: [70864, 70873, Tirhuta]
+  71040,  // Range #296: [71040, 71133, Siddham]
+  71168,  // Range #297: [71168, 71236, Modi]
+  71248,  // Range #298: [71248, 71257, Modi]
+  71264,  // Range #299: [71264, 71276, Mongolian]
+  71296,  // Range #300: [71296, 71351, Takri]
+  71360,  // Range #301: [71360, 71369, Takri]
+  71424,  // Range #302: [71424, 71487, Ahom]
+  71680,  // Range #303: [71680, 71739, Dogra]
+  71840,  // Range #304: [71840, 71922, Warang_Citi]
+  71935,  // Range #305: [71935, 71935, Warang_Citi]
+  72192,  // Range #306: [72192, 72263, Zanabazar_Square]
+  72272,  // Range #307: [72272, 72354, Soyombo]
+  72384,  // Range #308: [72384, 72440, Pau_Cin_Hau]
+  72704,  // Range #309: [72704, 72773, Bhaiksuki]
+  72784,  // Range #310: [72784, 72812, Bhaiksuki]
+  72816,  // Range #311: [72816, 72886, Marchen]
+  72960,  // Range #312: [72960, 73031, Masaram_Gondi]
+  73040,  // Range #313: [73040, 73049, Masaram_Gondi]
+  73056,  // Range #314: [73056, 73112, Gunjala_Gondi]
+  73120,  // Range #315: [73120, 73129, Gunjala_Gondi]
+  73440,  // Range #316: [73440, 73464, Makasar]
+  73728,  // Range #317: [73728, 74649, Cuneiform]
+  74752,  // Range #318: [74752, 74868, Cuneiform]
+  74880,  // Range #319: [74880, 75075, Cuneiform]
+  77824,  // Range #320: [77824, 78894, Egyptian_Hieroglyphs]
+  82944,  // Range #321: [82944, 83526, Anatolian_Hieroglyphs]
+  92160,  // Range #322: [92160, 92728, Bamum]
+  92736,  // Range #323: [92736, 92783, Mro]
+  92880,  // Range #324: [92880, 92917, Bassa_Vah]
+  92928,  // Range #325: [92928, 92997, Pahawh_Hmong]
+  93008,  // Range #326: [93008, 93047, Pahawh_Hmong]
+  93053,  // Range #327: [93053, 93071, Pahawh_Hmong]
+  93760,  // Range #328: [93760, 93850, Medefaidrin]
+  93952,  // Range #329: [93952, 94020, Miao]
+  94032,  // Range #330: [94032, 94078, Miao]
+  94095,  // Range #331: [94095, 94111, Miao]
+  94176,  // Range #332: [94176, 94176, Tangut]
+  94177,  // Range #333: [94177, 94177, Nushu]
+  94208,  // Range #334: [94208, 100337, Tangut]
+  100352,  // Range #335: [100352, 101106, Tangut]
+  110592,  // Range #336: [110592, 110592, Katakana]
+  110593,  // Range #337: [110593, 110878, Hiragana]
+  110960,  // Range #338: [110960, 111355, Nushu]
+  113664,  // Range #339: [113664, 113770, Duployan]
+  113776,  // Range #340: [113776, 113800, Duployan]
+  113808,  // Range #341: [113808, 113823, Duployan]
+  119296,  // Range #342: [119296, 119365, Greek]
+  120832,  // Range #343: [120832, 121483, SignWriting]
+  121499,  // Range #344: [121499, 121519, SignWriting]
+  122880,  // Range #345: [122880, 122922, Glagolitic]
+  124928,  // Range #346: [124928, 125142, Mende_Kikakui]
+  125184,  // Range #347: [125184, 125258, Adlam]
+  125264,  // Range #348: [125264, 125279, Adlam]
+  126464,  // Range #349: [126464, 126523, Arabic]
+  126530,  // Range #350: [126530, 126619, Arabic]
+  126625,  // Range #351: [126625, 126651, Arabic]
+  126704,  // Range #352: [126704, 126705, Arabic]
+  127488,  // Range #353: [127488, 127488, Hiragana]
+  131072,  // Range #354: [131072, 173782, Han]
+  173824,  // Range #355: [173824, 177972, Han]
+  177984,  // Range #356: [177984, 183969, Han]
+  183984,  // Range #357: [183984, 191456, Han]
+  194560,  // Range #358: [194560, 195101, Han]
+};
+
+const uint16 kRangeSizeMinusOne[] = {
+  25,  // Range #0: [65, 90, Latin]
+  25,  // Range #1: [97, 122, Latin]
+  0,  // Range #2: [170, 170, Latin]
+  0,  // Range #3: [186, 186, Latin]
+  22,  // Range #4: [192, 214, Latin]
+  30,  // Range #5: [216, 246, Latin]
+  448,  // Range #6: [248, 696, Latin]
+  4,  // Range #7: [736, 740, Latin]
+  1,  // Range #8: [746, 747, Bopomofo]
+  3,  // Range #9: [880, 883, Greek]
+  8,  // Range #10: [885, 893, Greek]
+  5,  // Range #11: [895, 900, Greek]
+  0,  // Range #12: [902, 902, Greek]
+  89,  // Range #13: [904, 993, Greek]
+  13,  // Range #14: [994, 1007, Coptic]
+  15,  // Range #15: [1008, 1023, Greek]
+  132,  // Range #16: [1024, 1156, Cyrillic]
+  168,  // Range #17: [1159, 1327, Cyrillic]
+  87,  // Range #18: [1329, 1416, Armenian]
+  5,  // Range #19: [1418, 1423, Armenian]
+  54,  // Range #20: [1425, 1479, Hebrew]
+  36,  // Range #21: [1488, 1524, Hebrew]
+  4,  // Range #22: [1536, 1540, Arabic]
+  5,  // Range #23: [1542, 1547, Arabic]
+  13,  // Range #24: [1549, 1562, Arabic]
+  2,  // Range #25: [1564, 1566, Arabic]
+  31,  // Range #26: [1568, 1599, Arabic]
+  9,  // Range #27: [1601, 1610, Arabic]
+  25,  // Range #28: [1622, 1647, Arabic]
+  107,  // Range #29: [1649, 1756, Arabic]
+  33,  // Range #30: [1758, 1791, Arabic]
+  79,  // Range #31: [1792, 1871, Syriac]
+  47,  // Range #32: [1872, 1919, Arabic]
+  49,  // Range #33: [1920, 1969, Thaana]
+  63,  // Range #34: [1984, 2047, Nko]
+  62,  // Range #35: [2048, 2110, Samaritan]
+  30,  // Range #36: [2112, 2142, Mandaic]
+  10,  // Range #37: [2144, 2154, Syriac]
+  29,  // Range #38: [2208, 2237, Arabic]
+  14,  // Range #39: [2259, 2273, Arabic]
+  28,  // Range #40: [2275, 2303, Arabic]
+  80,  // Range #41: [2304, 2384, Devanagari]
+  16,  // Range #42: [2387, 2403, Devanagari]
+  25,  // Range #43: [2406, 2431, Devanagari]
+  78,  // Range #44: [2432, 2510, Bengali]
+  39,  // Range #45: [2519, 2558, Bengali]
+  80,  // Range #46: [2561, 2641, Gurmukhi]
+  5,  // Range #47: [2649, 2654, Gurmukhi]
+  16,  // Range #48: [2662, 2678, Gurmukhi]
+  79,  // Range #49: [2689, 2768, Gujarati]
+  17,  // Range #50: [2784, 2801, Gujarati]
+  6,  // Range #51: [2809, 2815, Gujarati]
+  76,  // Range #52: [2817, 2893, Oriya]
+  33,  // Range #53: [2902, 2935, Oriya]
+  78,  // Range #54: [2946, 3024, Tamil]
+  0,  // Range #55: [3031, 3031, Tamil]
+  20,  // Range #56: [3046, 3066, Tamil]
+  77,  // Range #57: [3072, 3149, Telugu]
+  5,  // Range #58: [3157, 3162, Telugu]
+  15,  // Range #59: [3168, 3183, Telugu]
+  7,  // Range #60: [3192, 3199, Telugu]
+  77,  // Range #61: [3200, 3277, Kannada]
+  1,  // Range #62: [3285, 3286, Kannada]
+  20,  // Range #63: [3294, 3314, Kannada]
+  127,  // Range #64: [3328, 3455, Malayalam]
+  93,  // Range #65: [3458, 3551, Sinhala]
+  14,  // Range #66: [3558, 3572, Sinhala]
+  57,  // Range #67: [3585, 3642, Thai]
+  27,  // Range #68: [3648, 3675, Thai]
+  12,  // Range #69: [3713, 3725, Lao]
+  75,  // Range #70: [3732, 3807, Lao]
+  212,  // Range #71: [3840, 4052, Tibetan]
+  1,  // Range #72: [4057, 4058, Tibetan]
+  159,  // Range #73: [4096, 4255, Myanmar]
+  39,  // Range #74: [4256, 4295, Georgian]
+  45,  // Range #75: [4301, 4346, Georgian]
+  3,  // Range #76: [4348, 4351, Georgian]
+  255,  // Range #77: [4352, 4607, Hangul]
+  409,  // Range #78: [4608, 5017, Ethiopic]
+  93,  // Range #79: [5024, 5117, Cherokee]
+  639,  // Range #80: [5120, 5759, Canadian_Aboriginal]
+  28,  // Range #81: [5760, 5788, Ogham]
+  74,  // Range #82: [5792, 5866, Runic]
+  10,  // Range #83: [5870, 5880, Runic]
+  20,  // Range #84: [5888, 5908, Tagalog]
+  20,  // Range #85: [5920, 5940, Hanunoo]
+  19,  // Range #86: [5952, 5971, Buhid]
+  19,  // Range #87: [5984, 6003, Tagbanwa]
+  105,  // Range #88: [6016, 6121, Khmer]
+  9,  // Range #89: [6128, 6137, Khmer]
+  1,  // Range #90: [6144, 6145, Mongolian]
+  0,  // Range #91: [6148, 6148, Mongolian]
+  19,  // Range #92: [6150, 6169, Mongolian]
+  88,  // Range #93: [6176, 6264, Mongolian]
+  42,  // Range #94: [6272, 6314, Mongolian]
+  69,  // Range #95: [6320, 6389, Canadian_Aboriginal]
+  79,  // Range #96: [6400, 6479, Limbu]
+  36,  // Range #97: [6480, 6516, Tai_Le]
+  73,  // Range #98: [6528, 6601, New_Tai_Lue]
+  15,  // Range #99: [6608, 6623, New_Tai_Lue]
+  31,  // Range #100: [6624, 6655, Khmer]
+  31,  // Range #101: [6656, 6687, Buginese]
+  105,  // Range #102: [6688, 6793, Tai_Tham]
+  9,  // Range #103: [6800, 6809, Tai_Tham]
+  13,  // Range #104: [6816, 6829, Tai_Tham]
+  124,  // Range #105: [6912, 7036, Balinese]
+  63,  // Range #106: [7040, 7103, Sundanese]
+  51,  // Range #107: [7104, 7155, Batak]
+  3,  // Range #108: [7164, 7167, Batak]
+  79,  // Range #109: [7168, 7247, Lepcha]
+  47,  // Range #110: [7248, 7295, Ol_Chiki]
+  8,  // Range #111: [7296, 7304, Cyrillic]
+  47,  // Range #112: [7312, 7359, Georgian]
+  7,  // Range #113: [7360, 7367, Sundanese]
+  37,  // Range #114: [7424, 7461, Latin]
+  4,  // Range #115: [7462, 7466, Greek]
+  0,  // Range #116: [7467, 7467, Cyrillic]
+  48,  // Range #117: [7468, 7516, Latin]
+  4,  // Range #118: [7517, 7521, Greek]
+  3,  // Range #119: [7522, 7525, Latin]
+  4,  // Range #120: [7526, 7530, Greek]
+  12,  // Range #121: [7531, 7543, Latin]
+  0,  // Range #122: [7544, 7544, Cyrillic]
+  69,  // Range #123: [7545, 7614, Latin]
+  0,  // Range #124: [7615, 7615, Greek]
+  255,  // Range #125: [7680, 7935, Latin]
+  254,  // Range #126: [7936, 8190, Greek]
+  0,  // Range #127: [8305, 8305, Latin]
+  0,  // Range #128: [8319, 8319, Latin]
+  12,  // Range #129: [8336, 8348, Latin]
+  0,  // Range #130: [8486, 8486, Greek]
+  1,  // Range #131: [8490, 8491, Latin]
+  0,  // Range #132: [8498, 8498, Latin]
+  0,  // Range #133: [8526, 8526, Latin]
+  40,  // Range #134: [8544, 8584, Latin]
+  255,  // Range #135: [10240, 10495, Braille]
+  94,  // Range #136: [11264, 11358, Glagolitic]
+  31,  // Range #137: [11360, 11391, Latin]
+  115,  // Range #138: [11392, 11507, Coptic]
+  6,  // Range #139: [11513, 11519, Coptic]
+  39,  // Range #140: [11520, 11559, Georgian]
+  0,  // Range #141: [11565, 11565, Georgian]
+  55,  // Range #142: [11568, 11623, Tifinagh]
+  1,  // Range #143: [11631, 11632, Tifinagh]
+  0,  // Range #144: [11647, 11647, Tifinagh]
+  22,  // Range #145: [11648, 11670, Ethiopic]
+  62,  // Range #146: [11680, 11742, Ethiopic]
+  31,  // Range #147: [11744, 11775, Cyrillic]
+  115,  // Range #148: [11904, 12019, Han]
+  213,  // Range #149: [12032, 12245, Han]
+  0,  // Range #150: [12293, 12293, Han]
+  0,  // Range #151: [12295, 12295, Han]
+  8,  // Range #152: [12321, 12329, Han]
+  1,  // Range #153: [12334, 12335, Hangul]
+  3,  // Range #154: [12344, 12347, Han]
+  85,  // Range #155: [12353, 12438, Hiragana]
+  2,  // Range #156: [12445, 12447, Hiragana]
+  89,  // Range #157: [12449, 12538, Katakana]
+  2,  // Range #158: [12541, 12543, Katakana]
+  42,  // Range #159: [12549, 12591, Bopomofo]
+  93,  // Range #160: [12593, 12686, Hangul]
+  26,  // Range #161: [12704, 12730, Bopomofo]
+  15,  // Range #162: [12784, 12799, Katakana]
+  30,  // Range #163: [12800, 12830, Hangul]
+  30,  // Range #164: [12896, 12926, Hangul]
+  135,  // Range #165: [13008, 13143, Katakana]
+  6581,  // Range #166: [13312, 19893, Han]
+  20975,  // Range #167: [19968, 40943, Han]
+  1222,  // Range #168: [40960, 42182, Yi]
+  47,  // Range #169: [42192, 42239, Lisu]
+  299,  // Range #170: [42240, 42539, Vai]
+  95,  // Range #171: [42560, 42655, Cyrillic]
+  87,  // Range #172: [42656, 42743, Bamum]
+  101,  // Range #173: [42786, 42887, Latin]
+  46,  // Range #174: [42891, 42937, Latin]
+  8,  // Range #175: [42999, 43007, Latin]
+  43,  // Range #176: [43008, 43051, Syloti_Nagri]
+  55,  // Range #177: [43072, 43127, Phags_Pa]
+  69,  // Range #178: [43136, 43205, Saurashtra]
+  11,  // Range #179: [43214, 43225, Saurashtra]
+  31,  // Range #180: [43232, 43263, Devanagari]
+  45,  // Range #181: [43264, 43309, Kayah_Li]
+  0,  // Range #182: [43311, 43311, Kayah_Li]
+  35,  // Range #183: [43312, 43347, Rejang]
+  0,  // Range #184: [43359, 43359, Rejang]
+  28,  // Range #185: [43360, 43388, Hangul]
+  77,  // Range #186: [43392, 43469, Javanese]
+  15,  // Range #187: [43472, 43487, Javanese]
+  30,  // Range #188: [43488, 43518, Myanmar]
+  54,  // Range #189: [43520, 43574, Cham]
+  31,  // Range #190: [43584, 43615, Cham]
+  31,  // Range #191: [43616, 43647, Myanmar]
+  66,  // Range #192: [43648, 43714, Tai_Viet]
+  4,  // Range #193: [43739, 43743, Tai_Viet]
+  22,  // Range #194: [43744, 43766, Meetei_Mayek]
+  21,  // Range #195: [43777, 43798, Ethiopic]
+  14,  // Range #196: [43808, 43822, Ethiopic]
+  42,  // Range #197: [43824, 43866, Latin]
+  8,  // Range #198: [43868, 43876, Latin]
+  0,  // Range #199: [43877, 43877, Greek]
+  79,  // Range #200: [43888, 43967, Cherokee]
+  57,  // Range #201: [43968, 44025, Meetei_Mayek]
+  11171,  // Range #202: [44032, 55203, Hangul]
+  75,  // Range #203: [55216, 55291, Hangul]
+  473,  // Range #204: [63744, 64217, Han]
+  6,  // Range #205: [64256, 64262, Latin]
+  4,  // Range #206: [64275, 64279, Armenian]
+  50,  // Range #207: [64285, 64335, Hebrew]
+  113,  // Range #208: [64336, 64449, Arabic]
+  362,  // Range #209: [64467, 64829, Arabic]
+  119,  // Range #210: [64848, 64967, Arabic]
+  13,  // Range #211: [65008, 65021, Arabic]
+  1,  // Range #212: [65070, 65071, Cyrillic]
+  140,  // Range #213: [65136, 65276, Arabic]
+  25,  // Range #214: [65313, 65338, Latin]
+  25,  // Range #215: [65345, 65370, Latin]
+  9,  // Range #216: [65382, 65391, Katakana]
+  44,  // Range #217: [65393, 65437, Katakana]
+  60,  // Range #218: [65440, 65500, Hangul]
+  93,  // Range #219: [65536, 65629, Linear_B]
+  122,  // Range #220: [65664, 65786, Linear_B]
+  78,  // Range #221: [65856, 65934, Greek]
+  0,  // Range #222: [65952, 65952, Greek]
+  28,  // Range #223: [66176, 66204, Lycian]
+  48,  // Range #224: [66208, 66256, Carian]
+  35,  // Range #225: [66304, 66339, Old_Italic]
+  2,  // Range #226: [66349, 66351, Old_Italic]
+  26,  // Range #227: [66352, 66378, Gothic]
+  42,  // Range #228: [66384, 66426, Old_Permic]
+  31,  // Range #229: [66432, 66463, Ugaritic]
+  53,  // Range #230: [66464, 66517, Old_Persian]
+  79,  // Range #231: [66560, 66639, Deseret]
+  47,  // Range #232: [66640, 66687, Shavian]
+  41,  // Range #233: [66688, 66729, Osmanya]
+  75,  // Range #234: [66736, 66811, Osage]
+  39,  // Range #235: [66816, 66855, Elbasan]
+  51,  // Range #236: [66864, 66915, Caucasian_Albanian]
+  0,  // Range #237: [66927, 66927, Caucasian_Albanian]
+  310,  // Range #238: [67072, 67382, Linear_A]
+  21,  // Range #239: [67392, 67413, Linear_A]
+  7,  // Range #240: [67424, 67431, Linear_A]
+  63,  // Range #241: [67584, 67647, Cypriot]
+  31,  // Range #242: [67648, 67679, Imperial_Aramaic]
+  31,  // Range #243: [67680, 67711, Palmyrene]
+  30,  // Range #244: [67712, 67742, Nabataean]
+  8,  // Range #245: [67751, 67759, Nabataean]
+  21,  // Range #246: [67808, 67829, Hatran]
+  4,  // Range #247: [67835, 67839, Hatran]
+  31,  // Range #248: [67840, 67871, Phoenician]
+  25,  // Range #249: [67872, 67897, Lydian]
+  0,  // Range #250: [67903, 67903, Lydian]
+  31,  // Range #251: [67968, 67999, Meroitic_Hieroglyphs]
+  95,  // Range #252: [68000, 68095, Meroitic_Cursive]
+  6,  // Range #253: [68096, 68102, Kharoshthi]
+  60,  // Range #254: [68108, 68168, Kharoshthi]
+  8,  // Range #255: [68176, 68184, Kharoshthi]
+  31,  // Range #256: [68192, 68223, Old_South_Arabian]
+  31,  // Range #257: [68224, 68255, Old_North_Arabian]
+  54,  // Range #258: [68288, 68342, Manichaean]
+  63,  // Range #259: [68352, 68415, Avestan]
+  31,  // Range #260: [68416, 68447, Inscriptional_Parthian]
+  18,  // Range #261: [68448, 68466, Inscriptional_Pahlavi]
+  7,  // Range #262: [68472, 68479, Inscriptional_Pahlavi]
+  17,  // Range #263: [68480, 68497, Psalter_Pahlavi]
+  3,  // Range #264: [68505, 68508, Psalter_Pahlavi]
+  6,  // Range #265: [68521, 68527, Psalter_Pahlavi]
+  72,  // Range #266: [68608, 68680, Old_Turkic]
+  50,  // Range #267: [68736, 68786, Old_Hungarian]
+  50,  // Range #268: [68800, 68850, Old_Hungarian]
+  5,  // Range #269: [68858, 68863, Old_Hungarian]
+  39,  // Range #270: [68864, 68903, Hanifi_Rohingya]
+  9,  // Range #271: [68912, 68921, Hanifi_Rohingya]
+  30,  // Range #272: [69216, 69246, Arabic]
+  39,  // Range #273: [69376, 69415, Old_Sogdian]
+  41,  // Range #274: [69424, 69465, Sogdian]
+  111,  // Range #275: [69632, 69743, Brahmi]
+  0,  // Range #276: [69759, 69759, Brahmi]
+  65,  // Range #277: [69760, 69825, Kaithi]
+  0,  // Range #278: [69837, 69837, Kaithi]
+  24,  // Range #279: [69840, 69864, Sora_Sompeng]
+  9,  // Range #280: [69872, 69881, Sora_Sompeng]
+  70,  // Range #281: [69888, 69958, Chakma]
+  38,  // Range #282: [69968, 70006, Mahajani]
+  95,  // Range #283: [70016, 70111, Sharada]
+  19,  // Range #284: [70113, 70132, Sinhala]
+  62,  // Range #285: [70144, 70206, Khojki]
+  41,  // Range #286: [70272, 70313, Multani]
+  58,  // Range #287: [70320, 70378, Khudawadi]
+  9,  // Range #288: [70384, 70393, Khudawadi]
+  57,  // Range #289: [70400, 70457, Grantha]
+  20,  // Range #290: [70460, 70480, Grantha]
+  0,  // Range #291: [70487, 70487, Grantha]
+  23,  // Range #292: [70493, 70516, Grantha]
+  94,  // Range #293: [70656, 70750, Newa]
+  71,  // Range #294: [70784, 70855, Tirhuta]
+  9,  // Range #295: [70864, 70873, Tirhuta]
+  93,  // Range #296: [71040, 71133, Siddham]
+  68,  // Range #297: [71168, 71236, Modi]
+  9,  // Range #298: [71248, 71257, Modi]
+  12,  // Range #299: [71264, 71276, Mongolian]
+  55,  // Range #300: [71296, 71351, Takri]
+  9,  // Range #301: [71360, 71369, Takri]
+  63,  // Range #302: [71424, 71487, Ahom]
+  59,  // Range #303: [71680, 71739, Dogra]
+  82,  // Range #304: [71840, 71922, Warang_Citi]
+  0,  // Range #305: [71935, 71935, Warang_Citi]
+  71,  // Range #306: [72192, 72263, Zanabazar_Square]
+  82,  // Range #307: [72272, 72354, Soyombo]
+  56,  // Range #308: [72384, 72440, Pau_Cin_Hau]
+  69,  // Range #309: [72704, 72773, Bhaiksuki]
+  28,  // Range #310: [72784, 72812, Bhaiksuki]
+  70,  // Range #311: [72816, 72886, Marchen]
+  71,  // Range #312: [72960, 73031, Masaram_Gondi]
+  9,  // Range #313: [73040, 73049, Masaram_Gondi]
+  56,  // Range #314: [73056, 73112, Gunjala_Gondi]
+  9,  // Range #315: [73120, 73129, Gunjala_Gondi]
+  24,  // Range #316: [73440, 73464, Makasar]
+  921,  // Range #317: [73728, 74649, Cuneiform]
+  116,  // Range #318: [74752, 74868, Cuneiform]
+  195,  // Range #319: [74880, 75075, Cuneiform]
+  1070,  // Range #320: [77824, 78894, Egyptian_Hieroglyphs]
+  582,  // Range #321: [82944, 83526, Anatolian_Hieroglyphs]
+  568,  // Range #322: [92160, 92728, Bamum]
+  47,  // Range #323: [92736, 92783, Mro]
+  37,  // Range #324: [92880, 92917, Bassa_Vah]
+  69,  // Range #325: [92928, 92997, Pahawh_Hmong]
+  39,  // Range #326: [93008, 93047, Pahawh_Hmong]
+  18,  // Range #327: [93053, 93071, Pahawh_Hmong]
+  90,  // Range #328: [93760, 93850, Medefaidrin]
+  68,  // Range #329: [93952, 94020, Miao]
+  46,  // Range #330: [94032, 94078, Miao]
+  16,  // Range #331: [94095, 94111, Miao]
+  0,  // Range #332: [94176, 94176, Tangut]
+  0,  // Range #333: [94177, 94177, Nushu]
+  6129,  // Range #334: [94208, 100337, Tangut]
+  754,  // Range #335: [100352, 101106, Tangut]
+  0,  // Range #336: [110592, 110592, Katakana]
+  285,  // Range #337: [110593, 110878, Hiragana]
+  395,  // Range #338: [110960, 111355, Nushu]
+  106,  // Range #339: [113664, 113770, Duployan]
+  24,  // Range #340: [113776, 113800, Duployan]
+  15,  // Range #341: [113808, 113823, Duployan]
+  69,  // Range #342: [119296, 119365, Greek]
+  651,  // Range #343: [120832, 121483, SignWriting]
+  20,  // Range #344: [121499, 121519, SignWriting]
+  42,  // Range #345: [122880, 122922, Glagolitic]
+  214,  // Range #346: [124928, 125142, Mende_Kikakui]
+  74,  // Range #347: [125184, 125258, Adlam]
+  15,  // Range #348: [125264, 125279, Adlam]
+  59,  // Range #349: [126464, 126523, Arabic]
+  89,  // Range #350: [126530, 126619, Arabic]
+  26,  // Range #351: [126625, 126651, Arabic]
+  1,  // Range #352: [126704, 126705, Arabic]
+  0,  // Range #353: [127488, 127488, Hiragana]
+  42710,  // Range #354: [131072, 173782, Han]
+  4148,  // Range #355: [173824, 177972, Han]
+  5985,  // Range #356: [177984, 183969, Han]
+  7472,  // Range #357: [183984, 191456, Han]
+  541,  // Range #358: [194560, 195101, Han]
+};
+
+const uint8 kRangeScript[] = {
+  25,  // Range #0: [65, 90, Latin]
+  25,  // Range #1: [97, 122, Latin]
+  25,  // Range #2: [170, 170, Latin]
+  25,  // Range #3: [186, 186, Latin]
+  25,  // Range #4: [192, 214, Latin]
+  25,  // Range #5: [216, 246, Latin]
+  25,  // Range #6: [248, 696, Latin]
+  25,  // Range #7: [736, 740, Latin]
+  5,  // Range #8: [746, 747, Bopomofo]
+  14,  // Range #9: [880, 883, Greek]
+  14,  // Range #10: [885, 893, Greek]
+  14,  // Range #11: [895, 900, Greek]
+  14,  // Range #12: [902, 902, Greek]
+  14,  // Range #13: [904, 993, Greek]
+  7,  // Range #14: [994, 1007, Coptic]
+  14,  // Range #15: [1008, 1023, Greek]
+  8,  // Range #16: [1024, 1156, Cyrillic]
+  8,  // Range #17: [1159, 1327, Cyrillic]
+  3,  // Range #18: [1329, 1416, Armenian]
+  3,  // Range #19: [1418, 1423, Armenian]
+  19,  // Range #20: [1425, 1479, Hebrew]
+  19,  // Range #21: [1488, 1524, Hebrew]
+  2,  // Range #22: [1536, 1540, Arabic]
+  2,  // Range #23: [1542, 1547, Arabic]
+  2,  // Range #24: [1549, 1562, Arabic]
+  2,  // Range #25: [1564, 1566, Arabic]
+  2,  // Range #26: [1568, 1599, Arabic]
+  2,  // Range #27: [1601, 1610, Arabic]
+  2,  // Range #28: [1622, 1647, Arabic]
+  2,  // Range #29: [1649, 1756, Arabic]
+  2,  // Range #30: [1758, 1791, Arabic]
+  34,  // Range #31: [1792, 1871, Syriac]
+  2,  // Range #32: [1872, 1919, Arabic]
+  37,  // Range #33: [1920, 1969, Thaana]
+  87,  // Range #34: [1984, 2047, Nko]
+  126,  // Range #35: [2048, 2110, Samaritan]
+  84,  // Range #36: [2112, 2142, Mandaic]
+  34,  // Range #37: [2144, 2154, Syriac]
+  2,  // Range #38: [2208, 2237, Arabic]
+  2,  // Range #39: [2259, 2273, Arabic]
+  2,  // Range #40: [2275, 2303, Arabic]
+  10,  // Range #41: [2304, 2384, Devanagari]
+  10,  // Range #42: [2387, 2403, Devanagari]
+  10,  // Range #43: [2406, 2431, Devanagari]
+  4,  // Range #44: [2432, 2510, Bengali]
+  4,  // Range #45: [2519, 2558, Bengali]
+  16,  // Range #46: [2561, 2641, Gurmukhi]
+  16,  // Range #47: [2649, 2654, Gurmukhi]
+  16,  // Range #48: [2662, 2678, Gurmukhi]
+  15,  // Range #49: [2689, 2768, Gujarati]
+  15,  // Range #50: [2784, 2801, Gujarati]
+  15,  // Range #51: [2809, 2815, Gujarati]
+  31,  // Range #52: [2817, 2893, Oriya]
+  31,  // Range #53: [2902, 2935, Oriya]
+  35,  // Range #54: [2946, 3024, Tamil]
+  35,  // Range #55: [3031, 3031, Tamil]
+  35,  // Range #56: [3046, 3066, Tamil]
+  36,  // Range #57: [3072, 3149, Telugu]
+  36,  // Range #58: [3157, 3162, Telugu]
+  36,  // Range #59: [3168, 3183, Telugu]
+  36,  // Range #60: [3192, 3199, Telugu]
+  21,  // Range #61: [3200, 3277, Kannada]
+  21,  // Range #62: [3285, 3286, Kannada]
+  21,  // Range #63: [3294, 3314, Kannada]
+  26,  // Range #64: [3328, 3455, Malayalam]
+  33,  // Range #65: [3458, 3551, Sinhala]
+  33,  // Range #66: [3558, 3572, Sinhala]
+  38,  // Range #67: [3585, 3642, Thai]
+  38,  // Range #68: [3648, 3675, Thai]
+  24,  // Range #69: [3713, 3725, Lao]
+  24,  // Range #70: [3732, 3807, Lao]
+  39,  // Range #71: [3840, 4052, Tibetan]
+  39,  // Range #72: [4057, 4058, Tibetan]
+  28,  // Range #73: [4096, 4255, Myanmar]
+  12,  // Range #74: [4256, 4295, Georgian]
+  12,  // Range #75: [4301, 4346, Georgian]
+  12,  // Range #76: [4348, 4351, Georgian]
+  18,  // Range #77: [4352, 4607, Hangul]
+  11,  // Range #78: [4608, 5017, Ethiopic]
+  6,  // Range #79: [5024, 5117, Cherokee]
+  40,  // Range #80: [5120, 5759, Canadian_Aboriginal]
+  29,  // Range #81: [5760, 5788, Ogham]
+  32,  // Range #82: [5792, 5866, Runic]
+  32,  // Range #83: [5870, 5880, Runic]
+  42,  // Range #84: [5888, 5908, Tagalog]
+  43,  // Range #85: [5920, 5940, Hanunoo]
+  44,  // Range #86: [5952, 5971, Buhid]
+  45,  // Range #87: [5984, 6003, Tagbanwa]
+  23,  // Range #88: [6016, 6121, Khmer]
+  23,  // Range #89: [6128, 6137, Khmer]
+  27,  // Range #90: [6144, 6145, Mongolian]
+  27,  // Range #91: [6148, 6148, Mongolian]
+  27,  // Range #92: [6150, 6169, Mongolian]
+  27,  // Range #93: [6176, 6264, Mongolian]
+  27,  // Range #94: [6272, 6314, Mongolian]
+  40,  // Range #95: [6320, 6389, Canadian_Aboriginal]
+  48,  // Range #96: [6400, 6479, Limbu]
+  52,  // Range #97: [6480, 6516, Tai_Le]
+  59,  // Range #98: [6528, 6601, New_Tai_Lue]
+  59,  // Range #99: [6608, 6623, New_Tai_Lue]
+  23,  // Range #100: [6624, 6655, Khmer]
+  55,  // Range #101: [6656, 6687, Buginese]
+  106,  // Range #102: [6688, 6793, Tai_Tham]
+  106,  // Range #103: [6800, 6809, Tai_Tham]
+  106,  // Range #104: [6816, 6829, Tai_Tham]
+  62,  // Range #105: [6912, 7036, Balinese]
+  113,  // Range #106: [7040, 7103, Sundanese]
+  63,  // Range #107: [7104, 7155, Batak]
+  63,  // Range #108: [7164, 7167, Batak]
+  82,  // Range #109: [7168, 7247, Lepcha]
+  109,  // Range #110: [7248, 7295, Ol_Chiki]
+  8,  // Range #111: [7296, 7304, Cyrillic]
+  12,  // Range #112: [7312, 7359, Georgian]
+  113,  // Range #113: [7360, 7367, Sundanese]
+  25,  // Range #114: [7424, 7461, Latin]
+  14,  // Range #115: [7462, 7466, Greek]
+  8,  // Range #116: [7467, 7467, Cyrillic]
+  25,  // Range #117: [7468, 7516, Latin]
+  14,  // Range #118: [7517, 7521, Greek]
+  25,  // Range #119: [7522, 7525, Latin]
+  14,  // Range #120: [7526, 7530, Greek]
+  25,  // Range #121: [7531, 7543, Latin]
+  8,  // Range #122: [7544, 7544, Cyrillic]
+  25,  // Range #123: [7545, 7614, Latin]
+  14,  // Range #124: [7615, 7615, Greek]
+  25,  // Range #125: [7680, 7935, Latin]
+  14,  // Range #126: [7936, 8190, Greek]
+  25,  // Range #127: [8305, 8305, Latin]
+  25,  // Range #128: [8319, 8319, Latin]
+  25,  // Range #129: [8336, 8348, Latin]
+  14,  // Range #130: [8486, 8486, Greek]
+  25,  // Range #131: [8490, 8491, Latin]
+  25,  // Range #132: [8498, 8498, Latin]
+  25,  // Range #133: [8526, 8526, Latin]
+  25,  // Range #134: [8544, 8584, Latin]
+  46,  // Range #135: [10240, 10495, Braille]
+  56,  // Range #136: [11264, 11358, Glagolitic]
+  25,  // Range #137: [11360, 11391, Latin]
+  7,  // Range #138: [11392, 11507, Coptic]
+  7,  // Range #139: [11513, 11519, Coptic]
+  12,  // Range #140: [11520, 11559, Georgian]
+  12,  // Range #141: [11565, 11565, Georgian]
+  60,  // Range #142: [11568, 11623, Tifinagh]
+  60,  // Range #143: [11631, 11632, Tifinagh]
+  60,  // Range #144: [11647, 11647, Tifinagh]
+  11,  // Range #145: [11648, 11670, Ethiopic]
+  11,  // Range #146: [11680, 11742, Ethiopic]
+  8,  // Range #147: [11744, 11775, Cyrillic]
+  17,  // Range #148: [11904, 12019, Han]
+  17,  // Range #149: [12032, 12245, Han]
+  17,  // Range #150: [12293, 12293, Han]
+  17,  // Range #151: [12295, 12295, Han]
+  17,  // Range #152: [12321, 12329, Han]
+  18,  // Range #153: [12334, 12335, Hangul]
+  17,  // Range #154: [12344, 12347, Han]
+  20,  // Range #155: [12353, 12438, Hiragana]
+  20,  // Range #156: [12445, 12447, Hiragana]
+  22,  // Range #157: [12449, 12538, Katakana]
+  22,  // Range #158: [12541, 12543, Katakana]
+  5,  // Range #159: [12549, 12591, Bopomofo]
+  18,  // Range #160: [12593, 12686, Hangul]
+  5,  // Range #161: [12704, 12730, Bopomofo]
+  22,  // Range #162: [12784, 12799, Katakana]
+  18,  // Range #163: [12800, 12830, Hangul]
+  18,  // Range #164: [12896, 12926, Hangul]
+  22,  // Range #165: [13008, 13143, Katakana]
+  17,  // Range #166: [13312, 19893, Han]
+  17,  // Range #167: [19968, 40943, Han]
+  41,  // Range #168: [40960, 42182, Yi]
+  131,  // Range #169: [42192, 42239, Lisu]
+  99,  // Range #170: [42240, 42539, Vai]
+  8,  // Range #171: [42560, 42655, Cyrillic]
+  130,  // Range #172: [42656, 42743, Bamum]
+  25,  // Range #173: [42786, 42887, Latin]
+  25,  // Range #174: [42891, 42937, Latin]
+  25,  // Range #175: [42999, 43007, Latin]
+  58,  // Range #176: [43008, 43051, Syloti_Nagri]
+  90,  // Range #177: [43072, 43127, Phags_Pa]
+  111,  // Range #178: [43136, 43205, Saurashtra]
+  111,  // Range #179: [43214, 43225, Saurashtra]
+  10,  // Range #180: [43232, 43263, Devanagari]
+  79,  // Range #181: [43264, 43309, Kayah_Li]
+  79,  // Range #182: [43311, 43311, Kayah_Li]
+  110,  // Range #183: [43312, 43347, Rejang]
+  110,  // Range #184: [43359, 43359, Rejang]
+  18,  // Range #185: [43360, 43388, Hangul]
+  78,  // Range #186: [43392, 43469, Javanese]
+  78,  // Range #187: [43472, 43487, Javanese]
+  28,  // Range #188: [43488, 43518, Myanmar]
+  66,  // Range #189: [43520, 43574, Cham]
+  66,  // Range #190: [43584, 43615, Cham]
+  28,  // Range #191: [43616, 43647, Myanmar]
+  127,  // Range #192: [43648, 43714, Tai_Viet]
+  127,  // Range #193: [43739, 43743, Tai_Viet]
+  115,  // Range #194: [43744, 43766, Meetei_Mayek]
+  11,  // Range #195: [43777, 43798, Ethiopic]
+  11,  // Range #196: [43808, 43822, Ethiopic]
+  25,  // Range #197: [43824, 43866, Latin]
+  25,  // Range #198: [43868, 43876, Latin]
+  14,  // Range #199: [43877, 43877, Greek]
+  6,  // Range #200: [43888, 43967, Cherokee]
+  115,  // Range #201: [43968, 44025, Meetei_Mayek]
+  18,  // Range #202: [44032, 55203, Hangul]
+  18,  // Range #203: [55216, 55291, Hangul]
+  17,  // Range #204: [63744, 64217, Han]
+  25,  // Range #205: [64256, 64262, Latin]
+  3,  // Range #206: [64275, 64279, Armenian]
+  19,  // Range #207: [64285, 64335, Hebrew]
+  2,  // Range #208: [64336, 64449, Arabic]
+  2,  // Range #209: [64467, 64829, Arabic]
+  2,  // Range #210: [64848, 64967, Arabic]
+  2,  // Range #211: [65008, 65021, Arabic]
+  8,  // Range #212: [65070, 65071, Cyrillic]
+  2,  // Range #213: [65136, 65276, Arabic]
+  25,  // Range #214: [65313, 65338, Latin]
+  25,  // Range #215: [65345, 65370, Latin]
+  22,  // Range #216: [65382, 65391, Katakana]
+  22,  // Range #217: [65393, 65437, Katakana]
+  18,  // Range #218: [65440, 65500, Hangul]
+  49,  // Range #219: [65536, 65629, Linear_B]
+  49,  // Range #220: [65664, 65786, Linear_B]
+  14,  // Range #221: [65856, 65934, Greek]
+  14,  // Range #222: [65952, 65952, Greek]
+  107,  // Range #223: [66176, 66204, Lycian]
+  104,  // Range #224: [66208, 66256, Carian]
+  30,  // Range #225: [66304, 66339, Old_Italic]
+  30,  // Range #226: [66349, 66351, Old_Italic]
+  13,  // Range #227: [66352, 66378, Gothic]
+  89,  // Range #228: [66384, 66426, Old_Permic]
+  53,  // Range #229: [66432, 66463, Ugaritic]
+  61,  // Range #230: [66464, 66517, Old_Persian]
+  9,  // Range #231: [66560, 66639, Deseret]
+  51,  // Range #232: [66640, 66687, Shavian]
+  50,  // Range #233: [66688, 66729, Osmanya]
+  171,  // Range #234: [66736, 66811, Osage]
+  136,  // Range #235: [66816, 66855, Elbasan]
+  159,  // Range #236: [66864, 66915, Caucasian_Albanian]
+  159,  // Range #237: [66927, 66927, Caucasian_Albanian]
+  83,  // Range #238: [67072, 67382, Linear_A]
+  83,  // Range #239: [67392, 67413, Linear_A]
+  83,  // Range #240: [67424, 67431, Linear_A]
+  47,  // Range #241: [67584, 67647, Cypriot]
+  116,  // Range #242: [67648, 67679, Imperial_Aramaic]
+  144,  // Range #243: [67680, 67711, Palmyrene]
+  143,  // Range #244: [67712, 67742, Nabataean]
+  143,  // Range #245: [67751, 67759, Nabataean]
+  162,  // Range #246: [67808, 67829, Hatran]
+  162,  // Range #247: [67835, 67839, Hatran]
+  91,  // Range #248: [67840, 67871, Phoenician]
+  108,  // Range #249: [67872, 67897, Lydian]
+  108,  // Range #250: [67903, 67903, Lydian]
+  86,  // Range #251: [67968, 67999, Meroitic_Hieroglyphs]
+  141,  // Range #252: [68000, 68095, Meroitic_Cursive]
+  57,  // Range #253: [68096, 68102, Kharoshthi]
+  57,  // Range #254: [68108, 68168, Kharoshthi]
+  57,  // Range #255: [68176, 68184, Kharoshthi]
+  133,  // Range #256: [68192, 68223, Old_South_Arabian]
+  142,  // Range #257: [68224, 68255, Old_North_Arabian]
+  121,  // Range #258: [68288, 68342, Manichaean]
+  117,  // Range #259: [68352, 68415, Avestan]
+  125,  // Range #260: [68416, 68447, Inscriptional_Parthian]
+  122,  // Range #261: [68448, 68466, Inscriptional_Pahlavi]
+  122,  // Range #262: [68472, 68479, Inscriptional_Pahlavi]
+  123,  // Range #263: [68480, 68497, Psalter_Pahlavi]
+  123,  // Range #264: [68505, 68508, Psalter_Pahlavi]
+  123,  // Range #265: [68521, 68527, Psalter_Pahlavi]
+  88,  // Range #266: [68608, 68680, Old_Turkic]
+  76,  // Range #267: [68736, 68786, Old_Hungarian]
+  76,  // Range #268: [68800, 68850, Old_Hungarian]
+  76,  // Range #269: [68858, 68863, Old_Hungarian]
+  182,  // Range #270: [68864, 68903, Hanifi_Rohingya]
+  182,  // Range #271: [68912, 68921, Hanifi_Rohingya]
+  2,  // Range #272: [69216, 69246, Arabic]
+  184,  // Range #273: [69376, 69415, Old_Sogdian]
+  183,  // Range #274: [69424, 69465, Sogdian]
+  65,  // Range #275: [69632, 69743, Brahmi]
+  65,  // Range #276: [69759, 69759, Brahmi]
+  120,  // Range #277: [69760, 69825, Kaithi]
+  120,  // Range #278: [69837, 69837, Kaithi]
+  152,  // Range #279: [69840, 69864, Sora_Sompeng]
+  152,  // Range #280: [69872, 69881, Sora_Sompeng]
+  118,  // Range #281: [69888, 69958, Chakma]
+  160,  // Range #282: [69968, 70006, Mahajani]
+  151,  // Range #283: [70016, 70111, Sharada]
+  33,  // Range #284: [70113, 70132, Sinhala]
+  157,  // Range #285: [70144, 70206, Khojki]
+  164,  // Range #286: [70272, 70313, Multani]
+  145,  // Range #287: [70320, 70378, Khudawadi]
+  145,  // Range #288: [70384, 70393, Khudawadi]
+  137,  // Range #289: [70400, 70457, Grantha]
+  137,  // Range #290: [70460, 70480, Grantha]
+  137,  // Range #291: [70487, 70487, Grantha]
+  137,  // Range #292: [70493, 70516, Grantha]
+  170,  // Range #293: [70656, 70750, Newa]
+  158,  // Range #294: [70784, 70855, Tirhuta]
+  158,  // Range #295: [70864, 70873, Tirhuta]
+  166,  // Range #296: [71040, 71133, Siddham]
+  163,  // Range #297: [71168, 71236, Modi]
+  163,  // Range #298: [71248, 71257, Modi]
+  27,  // Range #299: [71264, 71276, Mongolian]
+  153,  // Range #300: [71296, 71351, Takri]
+  153,  // Range #301: [71360, 71369, Takri]
+  161,  // Range #302: [71424, 71487, Ahom]
+  178,  // Range #303: [71680, 71739, Dogra]
+  146,  // Range #304: [71840, 71922, Warang_Citi]
+  146,  // Range #305: [71935, 71935, Warang_Citi]
+  177,  // Range #306: [72192, 72263, Zanabazar_Square]
+  176,  // Range #307: [72272, 72354, Soyombo]
+  165,  // Range #308: [72384, 72440, Pau_Cin_Hau]
+  168,  // Range #309: [72704, 72773, Bhaiksuki]
+  168,  // Range #310: [72784, 72812, Bhaiksuki]
+  169,  // Range #311: [72816, 72886, Marchen]
+  175,  // Range #312: [72960, 73031, Masaram_Gondi]
+  175,  // Range #313: [73040, 73049, Masaram_Gondi]
+  179,  // Range #314: [73056, 73112, Gunjala_Gondi]
+  179,  // Range #315: [73120, 73129, Gunjala_Gondi]
+  180,  // Range #316: [73440, 73464, Makasar]
+  101,  // Range #317: [73728, 74649, Cuneiform]
+  101,  // Range #318: [74752, 74868, Cuneiform]
+  101,  // Range #319: [74880, 75075, Cuneiform]
+  71,  // Range #320: [77824, 78894, Egyptian_Hieroglyphs]
+  156,  // Range #321: [82944, 83526, Anatolian_Hieroglyphs]
+  130,  // Range #322: [92160, 92728, Bamum]
+  149,  // Range #323: [92736, 92783, Mro]
+  134,  // Range #324: [92880, 92917, Bassa_Vah]
+  75,  // Range #325: [92928, 92997, Pahawh_Hmong]
+  75,  // Range #326: [93008, 93047, Pahawh_Hmong]
+  75,  // Range #327: [93053, 93071, Pahawh_Hmong]
+  181,  // Range #328: [93760, 93850, Medefaidrin]
+  92,  // Range #329: [93952, 94020, Miao]
+  92,  // Range #330: [94032, 94078, Miao]
+  92,  // Range #331: [94095, 94111, Miao]
+  154,  // Range #332: [94176, 94176, Tangut]
+  150,  // Range #333: [94177, 94177, Nushu]
+  154,  // Range #334: [94208, 100337, Tangut]
+  154,  // Range #335: [100352, 101106, Tangut]
+  22,  // Range #336: [110592, 110592, Katakana]
+  20,  // Range #337: [110593, 110878, Hiragana]
+  150,  // Range #338: [110960, 111355, Nushu]
+  135,  // Range #339: [113664, 113770, Duployan]
+  135,  // Range #340: [113776, 113800, Duployan]
+  135,  // Range #341: [113808, 113823, Duployan]
+  14,  // Range #342: [119296, 119365, Greek]
+  112,  // Range #343: [120832, 121483, SignWriting]
+  112,  // Range #344: [121499, 121519, SignWriting]
+  56,  // Range #345: [122880, 122922, Glagolitic]
+  140,  // Range #346: [124928, 125142, Mende_Kikakui]
+  167,  // Range #347: [125184, 125258, Adlam]
+  167,  // Range #348: [125264, 125279, Adlam]
+  2,  // Range #349: [126464, 126523, Arabic]
+  2,  // Range #350: [126530, 126619, Arabic]
+  2,  // Range #351: [126625, 126651, Arabic]
+  2,  // Range #352: [126704, 126705, Arabic]
+  20,  // Range #353: [127488, 127488, Hiragana]
+  17,  // Range #354: [131072, 173782, Han]
+  17,  // Range #355: [173824, 177972, Han]
+  17,  // Range #356: [177984, 183969, Han]
+  17,  // Range #357: [183984, 191456, Han]
+  17,  // Range #358: [194560, 195101, Han]
+};
+
+const uint8 kMaxScript = 184;
+
+}  // namespace approx_script_internal
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/script/approx-script-data.h b/lang_id/script/approx-script-data.h
new file mode 100644
index 0000000..3eceed8
--- /dev/null
+++ b/lang_id/script/approx-script-data.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_APPROX_SCRIPT_DATA_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_APPROX_SCRIPT_DATA_H_
+
+#include "lang_id/common/lite_base/integral-types.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace approx_script_internal {
+
+// Number of contiguous ranges of same-script codepoints (see below).
+extern const int kNumRanges;
+
+// Non-overlapping ranges of Unicode characters.  Characters from each range have
+// the same script (see kRangeScript below).  Multiple ranges may have the same
+// script.  Note: we represent the kNumRanges ranges as an array with their
+// first codepoints, and a separate array with their sizes (see kRangeSizeMinusOne
+// below).  This leads to better memory locality during the binary search (which
+// uses only the first codepoints, up until the very end).
+//
+// kRangeFirst[i] = first codepoint from range #i, \forall 0 <= i < kNumRanges.
+extern const uint32 kRangeFirst[];
+
+// kRangeSize[i] > 0 is the number of consecutive codepoints in range #i *minus*
+// 1, \forall 0 <= i < kNumRanges.  I.e., 0 means that the range contains 1
+// codepoint.  Since we don't have empty ranges, this "minus one" convention
+// allows us to use all 2^16 values here.
+extern const uint16 kRangeSizeMinusOne[];
+
+// Scripts for the ranges from kRangeFirst.  For each i such that 0 <= i <
+// kNumRanges, the range #i has the script kRangeScript[i].  Each uint8 element
+// can be cast to a UScriptCode enum value (see
+// unicode/uscript.h).
+//
+// NOTE: we don't use directly UScriptCode here, as that requires a full int
+// (due to USCRIPT_INVALID_CODE = -1).  uint8 is enough for us (and shorter!)
+extern const uint8 kRangeScript[];
+
+// Max value from kRangeScript[].  Scripts are guaranteed to be in the interval
+// [0, kMaxScript] (inclusive on both sides).  Can be used to e.g., set the
+// number of rows in an embedding table for a script-based feature.
+extern const uint8 kMaxScript;
+
+}  // namespace approx_script_internal
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_APPROX_SCRIPT_DATA_H_
diff --git a/lang_id/script/approx-script.cc b/lang_id/script/approx-script.cc
new file mode 100644
index 0000000..10afa9c
--- /dev/null
+++ b/lang_id/script/approx-script.cc
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "lang_id/script/approx-script.h"
+
+#include "lang_id/common/lite_base/logging.h"
+#include "lang_id/common/lite_base/integral-types.h"
+#include "lang_id/common/utf8.h"
+#include "lang_id/script/approx-script-data.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// int value of USCRIPT_UNKNOWN from enum UScriptCode (from
+// unicode/uscript.h).  Note: we do have a test that
+// USCRIPT_UNKNOWN evaluates to 103.
+const int kUnknownUscript = 103;
+
+namespace {
+using approx_script_internal::kNumRanges;
+using approx_script_internal::kRangeFirst;
+using approx_script_internal::kRangeScript;
+using approx_script_internal::kRangeSizeMinusOne;
+
+uint32 Utf8ToCodepoint(const unsigned char *s, int num_bytes) {
+  switch (num_bytes) {
+    case 1:
+      return s[0];
+    case 2:
+      return ((s[0] & 0x1F) << 6) | (s[1] & 0x3F);
+    case 3:
+      return (((s[0] & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F));
+    case 4:
+      return (((s[0] & 0x07) << 18) | ((s[1] & 0x3F) << 12) |
+              ((s[2] & 0x3F) << 6) | (s[3] & 0x3F));
+    default:
+      SAFTM_DLOG(FATAL) << "Illegal num_bytes: " << num_bytes;
+      return 0;
+  }
+}
+
+inline int BinarySearch(uint32 codepoint, int start, int end) {
+  while (end > start + 1) {
+    // Due to the while loop condition, middle > start and middle < end.  Hence,
+    // on both branches of the if below, we strictly reduce the end - start
+    // value, so we eventually get that difference below 1 and complete the
+    // while loop.
+    int middle = (start + end) / 2;
+    if (codepoint < kRangeFirst[middle]) {
+      end = middle;
+    } else {
+      start = middle;
+    }
+  }
+
+  if (end == start + 1) {
+    const uint32 range_start = kRangeFirst[start];
+    if ((codepoint >= range_start) &&
+        (codepoint <= range_start + kRangeSizeMinusOne[start])) {
+      return kRangeScript[start];
+    }
+  }
+
+  return kUnknownUscript;
+}
+}  // namespace
+
+int GetApproxScript(const unsigned char *s, int num_bytes) {
+  SAFTM_DCHECK_NE(s, nullptr);
+  SAFTM_DCHECK_EQ(num_bytes,
+                  utils::OneCharLen(reinterpret_cast<const char *>(s)));
+  uint32 codepoint = Utf8ToCodepoint(s, num_bytes);
+  return BinarySearch(codepoint, 0, kNumRanges);
+}
+
+int GetMaxApproxScriptResult() { return approx_script_internal::kMaxScript; }
+
+SAFTM_STATIC_REGISTRATION(ApproxScriptDetector);
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/script/approx-script.h b/lang_id/script/approx-script.h
new file mode 100644
index 0000000..2472e86
--- /dev/null
+++ b/lang_id/script/approx-script.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_APPROX_SCRIPT_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_APPROX_SCRIPT_H_
+
+#include "lang_id/common/utf8.h"
+#include "lang_id/script/script-detector.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Returns script for the UTF-8 character that starts at address |s| and has
+// |num_bytes| bytes.  Note: behavior is unspecified if s points to a UTF-8
+// character that has a different number of bytes.  If you don't know
+// |num_bytes|, call GetApproxScript(const char *s).
+//
+// NOTE: to keep BUILD deps small, this function returns an int, but you can
+// assume it's an enum UScriptCode (unicode/uscript.h)
+//
+// If unable to determine the script, this function returns kUnknownUscript, the
+// int value of USCRIPT_UNKNOWN from enum UScriptCode.
+int GetApproxScript(const unsigned char *s, int num_bytes);
+
+// See comments for GetApproxScript() above.
+extern const int kUnknownUscript;
+
+// Same as before, but s is a const char *pointer (no unsigned).  Internally, we
+// prefer "unsigned char" (the signed status of char is ambiguous), so we cast
+// and call the previous version (with const unsigned char *).
+inline int GetApproxScript(const char *s, int num_bytes) {
+  return GetApproxScript(reinterpret_cast<const unsigned char *>(s), num_bytes);
+}
+
+// Returns script for the UTF-8 character that starts at address |s|.  NOTE:
+// UTF-8 is a var-length encoding, taking between 1 and 4 bytes per Unicode
+// character.  We infer the number of bytes based on s[0].  If that number is k,
+// we expect to be able to read k bytes starting from address |s|.  I.e., do not
+// call this function on broken UTF-8.
+inline int GetApproxScript(const char *s) {
+  return GetApproxScript(s, utils::OneCharLen(s));
+}
+
+// Returns max value returned by the GetApproxScript() functions.
+int GetMaxApproxScriptResult();
+
+class ApproxScriptDetector : public ScriptDetector {
+ public:
+  ~ApproxScriptDetector() override = default;
+
+  // Note: the int result of this method is actually a UScriptCode enum value.
+  // We return int to match the general case from the base class ScriptDetector
+  // (some script detectors do not use UScriptCode).
+  int GetScript(const char *s, int num_bytes) const override {
+    return GetApproxScript(s, num_bytes);
+  }
+
+  int GetMaxScript() const override {
+    return GetMaxApproxScriptResult();
+  }
+
+  SAFTM_DEFINE_REGISTRATION_METHOD("approx-unicode-script-detector",
+                                   ApproxScriptDetector);
+};
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_APPROX_SCRIPT_H_
diff --git a/util/calendar/calendar.h b/lang_id/script/script-detector.cc
similarity index 67%
rename from util/calendar/calendar.h
rename to lang_id/script/script-detector.cc
index b0cf2e6..6c19883 100644
--- a/util/calendar/calendar.h
+++ b/lang_id/script/script-detector.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,12 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
-#define LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#include "lang_id/script/script-detector.h"
 
-#include "util/calendar/calendar-icu.h"
+namespace libtextclassifier3 {
+namespace mobile {
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+SAFTM_DEFINE_CLASS_REGISTRY_NAME("script detector", ScriptDetector);
+
+}  // namespace mobile
+}  // namespace libtextclassifier3
diff --git a/lang_id/script/script-detector.h b/lang_id/script/script-detector.h
new file mode 100644
index 0000000..12a7888
--- /dev/null
+++ b/lang_id/script/script-detector.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_SCRIPT_DETECTOR_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_SCRIPT_DETECTOR_H_
+
+#include "lang_id/common/registry.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+
+// Base class for Unicode script detectors.  Individual detectors may differ in
+// code size, speed, precision, etc.  You can use the registration mechanism to
+// get the ScriptDetector that's most appropriate to your application.
+class ScriptDetector : public RegisterableClass<ScriptDetector> {
+ public:
+  virtual ~ScriptDetector() = default;
+
+  // Returns a number between 0 and GetMaxScript() (inclusive on both ends) that
+  // indicates the script of the UTF8 character that starts at address |s| and
+  // has |num_bytes|.
+  virtual int GetScript(const char *s, int num_bytes) const = 0;
+
+  // Returns max result that can be returned by GetScript().
+  virtual int GetMaxScript() const = 0;
+};
+
+SAFTM_DECLARE_CLASS_REGISTRY_NAME(ScriptDetector);
+
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_SCRIPT_DETECTOR_H_
diff --git a/util/calendar/calendar.h b/lang_id/script/tiny-script-detector.cc
similarity index 65%
copy from util/calendar/calendar.h
copy to lang_id/script/tiny-script-detector.cc
index b0cf2e6..2f0dd98 100644
--- a/util/calendar/calendar.h
+++ b/lang_id/script/tiny-script-detector.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
-#define LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#include "lang_id/script/tiny-script-detector.h"
 
-#include "util/calendar/calendar-icu.h"
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+SAFTM_STATIC_REGISTRATION(TinyScriptDetector);
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace nlp_saft
diff --git a/lang_id/script/tiny-script-detector.h b/lang_id/script/tiny-script-detector.h
new file mode 100644
index 0000000..a55da04
--- /dev/null
+++ b/lang_id/script/tiny-script-detector.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_TINY_SCRIPT_DETECTOR_H_
+#define NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_TINY_SCRIPT_DETECTOR_H_
+
+#include "lang_id/script/script-detector.h"
+
+namespace libtextclassifier3 {
+namespace mobile {
+namespace lang_id {
+
+// Unicode scripts we care about.  To get compact and fast code, we detect only
+// a few Unicode scripts that offer a strong indication about the language of
+// the text (e.g., Hiragana -> Japanese).
+enum Script {
+  // Special value to indicate internal errors in the script detection code.
+  kScriptError,
+
+  // Special values for all Unicode scripts that we do not detect.  One special
+  // value for Unicode characters of 1, 2, 3, respectively 4 bytes (as we
+  // already have that information, we use it).  kScriptOtherUtf8OneByte means
+  // ~Latin and kScriptOtherUtf8FourBytes means ~Han.
+  kScriptOtherUtf8OneByte,
+  kScriptOtherUtf8TwoBytes,
+  kScriptOtherUtf8ThreeBytes,
+  kScriptOtherUtf8FourBytes,
+
+  kScriptGreek,
+  kScriptCyrillic,
+  kScriptHebrew,
+  kScriptArabic,
+  kScriptHangulJamo,  // Used primarily for Korean.
+  kScriptHiragana,    // Used primarily for Japanese.
+  kScriptKatakana,    // Used primarily for Japanese.
+
+  // Add new scripts here.
+
+  // Do not add any script after kNumRelevantScripts.  This value indicates the
+  // number of elements in this enum Script (except this value) such that we can
+  // easily iterate over the scripts.
+  kNumRelevantScripts,
+};
+
+template<typename IntType>
+inline bool InRange(IntType value, IntType low, IntType hi) {
+  return (value >= low) && (value <= hi);
+}
+
+// Returns Script for the UTF8 character that starts at address p.
+// Precondition: p points to a valid UTF8 character of num_bytes bytes.
+inline Script GetScript(const unsigned char *p, int num_bytes) {
+  switch (num_bytes) {
+    case 1:
+      return kScriptOtherUtf8OneByte;
+
+    case 2: {
+      // 2-byte UTF8 characters have 11 bits of information.  unsigned int has
+      // at least 16 bits (http://en.cppreference.com/w/cpp/language/types) so
+      // it's enough.  It's also usually the fastest int type on the current
+      // CPU, so it's better to use than int32.
+      static const unsigned int kGreekStart = 0x370;
+
+      // Commented out (unsued in the code): kGreekEnd = 0x3FF;
+      static const unsigned int kCyrillicStart = 0x400;
+      static const unsigned int kCyrillicEnd = 0x4FF;
+      static const unsigned int kHebrewStart = 0x590;
+
+      // Commented out (unsued in the code): kHebrewEnd = 0x5FF;
+      static const unsigned int kArabicStart = 0x600;
+      static const unsigned int kArabicEnd = 0x6FF;
+      const unsigned int codepoint = ((p[0] & 0x1F) << 6) | (p[1] & 0x3F);
+      if (codepoint > kCyrillicEnd) {
+        if (codepoint >= kArabicStart) {
+          if (codepoint <= kArabicEnd) {
+            return kScriptArabic;
+          }
+        } else {
+          // At this point, codepoint < kArabicStart = kHebrewEnd + 1, so
+          // codepoint <= kHebrewEnd.
+          if (codepoint >= kHebrewStart) {
+            return kScriptHebrew;
+          }
+        }
+      } else {
+        if (codepoint >= kCyrillicStart) {
+          return kScriptCyrillic;
+        } else {
+          // At this point, codepoint < kCyrillicStart = kGreekEnd + 1, so
+          // codepoint <= kGreekEnd.
+          if (codepoint >= kGreekStart) {
+            return kScriptGreek;
+          }
+        }
+      }
+      return kScriptOtherUtf8TwoBytes;
+    }
+
+    case 3: {
+      // 3-byte UTF8 characters have 16 bits of information.  unsigned int has
+      // at least 16 bits.
+      static const unsigned int kHangulJamoStart = 0x1100;
+      static const unsigned int kHangulJamoEnd = 0x11FF;
+      static const unsigned int kHiraganaStart = 0x3041;
+      static const unsigned int kHiraganaEnd = 0x309F;
+
+      // Commented out (unsued in the code): kKatakanaStart = 0x30A0;
+      static const unsigned int kKatakanaEnd = 0x30FF;
+      const unsigned int codepoint =
+          ((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F);
+      if (codepoint > kHiraganaEnd) {
+        // On this branch, codepoint > kHiraganaEnd = kKatakanaStart - 1, so
+        // codepoint >= kKatakanaStart.
+        if (codepoint <= kKatakanaEnd) {
+          return kScriptKatakana;
+        }
+      } else {
+        if (codepoint >= kHiraganaStart) {
+          return kScriptHiragana;
+        } else {
+          if (InRange(codepoint, kHangulJamoStart, kHangulJamoEnd)) {
+            return kScriptHangulJamo;
+          }
+        }
+      }
+      return kScriptOtherUtf8ThreeBytes;
+    }
+
+    case 4:
+      return kScriptOtherUtf8FourBytes;
+
+    default:
+      return kScriptError;
+  }
+}
+
+// Returns Script for the UTF8 character that starts at address p.  Similar to
+// the previous version of GetScript, except for "char" vs "unsigned char".
+// Most code works with "char *" pointers, ignoring the fact that char is
+// unsigned (by default) on most platforms, but signed on iOS.  This code takes
+// care of making sure we always treat chars as unsigned.
+inline Script GetScript(const char *p, int num_bytes) {
+  return GetScript(reinterpret_cast<const unsigned char *>(p),
+                   num_bytes);
+}
+
+class TinyScriptDetector : public ScriptDetector {
+ public:
+  ~TinyScriptDetector() override = default;
+
+  int GetScript(const char *s, int num_bytes) const override {
+    // Add the namespace in indicate that we want to call the method outside
+    // this class, instead of performing an infinite recursive call.
+    return libtextclassifier3::mobile::lang_id::GetScript(s, num_bytes);
+  }
+
+  int GetMaxScript() const override {
+    return kNumRelevantScripts - 1;
+  }
+
+  SAFTM_DEFINE_REGISTRATION_METHOD("tiny-script-detector", TinyScriptDetector);
+};
+
+}  // namespace lang_id
+}  // namespace mobile
+}  // namespace nlp_saft
+
+#endif  // NLP_SAFT_COMPONENTS_LANG_ID_MOBILE_SCRIPT_TINY_SCRIPT_DETECTOR_H_
diff --git a/model-executor.cc b/model-executor.cc
deleted file mode 100644
index 69931cb..0000000
--- a/model-executor.cc
+++ /dev/null
@@ -1,162 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "model-executor.h"
-
-#include "quantization.h"
-#include "util/base/logging.h"
-
-namespace libtextclassifier2 {
-namespace internal {
-bool FromModelSpec(const tflite::Model* model_spec,
-                   std::unique_ptr<const tflite::FlatBufferModel>* model) {
-  *model = tflite::FlatBufferModel::BuildFromModel(model_spec);
-  if (!(*model) || !(*model)->initialized()) {
-    TC_LOG(ERROR) << "Could not build TFLite model from a model spec. ";
-    return false;
-  }
-  return true;
-}
-}  // namespace internal
-
-std::unique_ptr<tflite::Interpreter> ModelExecutor::CreateInterpreter() const {
-  std::unique_ptr<tflite::Interpreter> interpreter;
-  tflite::InterpreterBuilder(*model_, builtins_)(&interpreter);
-  return interpreter;
-}
-
-std::unique_ptr<TFLiteEmbeddingExecutor> TFLiteEmbeddingExecutor::Instance(
-    const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
-    int quantization_bits) {
-  const tflite::Model* model_spec =
-      flatbuffers::GetRoot<tflite::Model>(model_spec_buffer->data());
-  flatbuffers::Verifier verifier(model_spec_buffer->data(),
-                                 model_spec_buffer->Length());
-  std::unique_ptr<const tflite::FlatBufferModel> model;
-  if (!model_spec->Verify(verifier) ||
-      !internal::FromModelSpec(model_spec, &model)) {
-    TC_LOG(ERROR) << "Could not load TFLite model.";
-    return nullptr;
-  }
-
-  std::unique_ptr<tflite::Interpreter> interpreter;
-  tflite::ops::builtin::BuiltinOpResolver builtins;
-  tflite::InterpreterBuilder(*model, builtins)(&interpreter);
-  if (!interpreter) {
-    TC_LOG(ERROR) << "Could not build TFLite interpreter for embeddings.";
-    return nullptr;
-  }
-
-  if (interpreter->tensors_size() != 2) {
-    return nullptr;
-  }
-  const TfLiteTensor* embeddings = interpreter->tensor(0);
-  if (embeddings->dims->size != 2) {
-    return nullptr;
-  }
-  int num_buckets = embeddings->dims->data[0];
-  const TfLiteTensor* scales = interpreter->tensor(1);
-  if (scales->dims->size != 2 || scales->dims->data[0] != num_buckets ||
-      scales->dims->data[1] != 1) {
-    return nullptr;
-  }
-  int bytes_per_embedding = embeddings->dims->data[1];
-  if (!CheckQuantizationParams(bytes_per_embedding, quantization_bits,
-                               embedding_size)) {
-    TC_LOG(ERROR) << "Mismatch in quantization parameters.";
-    return nullptr;
-  }
-
-  return std::unique_ptr<TFLiteEmbeddingExecutor>(new TFLiteEmbeddingExecutor(
-      std::move(model), quantization_bits, num_buckets, bytes_per_embedding,
-      embedding_size, scales, embeddings, std::move(interpreter)));
-}
-
-TFLiteEmbeddingExecutor::TFLiteEmbeddingExecutor(
-    std::unique_ptr<const tflite::FlatBufferModel> model, int quantization_bits,
-    int num_buckets, int bytes_per_embedding, int output_embedding_size,
-    const TfLiteTensor* scales, const TfLiteTensor* embeddings,
-    std::unique_ptr<tflite::Interpreter> interpreter)
-    : model_(std::move(model)),
-      quantization_bits_(quantization_bits),
-      num_buckets_(num_buckets),
-      bytes_per_embedding_(bytes_per_embedding),
-      output_embedding_size_(output_embedding_size),
-      scales_(scales),
-      embeddings_(embeddings),
-      interpreter_(std::move(interpreter)) {}
-
-bool TFLiteEmbeddingExecutor::AddEmbedding(
-    const TensorView<int>& sparse_features, float* dest, int dest_size) const {
-  if (dest_size != output_embedding_size_) {
-    TC_LOG(ERROR) << "Mismatching dest_size and output_embedding_size: "
-                  << dest_size << " " << output_embedding_size_;
-    return false;
-  }
-  const int num_sparse_features = sparse_features.size();
-  for (int i = 0; i < num_sparse_features; ++i) {
-    const int bucket_id = sparse_features.data()[i];
-    if (bucket_id >= num_buckets_) {
-      return false;
-    }
-
-    if (!DequantizeAdd(scales_->data.f, embeddings_->data.uint8,
-                       bytes_per_embedding_, num_sparse_features,
-                       quantization_bits_, bucket_id, dest, dest_size)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-TensorView<float> ComputeLogitsHelper(const int input_index_features,
-                                      const int output_index_logits,
-                                      const TensorView<float>& features,
-                                      tflite::Interpreter* interpreter) {
-  if (!interpreter) {
-    return TensorView<float>::Invalid();
-  }
-  interpreter->ResizeInputTensor(input_index_features, features.shape());
-  if (interpreter->AllocateTensors() != kTfLiteOk) {
-    TC_VLOG(1) << "Allocation failed.";
-    return TensorView<float>::Invalid();
-  }
-
-  TfLiteTensor* features_tensor =
-      interpreter->tensor(interpreter->inputs()[input_index_features]);
-  int size = 1;
-  for (int i = 0; i < features_tensor->dims->size; ++i) {
-    size *= features_tensor->dims->data[i];
-  }
-  features.copy_to(features_tensor->data.f, size);
-
-  if (interpreter->Invoke() != kTfLiteOk) {
-    TC_VLOG(1) << "Interpreter failed.";
-    return TensorView<float>::Invalid();
-  }
-
-  TfLiteTensor* logits_tensor =
-      interpreter->tensor(interpreter->outputs()[output_index_logits]);
-
-  std::vector<int> output_shape(logits_tensor->dims->size);
-  for (int i = 0; i < logits_tensor->dims->size; ++i) {
-    output_shape[i] = logits_tensor->dims->data[i];
-  }
-
-  return TensorView<float>(logits_tensor->data.f, output_shape);
-}
-
-}  // namespace libtextclassifier2
diff --git a/model-executor.h b/model-executor.h
deleted file mode 100644
index ef6d36f..0000000
--- a/model-executor.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Contains classes that can execute different models/parts of a model.
-
-#ifndef LIBTEXTCLASSIFIER_MODEL_EXECUTOR_H_
-#define LIBTEXTCLASSIFIER_MODEL_EXECUTOR_H_
-
-#include <memory>
-
-#include "tensor-view.h"
-#include "types.h"
-#include "util/base/logging.h"
-#include "tensorflow/contrib/lite/interpreter.h"
-#include "tensorflow/contrib/lite/kernels/register.h"
-#include "tensorflow/contrib/lite/model.h"
-
-namespace libtextclassifier2 {
-
-namespace internal {
-bool FromModelSpec(const tflite::Model* model_spec,
-                   std::unique_ptr<const tflite::FlatBufferModel>* model);
-}  // namespace internal
-
-// A helper function that given indices of feature and logits tensor, feature
-// values computes the logits using given interpreter.
-TensorView<float> ComputeLogitsHelper(const int input_index_features,
-                                      const int output_index_logits,
-                                      const TensorView<float>& features,
-                                      tflite::Interpreter* interpreter);
-
-// Executor for the text selection prediction and classification models.
-class ModelExecutor {
- public:
-  static std::unique_ptr<const ModelExecutor> Instance(
-      const flatbuffers::Vector<uint8_t>* model_spec_buffer) {
-    const tflite::Model* model =
-        flatbuffers::GetRoot<tflite::Model>(model_spec_buffer->data());
-    flatbuffers::Verifier verifier(model_spec_buffer->data(),
-                                   model_spec_buffer->Length());
-    if (!model->Verify(verifier)) {
-      return nullptr;
-    }
-    return Instance(model);
-  }
-
-  static std::unique_ptr<const ModelExecutor> Instance(
-      const tflite::Model* model_spec) {
-    std::unique_ptr<const tflite::FlatBufferModel> model;
-    if (!internal::FromModelSpec(model_spec, &model)) {
-      return nullptr;
-    }
-    return std::unique_ptr<ModelExecutor>(new ModelExecutor(std::move(model)));
-  }
-
-  // Creates an Interpreter for the model that serves as a scratch-pad for the
-  // inference. The Interpreter is NOT thread-safe.
-  std::unique_ptr<tflite::Interpreter> CreateInterpreter() const;
-
-  TensorView<float> ComputeLogits(const TensorView<float>& features,
-                                  tflite::Interpreter* interpreter) const {
-    return ComputeLogitsHelper(kInputIndexFeatures, kOutputIndexLogits,
-                               features, interpreter);
-  }
-
- protected:
-  explicit ModelExecutor(std::unique_ptr<const tflite::FlatBufferModel> model)
-      : model_(std::move(model)) {}
-
-  static const int kInputIndexFeatures = 0;
-  static const int kOutputIndexLogits = 0;
-
-  std::unique_ptr<const tflite::FlatBufferModel> model_;
-  tflite::ops::builtin::BuiltinOpResolver builtins_;
-};
-
-// Executor for embedding sparse features into a dense vector.
-class EmbeddingExecutor {
- public:
-  virtual ~EmbeddingExecutor() {}
-
-  // Embeds the sparse_features into a dense embedding and adds (+) it
-  // element-wise to the dest vector.
-  virtual bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
-                            int dest_size) const = 0;
-
-  // Returns true when the model is ready to be used, false otherwise.
-  virtual bool IsReady() const { return true; }
-};
-
-class TFLiteEmbeddingExecutor : public EmbeddingExecutor {
- public:
-  static std::unique_ptr<TFLiteEmbeddingExecutor> Instance(
-      const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
-      int quantization_bits);
-
-  bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
-                    int dest_size) const override;
-
- protected:
-  explicit TFLiteEmbeddingExecutor(
-      std::unique_ptr<const tflite::FlatBufferModel> model,
-      int quantization_bits, int num_buckets, int bytes_per_embedding,
-      int output_embedding_size, const TfLiteTensor* scales,
-      const TfLiteTensor* embeddings,
-      std::unique_ptr<tflite::Interpreter> interpreter);
-
-  std::unique_ptr<const tflite::FlatBufferModel> model_;
-
-  int quantization_bits_;
-  int num_buckets_ = -1;
-  int bytes_per_embedding_ = -1;
-  int output_embedding_size_ = -1;
-  const TfLiteTensor* scales_ = nullptr;
-  const TfLiteTensor* embeddings_ = nullptr;
-
-  // NOTE: This interpreter is used in a read-only way (as a storage for the
-  // model params), thus is still thread-safe.
-  std::unique_ptr<tflite::Interpreter> interpreter_;
-};
-
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_MODEL_EXECUTOR_H_
diff --git a/model_generated.h b/model_generated.h
deleted file mode 100755
index 6ef75f6..0000000
--- a/model_generated.h
+++ /dev/null
@@ -1,3718 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// automatically generated by the FlatBuffers compiler, do not modify
-
-
-#ifndef FLATBUFFERS_GENERATED_MODEL_LIBTEXTCLASSIFIER2_H_
-#define FLATBUFFERS_GENERATED_MODEL_LIBTEXTCLASSIFIER2_H_
-
-#include "flatbuffers/flatbuffers.h"
-
-namespace libtextclassifier2 {
-
-struct CompressedBuffer;
-struct CompressedBufferT;
-
-struct SelectionModelOptions;
-struct SelectionModelOptionsT;
-
-struct ClassificationModelOptions;
-struct ClassificationModelOptionsT;
-
-namespace RegexModel_ {
-
-struct Pattern;
-struct PatternT;
-
-}  // namespace RegexModel_
-
-struct RegexModel;
-struct RegexModelT;
-
-namespace DatetimeModelPattern_ {
-
-struct Regex;
-struct RegexT;
-
-}  // namespace DatetimeModelPattern_
-
-struct DatetimeModelPattern;
-struct DatetimeModelPatternT;
-
-struct DatetimeModelExtractor;
-struct DatetimeModelExtractorT;
-
-struct DatetimeModel;
-struct DatetimeModelT;
-
-namespace DatetimeModelLibrary_ {
-
-struct Item;
-struct ItemT;
-
-}  // namespace DatetimeModelLibrary_
-
-struct DatetimeModelLibrary;
-struct DatetimeModelLibraryT;
-
-struct ModelTriggeringOptions;
-struct ModelTriggeringOptionsT;
-
-struct OutputOptions;
-struct OutputOptionsT;
-
-struct Model;
-struct ModelT;
-
-struct TokenizationCodepointRange;
-struct TokenizationCodepointRangeT;
-
-namespace FeatureProcessorOptions_ {
-
-struct CodepointRange;
-struct CodepointRangeT;
-
-struct BoundsSensitiveFeatures;
-struct BoundsSensitiveFeaturesT;
-
-struct AlternativeCollectionMapEntry;
-struct AlternativeCollectionMapEntryT;
-
-}  // namespace FeatureProcessorOptions_
-
-struct FeatureProcessorOptions;
-struct FeatureProcessorOptionsT;
-
-enum ModeFlag {
-  ModeFlag_NONE = 0,
-  ModeFlag_ANNOTATION = 1,
-  ModeFlag_CLASSIFICATION = 2,
-  ModeFlag_ANNOTATION_AND_CLASSIFICATION = 3,
-  ModeFlag_SELECTION = 4,
-  ModeFlag_ANNOTATION_AND_SELECTION = 5,
-  ModeFlag_CLASSIFICATION_AND_SELECTION = 6,
-  ModeFlag_ALL = 7,
-  ModeFlag_MIN = ModeFlag_NONE,
-  ModeFlag_MAX = ModeFlag_ALL
-};
-
-inline ModeFlag (&EnumValuesModeFlag())[8] {
-  static ModeFlag values[] = {
-    ModeFlag_NONE,
-    ModeFlag_ANNOTATION,
-    ModeFlag_CLASSIFICATION,
-    ModeFlag_ANNOTATION_AND_CLASSIFICATION,
-    ModeFlag_SELECTION,
-    ModeFlag_ANNOTATION_AND_SELECTION,
-    ModeFlag_CLASSIFICATION_AND_SELECTION,
-    ModeFlag_ALL
-  };
-  return values;
-}
-
-inline const char **EnumNamesModeFlag() {
-  static const char *names[] = {
-    "NONE",
-    "ANNOTATION",
-    "CLASSIFICATION",
-    "ANNOTATION_AND_CLASSIFICATION",
-    "SELECTION",
-    "ANNOTATION_AND_SELECTION",
-    "CLASSIFICATION_AND_SELECTION",
-    "ALL",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameModeFlag(ModeFlag e) {
-  const size_t index = static_cast<int>(e);
-  return EnumNamesModeFlag()[index];
-}
-
-enum DatetimeExtractorType {
-  DatetimeExtractorType_UNKNOWN_DATETIME_EXTRACTOR_TYPE = 0,
-  DatetimeExtractorType_AM = 1,
-  DatetimeExtractorType_PM = 2,
-  DatetimeExtractorType_JANUARY = 3,
-  DatetimeExtractorType_FEBRUARY = 4,
-  DatetimeExtractorType_MARCH = 5,
-  DatetimeExtractorType_APRIL = 6,
-  DatetimeExtractorType_MAY = 7,
-  DatetimeExtractorType_JUNE = 8,
-  DatetimeExtractorType_JULY = 9,
-  DatetimeExtractorType_AUGUST = 10,
-  DatetimeExtractorType_SEPTEMBER = 11,
-  DatetimeExtractorType_OCTOBER = 12,
-  DatetimeExtractorType_NOVEMBER = 13,
-  DatetimeExtractorType_DECEMBER = 14,
-  DatetimeExtractorType_NEXT = 15,
-  DatetimeExtractorType_NEXT_OR_SAME = 16,
-  DatetimeExtractorType_LAST = 17,
-  DatetimeExtractorType_NOW = 18,
-  DatetimeExtractorType_TOMORROW = 19,
-  DatetimeExtractorType_YESTERDAY = 20,
-  DatetimeExtractorType_PAST = 21,
-  DatetimeExtractorType_FUTURE = 22,
-  DatetimeExtractorType_DAY = 23,
-  DatetimeExtractorType_WEEK = 24,
-  DatetimeExtractorType_MONTH = 25,
-  DatetimeExtractorType_YEAR = 26,
-  DatetimeExtractorType_MONDAY = 27,
-  DatetimeExtractorType_TUESDAY = 28,
-  DatetimeExtractorType_WEDNESDAY = 29,
-  DatetimeExtractorType_THURSDAY = 30,
-  DatetimeExtractorType_FRIDAY = 31,
-  DatetimeExtractorType_SATURDAY = 32,
-  DatetimeExtractorType_SUNDAY = 33,
-  DatetimeExtractorType_DAYS = 34,
-  DatetimeExtractorType_WEEKS = 35,
-  DatetimeExtractorType_MONTHS = 36,
-  DatetimeExtractorType_HOURS = 37,
-  DatetimeExtractorType_MINUTES = 38,
-  DatetimeExtractorType_SECONDS = 39,
-  DatetimeExtractorType_YEARS = 40,
-  DatetimeExtractorType_DIGITS = 41,
-  DatetimeExtractorType_SIGNEDDIGITS = 42,
-  DatetimeExtractorType_ZERO = 43,
-  DatetimeExtractorType_ONE = 44,
-  DatetimeExtractorType_TWO = 45,
-  DatetimeExtractorType_THREE = 46,
-  DatetimeExtractorType_FOUR = 47,
-  DatetimeExtractorType_FIVE = 48,
-  DatetimeExtractorType_SIX = 49,
-  DatetimeExtractorType_SEVEN = 50,
-  DatetimeExtractorType_EIGHT = 51,
-  DatetimeExtractorType_NINE = 52,
-  DatetimeExtractorType_TEN = 53,
-  DatetimeExtractorType_ELEVEN = 54,
-  DatetimeExtractorType_TWELVE = 55,
-  DatetimeExtractorType_THIRTEEN = 56,
-  DatetimeExtractorType_FOURTEEN = 57,
-  DatetimeExtractorType_FIFTEEN = 58,
-  DatetimeExtractorType_SIXTEEN = 59,
-  DatetimeExtractorType_SEVENTEEN = 60,
-  DatetimeExtractorType_EIGHTEEN = 61,
-  DatetimeExtractorType_NINETEEN = 62,
-  DatetimeExtractorType_TWENTY = 63,
-  DatetimeExtractorType_THIRTY = 64,
-  DatetimeExtractorType_FORTY = 65,
-  DatetimeExtractorType_FIFTY = 66,
-  DatetimeExtractorType_SIXTY = 67,
-  DatetimeExtractorType_SEVENTY = 68,
-  DatetimeExtractorType_EIGHTY = 69,
-  DatetimeExtractorType_NINETY = 70,
-  DatetimeExtractorType_HUNDRED = 71,
-  DatetimeExtractorType_THOUSAND = 72,
-  DatetimeExtractorType_MIN = DatetimeExtractorType_UNKNOWN_DATETIME_EXTRACTOR_TYPE,
-  DatetimeExtractorType_MAX = DatetimeExtractorType_THOUSAND
-};
-
-inline DatetimeExtractorType (&EnumValuesDatetimeExtractorType())[73] {
-  static DatetimeExtractorType values[] = {
-    DatetimeExtractorType_UNKNOWN_DATETIME_EXTRACTOR_TYPE,
-    DatetimeExtractorType_AM,
-    DatetimeExtractorType_PM,
-    DatetimeExtractorType_JANUARY,
-    DatetimeExtractorType_FEBRUARY,
-    DatetimeExtractorType_MARCH,
-    DatetimeExtractorType_APRIL,
-    DatetimeExtractorType_MAY,
-    DatetimeExtractorType_JUNE,
-    DatetimeExtractorType_JULY,
-    DatetimeExtractorType_AUGUST,
-    DatetimeExtractorType_SEPTEMBER,
-    DatetimeExtractorType_OCTOBER,
-    DatetimeExtractorType_NOVEMBER,
-    DatetimeExtractorType_DECEMBER,
-    DatetimeExtractorType_NEXT,
-    DatetimeExtractorType_NEXT_OR_SAME,
-    DatetimeExtractorType_LAST,
-    DatetimeExtractorType_NOW,
-    DatetimeExtractorType_TOMORROW,
-    DatetimeExtractorType_YESTERDAY,
-    DatetimeExtractorType_PAST,
-    DatetimeExtractorType_FUTURE,
-    DatetimeExtractorType_DAY,
-    DatetimeExtractorType_WEEK,
-    DatetimeExtractorType_MONTH,
-    DatetimeExtractorType_YEAR,
-    DatetimeExtractorType_MONDAY,
-    DatetimeExtractorType_TUESDAY,
-    DatetimeExtractorType_WEDNESDAY,
-    DatetimeExtractorType_THURSDAY,
-    DatetimeExtractorType_FRIDAY,
-    DatetimeExtractorType_SATURDAY,
-    DatetimeExtractorType_SUNDAY,
-    DatetimeExtractorType_DAYS,
-    DatetimeExtractorType_WEEKS,
-    DatetimeExtractorType_MONTHS,
-    DatetimeExtractorType_HOURS,
-    DatetimeExtractorType_MINUTES,
-    DatetimeExtractorType_SECONDS,
-    DatetimeExtractorType_YEARS,
-    DatetimeExtractorType_DIGITS,
-    DatetimeExtractorType_SIGNEDDIGITS,
-    DatetimeExtractorType_ZERO,
-    DatetimeExtractorType_ONE,
-    DatetimeExtractorType_TWO,
-    DatetimeExtractorType_THREE,
-    DatetimeExtractorType_FOUR,
-    DatetimeExtractorType_FIVE,
-    DatetimeExtractorType_SIX,
-    DatetimeExtractorType_SEVEN,
-    DatetimeExtractorType_EIGHT,
-    DatetimeExtractorType_NINE,
-    DatetimeExtractorType_TEN,
-    DatetimeExtractorType_ELEVEN,
-    DatetimeExtractorType_TWELVE,
-    DatetimeExtractorType_THIRTEEN,
-    DatetimeExtractorType_FOURTEEN,
-    DatetimeExtractorType_FIFTEEN,
-    DatetimeExtractorType_SIXTEEN,
-    DatetimeExtractorType_SEVENTEEN,
-    DatetimeExtractorType_EIGHTEEN,
-    DatetimeExtractorType_NINETEEN,
-    DatetimeExtractorType_TWENTY,
-    DatetimeExtractorType_THIRTY,
-    DatetimeExtractorType_FORTY,
-    DatetimeExtractorType_FIFTY,
-    DatetimeExtractorType_SIXTY,
-    DatetimeExtractorType_SEVENTY,
-    DatetimeExtractorType_EIGHTY,
-    DatetimeExtractorType_NINETY,
-    DatetimeExtractorType_HUNDRED,
-    DatetimeExtractorType_THOUSAND
-  };
-  return values;
-}
-
-inline const char **EnumNamesDatetimeExtractorType() {
-  static const char *names[] = {
-    "UNKNOWN_DATETIME_EXTRACTOR_TYPE",
-    "AM",
-    "PM",
-    "JANUARY",
-    "FEBRUARY",
-    "MARCH",
-    "APRIL",
-    "MAY",
-    "JUNE",
-    "JULY",
-    "AUGUST",
-    "SEPTEMBER",
-    "OCTOBER",
-    "NOVEMBER",
-    "DECEMBER",
-    "NEXT",
-    "NEXT_OR_SAME",
-    "LAST",
-    "NOW",
-    "TOMORROW",
-    "YESTERDAY",
-    "PAST",
-    "FUTURE",
-    "DAY",
-    "WEEK",
-    "MONTH",
-    "YEAR",
-    "MONDAY",
-    "TUESDAY",
-    "WEDNESDAY",
-    "THURSDAY",
-    "FRIDAY",
-    "SATURDAY",
-    "SUNDAY",
-    "DAYS",
-    "WEEKS",
-    "MONTHS",
-    "HOURS",
-    "MINUTES",
-    "SECONDS",
-    "YEARS",
-    "DIGITS",
-    "SIGNEDDIGITS",
-    "ZERO",
-    "ONE",
-    "TWO",
-    "THREE",
-    "FOUR",
-    "FIVE",
-    "SIX",
-    "SEVEN",
-    "EIGHT",
-    "NINE",
-    "TEN",
-    "ELEVEN",
-    "TWELVE",
-    "THIRTEEN",
-    "FOURTEEN",
-    "FIFTEEN",
-    "SIXTEEN",
-    "SEVENTEEN",
-    "EIGHTEEN",
-    "NINETEEN",
-    "TWENTY",
-    "THIRTY",
-    "FORTY",
-    "FIFTY",
-    "SIXTY",
-    "SEVENTY",
-    "EIGHTY",
-    "NINETY",
-    "HUNDRED",
-    "THOUSAND",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameDatetimeExtractorType(DatetimeExtractorType e) {
-  const size_t index = static_cast<int>(e);
-  return EnumNamesDatetimeExtractorType()[index];
-}
-
-enum DatetimeGroupType {
-  DatetimeGroupType_GROUP_UNKNOWN = 0,
-  DatetimeGroupType_GROUP_UNUSED = 1,
-  DatetimeGroupType_GROUP_YEAR = 2,
-  DatetimeGroupType_GROUP_MONTH = 3,
-  DatetimeGroupType_GROUP_DAY = 4,
-  DatetimeGroupType_GROUP_HOUR = 5,
-  DatetimeGroupType_GROUP_MINUTE = 6,
-  DatetimeGroupType_GROUP_SECOND = 7,
-  DatetimeGroupType_GROUP_AMPM = 8,
-  DatetimeGroupType_GROUP_RELATIONDISTANCE = 9,
-  DatetimeGroupType_GROUP_RELATION = 10,
-  DatetimeGroupType_GROUP_RELATIONTYPE = 11,
-  DatetimeGroupType_GROUP_DUMMY1 = 12,
-  DatetimeGroupType_GROUP_DUMMY2 = 13,
-  DatetimeGroupType_MIN = DatetimeGroupType_GROUP_UNKNOWN,
-  DatetimeGroupType_MAX = DatetimeGroupType_GROUP_DUMMY2
-};
-
-inline DatetimeGroupType (&EnumValuesDatetimeGroupType())[14] {
-  static DatetimeGroupType values[] = {
-    DatetimeGroupType_GROUP_UNKNOWN,
-    DatetimeGroupType_GROUP_UNUSED,
-    DatetimeGroupType_GROUP_YEAR,
-    DatetimeGroupType_GROUP_MONTH,
-    DatetimeGroupType_GROUP_DAY,
-    DatetimeGroupType_GROUP_HOUR,
-    DatetimeGroupType_GROUP_MINUTE,
-    DatetimeGroupType_GROUP_SECOND,
-    DatetimeGroupType_GROUP_AMPM,
-    DatetimeGroupType_GROUP_RELATIONDISTANCE,
-    DatetimeGroupType_GROUP_RELATION,
-    DatetimeGroupType_GROUP_RELATIONTYPE,
-    DatetimeGroupType_GROUP_DUMMY1,
-    DatetimeGroupType_GROUP_DUMMY2
-  };
-  return values;
-}
-
-inline const char **EnumNamesDatetimeGroupType() {
-  static const char *names[] = {
-    "GROUP_UNKNOWN",
-    "GROUP_UNUSED",
-    "GROUP_YEAR",
-    "GROUP_MONTH",
-    "GROUP_DAY",
-    "GROUP_HOUR",
-    "GROUP_MINUTE",
-    "GROUP_SECOND",
-    "GROUP_AMPM",
-    "GROUP_RELATIONDISTANCE",
-    "GROUP_RELATION",
-    "GROUP_RELATIONTYPE",
-    "GROUP_DUMMY1",
-    "GROUP_DUMMY2",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameDatetimeGroupType(DatetimeGroupType e) {
-  const size_t index = static_cast<int>(e);
-  return EnumNamesDatetimeGroupType()[index];
-}
-
-namespace TokenizationCodepointRange_ {
-
-enum Role {
-  Role_DEFAULT_ROLE = 0,
-  Role_SPLIT_BEFORE = 1,
-  Role_SPLIT_AFTER = 2,
-  Role_TOKEN_SEPARATOR = 3,
-  Role_DISCARD_CODEPOINT = 4,
-  Role_WHITESPACE_SEPARATOR = 7,
-  Role_MIN = Role_DEFAULT_ROLE,
-  Role_MAX = Role_WHITESPACE_SEPARATOR
-};
-
-inline Role (&EnumValuesRole())[6] {
-  static Role values[] = {
-    Role_DEFAULT_ROLE,
-    Role_SPLIT_BEFORE,
-    Role_SPLIT_AFTER,
-    Role_TOKEN_SEPARATOR,
-    Role_DISCARD_CODEPOINT,
-    Role_WHITESPACE_SEPARATOR
-  };
-  return values;
-}
-
-inline const char **EnumNamesRole() {
-  static const char *names[] = {
-    "DEFAULT_ROLE",
-    "SPLIT_BEFORE",
-    "SPLIT_AFTER",
-    "TOKEN_SEPARATOR",
-    "DISCARD_CODEPOINT",
-    "",
-    "",
-    "WHITESPACE_SEPARATOR",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameRole(Role e) {
-  const size_t index = static_cast<int>(e);
-  return EnumNamesRole()[index];
-}
-
-}  // namespace TokenizationCodepointRange_
-
-namespace FeatureProcessorOptions_ {
-
-enum CenterTokenSelectionMethod {
-  CenterTokenSelectionMethod_DEFAULT_CENTER_TOKEN_METHOD = 0,
-  CenterTokenSelectionMethod_CENTER_TOKEN_FROM_CLICK = 1,
-  CenterTokenSelectionMethod_CENTER_TOKEN_MIDDLE_OF_SELECTION = 2,
-  CenterTokenSelectionMethod_MIN = CenterTokenSelectionMethod_DEFAULT_CENTER_TOKEN_METHOD,
-  CenterTokenSelectionMethod_MAX = CenterTokenSelectionMethod_CENTER_TOKEN_MIDDLE_OF_SELECTION
-};
-
-inline CenterTokenSelectionMethod (&EnumValuesCenterTokenSelectionMethod())[3] {
-  static CenterTokenSelectionMethod values[] = {
-    CenterTokenSelectionMethod_DEFAULT_CENTER_TOKEN_METHOD,
-    CenterTokenSelectionMethod_CENTER_TOKEN_FROM_CLICK,
-    CenterTokenSelectionMethod_CENTER_TOKEN_MIDDLE_OF_SELECTION
-  };
-  return values;
-}
-
-inline const char **EnumNamesCenterTokenSelectionMethod() {
-  static const char *names[] = {
-    "DEFAULT_CENTER_TOKEN_METHOD",
-    "CENTER_TOKEN_FROM_CLICK",
-    "CENTER_TOKEN_MIDDLE_OF_SELECTION",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameCenterTokenSelectionMethod(CenterTokenSelectionMethod e) {
-  const size_t index = static_cast<int>(e);
-  return EnumNamesCenterTokenSelectionMethod()[index];
-}
-
-enum TokenizationType {
-  TokenizationType_INVALID_TOKENIZATION_TYPE = 0,
-  TokenizationType_INTERNAL_TOKENIZER = 1,
-  TokenizationType_ICU = 2,
-  TokenizationType_MIXED = 3,
-  TokenizationType_MIN = TokenizationType_INVALID_TOKENIZATION_TYPE,
-  TokenizationType_MAX = TokenizationType_MIXED
-};
-
-inline TokenizationType (&EnumValuesTokenizationType())[4] {
-  static TokenizationType values[] = {
-    TokenizationType_INVALID_TOKENIZATION_TYPE,
-    TokenizationType_INTERNAL_TOKENIZER,
-    TokenizationType_ICU,
-    TokenizationType_MIXED
-  };
-  return values;
-}
-
-inline const char **EnumNamesTokenizationType() {
-  static const char *names[] = {
-    "INVALID_TOKENIZATION_TYPE",
-    "INTERNAL_TOKENIZER",
-    "ICU",
-    "MIXED",
-    nullptr
-  };
-  return names;
-}
-
-inline const char *EnumNameTokenizationType(TokenizationType e) {
-  const size_t index = static_cast<int>(e);
-  return EnumNamesTokenizationType()[index];
-}
-
-}  // namespace FeatureProcessorOptions_
-
-struct CompressedBufferT : public flatbuffers::NativeTable {
-  typedef CompressedBuffer TableType;
-  std::vector<uint8_t> buffer;
-  int32_t uncompressed_size;
-  CompressedBufferT()
-      : uncompressed_size(0) {
-  }
-};
-
-struct CompressedBuffer FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CompressedBufferT NativeTableType;
-  enum {
-    VT_BUFFER = 4,
-    VT_UNCOMPRESSED_SIZE = 6
-  };
-  const flatbuffers::Vector<uint8_t> *buffer() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_BUFFER);
-  }
-  int32_t uncompressed_size() const {
-    return GetField<int32_t>(VT_UNCOMPRESSED_SIZE, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_BUFFER) &&
-           verifier.Verify(buffer()) &&
-           VerifyField<int32_t>(verifier, VT_UNCOMPRESSED_SIZE) &&
-           verifier.EndTable();
-  }
-  CompressedBufferT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CompressedBufferT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CompressedBuffer> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CompressedBufferT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CompressedBufferBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_buffer(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> buffer) {
-    fbb_.AddOffset(CompressedBuffer::VT_BUFFER, buffer);
-  }
-  void add_uncompressed_size(int32_t uncompressed_size) {
-    fbb_.AddElement<int32_t>(CompressedBuffer::VT_UNCOMPRESSED_SIZE, uncompressed_size, 0);
-  }
-  explicit CompressedBufferBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CompressedBufferBuilder &operator=(const CompressedBufferBuilder &);
-  flatbuffers::Offset<CompressedBuffer> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CompressedBuffer>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CompressedBuffer> CreateCompressedBuffer(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> buffer = 0,
-    int32_t uncompressed_size = 0) {
-  CompressedBufferBuilder builder_(_fbb);
-  builder_.add_uncompressed_size(uncompressed_size);
-  builder_.add_buffer(buffer);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<CompressedBuffer> CreateCompressedBufferDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<uint8_t> *buffer = nullptr,
-    int32_t uncompressed_size = 0) {
-  return libtextclassifier2::CreateCompressedBuffer(
-      _fbb,
-      buffer ? _fbb.CreateVector<uint8_t>(*buffer) : 0,
-      uncompressed_size);
-}
-
-flatbuffers::Offset<CompressedBuffer> CreateCompressedBuffer(flatbuffers::FlatBufferBuilder &_fbb, const CompressedBufferT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct SelectionModelOptionsT : public flatbuffers::NativeTable {
-  typedef SelectionModelOptions TableType;
-  bool strip_unpaired_brackets;
-  int32_t symmetry_context_size;
-  int32_t batch_size;
-  bool always_classify_suggested_selection;
-  SelectionModelOptionsT()
-      : strip_unpaired_brackets(true),
-        symmetry_context_size(0),
-        batch_size(1024),
-        always_classify_suggested_selection(false) {
-  }
-};
-
-struct SelectionModelOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef SelectionModelOptionsT NativeTableType;
-  enum {
-    VT_STRIP_UNPAIRED_BRACKETS = 4,
-    VT_SYMMETRY_CONTEXT_SIZE = 6,
-    VT_BATCH_SIZE = 8,
-    VT_ALWAYS_CLASSIFY_SUGGESTED_SELECTION = 10
-  };
-  bool strip_unpaired_brackets() const {
-    return GetField<uint8_t>(VT_STRIP_UNPAIRED_BRACKETS, 1) != 0;
-  }
-  int32_t symmetry_context_size() const {
-    return GetField<int32_t>(VT_SYMMETRY_CONTEXT_SIZE, 0);
-  }
-  int32_t batch_size() const {
-    return GetField<int32_t>(VT_BATCH_SIZE, 1024);
-  }
-  bool always_classify_suggested_selection() const {
-    return GetField<uint8_t>(VT_ALWAYS_CLASSIFY_SUGGESTED_SELECTION, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_STRIP_UNPAIRED_BRACKETS) &&
-           VerifyField<int32_t>(verifier, VT_SYMMETRY_CONTEXT_SIZE) &&
-           VerifyField<int32_t>(verifier, VT_BATCH_SIZE) &&
-           VerifyField<uint8_t>(verifier, VT_ALWAYS_CLASSIFY_SUGGESTED_SELECTION) &&
-           verifier.EndTable();
-  }
-  SelectionModelOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(SelectionModelOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<SelectionModelOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectionModelOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct SelectionModelOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_strip_unpaired_brackets(bool strip_unpaired_brackets) {
-    fbb_.AddElement<uint8_t>(SelectionModelOptions::VT_STRIP_UNPAIRED_BRACKETS, static_cast<uint8_t>(strip_unpaired_brackets), 1);
-  }
-  void add_symmetry_context_size(int32_t symmetry_context_size) {
-    fbb_.AddElement<int32_t>(SelectionModelOptions::VT_SYMMETRY_CONTEXT_SIZE, symmetry_context_size, 0);
-  }
-  void add_batch_size(int32_t batch_size) {
-    fbb_.AddElement<int32_t>(SelectionModelOptions::VT_BATCH_SIZE, batch_size, 1024);
-  }
-  void add_always_classify_suggested_selection(bool always_classify_suggested_selection) {
-    fbb_.AddElement<uint8_t>(SelectionModelOptions::VT_ALWAYS_CLASSIFY_SUGGESTED_SELECTION, static_cast<uint8_t>(always_classify_suggested_selection), 0);
-  }
-  explicit SelectionModelOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  SelectionModelOptionsBuilder &operator=(const SelectionModelOptionsBuilder &);
-  flatbuffers::Offset<SelectionModelOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<SelectionModelOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<SelectionModelOptions> CreateSelectionModelOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool strip_unpaired_brackets = true,
-    int32_t symmetry_context_size = 0,
-    int32_t batch_size = 1024,
-    bool always_classify_suggested_selection = false) {
-  SelectionModelOptionsBuilder builder_(_fbb);
-  builder_.add_batch_size(batch_size);
-  builder_.add_symmetry_context_size(symmetry_context_size);
-  builder_.add_always_classify_suggested_selection(always_classify_suggested_selection);
-  builder_.add_strip_unpaired_brackets(strip_unpaired_brackets);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<SelectionModelOptions> CreateSelectionModelOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectionModelOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ClassificationModelOptionsT : public flatbuffers::NativeTable {
-  typedef ClassificationModelOptions TableType;
-  int32_t phone_min_num_digits;
-  int32_t phone_max_num_digits;
-  int32_t address_min_num_tokens;
-  int32_t max_num_tokens;
-  ClassificationModelOptionsT()
-      : phone_min_num_digits(7),
-        phone_max_num_digits(15),
-        address_min_num_tokens(0),
-        max_num_tokens(-1) {
-  }
-};
-
-struct ClassificationModelOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ClassificationModelOptionsT NativeTableType;
-  enum {
-    VT_PHONE_MIN_NUM_DIGITS = 4,
-    VT_PHONE_MAX_NUM_DIGITS = 6,
-    VT_ADDRESS_MIN_NUM_TOKENS = 8,
-    VT_MAX_NUM_TOKENS = 10
-  };
-  int32_t phone_min_num_digits() const {
-    return GetField<int32_t>(VT_PHONE_MIN_NUM_DIGITS, 7);
-  }
-  int32_t phone_max_num_digits() const {
-    return GetField<int32_t>(VT_PHONE_MAX_NUM_DIGITS, 15);
-  }
-  int32_t address_min_num_tokens() const {
-    return GetField<int32_t>(VT_ADDRESS_MIN_NUM_TOKENS, 0);
-  }
-  int32_t max_num_tokens() const {
-    return GetField<int32_t>(VT_MAX_NUM_TOKENS, -1);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_PHONE_MIN_NUM_DIGITS) &&
-           VerifyField<int32_t>(verifier, VT_PHONE_MAX_NUM_DIGITS) &&
-           VerifyField<int32_t>(verifier, VT_ADDRESS_MIN_NUM_TOKENS) &&
-           VerifyField<int32_t>(verifier, VT_MAX_NUM_TOKENS) &&
-           verifier.EndTable();
-  }
-  ClassificationModelOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ClassificationModelOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ClassificationModelOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ClassificationModelOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ClassificationModelOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_phone_min_num_digits(int32_t phone_min_num_digits) {
-    fbb_.AddElement<int32_t>(ClassificationModelOptions::VT_PHONE_MIN_NUM_DIGITS, phone_min_num_digits, 7);
-  }
-  void add_phone_max_num_digits(int32_t phone_max_num_digits) {
-    fbb_.AddElement<int32_t>(ClassificationModelOptions::VT_PHONE_MAX_NUM_DIGITS, phone_max_num_digits, 15);
-  }
-  void add_address_min_num_tokens(int32_t address_min_num_tokens) {
-    fbb_.AddElement<int32_t>(ClassificationModelOptions::VT_ADDRESS_MIN_NUM_TOKENS, address_min_num_tokens, 0);
-  }
-  void add_max_num_tokens(int32_t max_num_tokens) {
-    fbb_.AddElement<int32_t>(ClassificationModelOptions::VT_MAX_NUM_TOKENS, max_num_tokens, -1);
-  }
-  explicit ClassificationModelOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ClassificationModelOptionsBuilder &operator=(const ClassificationModelOptionsBuilder &);
-  flatbuffers::Offset<ClassificationModelOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ClassificationModelOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ClassificationModelOptions> CreateClassificationModelOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t phone_min_num_digits = 7,
-    int32_t phone_max_num_digits = 15,
-    int32_t address_min_num_tokens = 0,
-    int32_t max_num_tokens = -1) {
-  ClassificationModelOptionsBuilder builder_(_fbb);
-  builder_.add_max_num_tokens(max_num_tokens);
-  builder_.add_address_min_num_tokens(address_min_num_tokens);
-  builder_.add_phone_max_num_digits(phone_max_num_digits);
-  builder_.add_phone_min_num_digits(phone_min_num_digits);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ClassificationModelOptions> CreateClassificationModelOptions(flatbuffers::FlatBufferBuilder &_fbb, const ClassificationModelOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-namespace RegexModel_ {
-
-struct PatternT : public flatbuffers::NativeTable {
-  typedef Pattern TableType;
-  std::string collection_name;
-  std::string pattern;
-  libtextclassifier2::ModeFlag enabled_modes;
-  float target_classification_score;
-  float priority_score;
-  bool use_approximate_matching;
-  std::unique_ptr<libtextclassifier2::CompressedBufferT> compressed_pattern;
-  PatternT()
-      : enabled_modes(libtextclassifier2::ModeFlag_ALL),
-        target_classification_score(1.0f),
-        priority_score(0.0f),
-        use_approximate_matching(false) {
-  }
-};
-
-struct Pattern FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef PatternT NativeTableType;
-  enum {
-    VT_COLLECTION_NAME = 4,
-    VT_PATTERN = 6,
-    VT_ENABLED_MODES = 8,
-    VT_TARGET_CLASSIFICATION_SCORE = 10,
-    VT_PRIORITY_SCORE = 12,
-    VT_USE_APPROXIMATE_MATCHING = 14,
-    VT_COMPRESSED_PATTERN = 16
-  };
-  const flatbuffers::String *collection_name() const {
-    return GetPointer<const flatbuffers::String *>(VT_COLLECTION_NAME);
-  }
-  const flatbuffers::String *pattern() const {
-    return GetPointer<const flatbuffers::String *>(VT_PATTERN);
-  }
-  libtextclassifier2::ModeFlag enabled_modes() const {
-    return static_cast<libtextclassifier2::ModeFlag>(GetField<int32_t>(VT_ENABLED_MODES, 7));
-  }
-  float target_classification_score() const {
-    return GetField<float>(VT_TARGET_CLASSIFICATION_SCORE, 1.0f);
-  }
-  float priority_score() const {
-    return GetField<float>(VT_PRIORITY_SCORE, 0.0f);
-  }
-  bool use_approximate_matching() const {
-    return GetField<uint8_t>(VT_USE_APPROXIMATE_MATCHING, 0) != 0;
-  }
-  const libtextclassifier2::CompressedBuffer *compressed_pattern() const {
-    return GetPointer<const libtextclassifier2::CompressedBuffer *>(VT_COMPRESSED_PATTERN);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_COLLECTION_NAME) &&
-           verifier.Verify(collection_name()) &&
-           VerifyOffset(verifier, VT_PATTERN) &&
-           verifier.Verify(pattern()) &&
-           VerifyField<int32_t>(verifier, VT_ENABLED_MODES) &&
-           VerifyField<float>(verifier, VT_TARGET_CLASSIFICATION_SCORE) &&
-           VerifyField<float>(verifier, VT_PRIORITY_SCORE) &&
-           VerifyField<uint8_t>(verifier, VT_USE_APPROXIMATE_MATCHING) &&
-           VerifyOffset(verifier, VT_COMPRESSED_PATTERN) &&
-           verifier.VerifyTable(compressed_pattern()) &&
-           verifier.EndTable();
-  }
-  PatternT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(PatternT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Pattern> Pack(flatbuffers::FlatBufferBuilder &_fbb, const PatternT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct PatternBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_collection_name(flatbuffers::Offset<flatbuffers::String> collection_name) {
-    fbb_.AddOffset(Pattern::VT_COLLECTION_NAME, collection_name);
-  }
-  void add_pattern(flatbuffers::Offset<flatbuffers::String> pattern) {
-    fbb_.AddOffset(Pattern::VT_PATTERN, pattern);
-  }
-  void add_enabled_modes(libtextclassifier2::ModeFlag enabled_modes) {
-    fbb_.AddElement<int32_t>(Pattern::VT_ENABLED_MODES, static_cast<int32_t>(enabled_modes), 7);
-  }
-  void add_target_classification_score(float target_classification_score) {
-    fbb_.AddElement<float>(Pattern::VT_TARGET_CLASSIFICATION_SCORE, target_classification_score, 1.0f);
-  }
-  void add_priority_score(float priority_score) {
-    fbb_.AddElement<float>(Pattern::VT_PRIORITY_SCORE, priority_score, 0.0f);
-  }
-  void add_use_approximate_matching(bool use_approximate_matching) {
-    fbb_.AddElement<uint8_t>(Pattern::VT_USE_APPROXIMATE_MATCHING, static_cast<uint8_t>(use_approximate_matching), 0);
-  }
-  void add_compressed_pattern(flatbuffers::Offset<libtextclassifier2::CompressedBuffer> compressed_pattern) {
-    fbb_.AddOffset(Pattern::VT_COMPRESSED_PATTERN, compressed_pattern);
-  }
-  explicit PatternBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  PatternBuilder &operator=(const PatternBuilder &);
-  flatbuffers::Offset<Pattern> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Pattern>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Pattern> CreatePattern(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> collection_name = 0,
-    flatbuffers::Offset<flatbuffers::String> pattern = 0,
-    libtextclassifier2::ModeFlag enabled_modes = libtextclassifier2::ModeFlag_ALL,
-    float target_classification_score = 1.0f,
-    float priority_score = 0.0f,
-    bool use_approximate_matching = false,
-    flatbuffers::Offset<libtextclassifier2::CompressedBuffer> compressed_pattern = 0) {
-  PatternBuilder builder_(_fbb);
-  builder_.add_compressed_pattern(compressed_pattern);
-  builder_.add_priority_score(priority_score);
-  builder_.add_target_classification_score(target_classification_score);
-  builder_.add_enabled_modes(enabled_modes);
-  builder_.add_pattern(pattern);
-  builder_.add_collection_name(collection_name);
-  builder_.add_use_approximate_matching(use_approximate_matching);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Pattern> CreatePatternDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *collection_name = nullptr,
-    const char *pattern = nullptr,
-    libtextclassifier2::ModeFlag enabled_modes = libtextclassifier2::ModeFlag_ALL,
-    float target_classification_score = 1.0f,
-    float priority_score = 0.0f,
-    bool use_approximate_matching = false,
-    flatbuffers::Offset<libtextclassifier2::CompressedBuffer> compressed_pattern = 0) {
-  return libtextclassifier2::RegexModel_::CreatePattern(
-      _fbb,
-      collection_name ? _fbb.CreateString(collection_name) : 0,
-      pattern ? _fbb.CreateString(pattern) : 0,
-      enabled_modes,
-      target_classification_score,
-      priority_score,
-      use_approximate_matching,
-      compressed_pattern);
-}
-
-flatbuffers::Offset<Pattern> CreatePattern(flatbuffers::FlatBufferBuilder &_fbb, const PatternT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-}  // namespace RegexModel_
-
-struct RegexModelT : public flatbuffers::NativeTable {
-  typedef RegexModel TableType;
-  std::vector<std::unique_ptr<libtextclassifier2::RegexModel_::PatternT>> patterns;
-  RegexModelT() {
-  }
-};
-
-struct RegexModel FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef RegexModelT NativeTableType;
-  enum {
-    VT_PATTERNS = 4
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>> *patterns() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>> *>(VT_PATTERNS);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_PATTERNS) &&
-           verifier.Verify(patterns()) &&
-           verifier.VerifyVectorOfTables(patterns()) &&
-           verifier.EndTable();
-  }
-  RegexModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(RegexModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<RegexModel> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RegexModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct RegexModelBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_patterns(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>>> patterns) {
-    fbb_.AddOffset(RegexModel::VT_PATTERNS, patterns);
-  }
-  explicit RegexModelBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  RegexModelBuilder &operator=(const RegexModelBuilder &);
-  flatbuffers::Offset<RegexModel> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<RegexModel>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<RegexModel> CreateRegexModel(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>>> patterns = 0) {
-  RegexModelBuilder builder_(_fbb);
-  builder_.add_patterns(patterns);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<RegexModel> CreateRegexModelDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>> *patterns = nullptr) {
-  return libtextclassifier2::CreateRegexModel(
-      _fbb,
-      patterns ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>>(*patterns) : 0);
-}
-
-flatbuffers::Offset<RegexModel> CreateRegexModel(flatbuffers::FlatBufferBuilder &_fbb, const RegexModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-namespace DatetimeModelPattern_ {
-
-struct RegexT : public flatbuffers::NativeTable {
-  typedef Regex TableType;
-  std::string pattern;
-  std::vector<libtextclassifier2::DatetimeGroupType> groups;
-  std::unique_ptr<libtextclassifier2::CompressedBufferT> compressed_pattern;
-  RegexT() {
-  }
-};
-
-struct Regex FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef RegexT NativeTableType;
-  enum {
-    VT_PATTERN = 4,
-    VT_GROUPS = 6,
-    VT_COMPRESSED_PATTERN = 8
-  };
-  const flatbuffers::String *pattern() const {
-    return GetPointer<const flatbuffers::String *>(VT_PATTERN);
-  }
-  const flatbuffers::Vector<int32_t> *groups() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_GROUPS);
-  }
-  const libtextclassifier2::CompressedBuffer *compressed_pattern() const {
-    return GetPointer<const libtextclassifier2::CompressedBuffer *>(VT_COMPRESSED_PATTERN);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_PATTERN) &&
-           verifier.Verify(pattern()) &&
-           VerifyOffset(verifier, VT_GROUPS) &&
-           verifier.Verify(groups()) &&
-           VerifyOffset(verifier, VT_COMPRESSED_PATTERN) &&
-           verifier.VerifyTable(compressed_pattern()) &&
-           verifier.EndTable();
-  }
-  RegexT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(RegexT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Regex> Pack(flatbuffers::FlatBufferBuilder &_fbb, const RegexT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct RegexBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_pattern(flatbuffers::Offset<flatbuffers::String> pattern) {
-    fbb_.AddOffset(Regex::VT_PATTERN, pattern);
-  }
-  void add_groups(flatbuffers::Offset<flatbuffers::Vector<int32_t>> groups) {
-    fbb_.AddOffset(Regex::VT_GROUPS, groups);
-  }
-  void add_compressed_pattern(flatbuffers::Offset<libtextclassifier2::CompressedBuffer> compressed_pattern) {
-    fbb_.AddOffset(Regex::VT_COMPRESSED_PATTERN, compressed_pattern);
-  }
-  explicit RegexBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  RegexBuilder &operator=(const RegexBuilder &);
-  flatbuffers::Offset<Regex> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Regex>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Regex> CreateRegex(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> pattern = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> groups = 0,
-    flatbuffers::Offset<libtextclassifier2::CompressedBuffer> compressed_pattern = 0) {
-  RegexBuilder builder_(_fbb);
-  builder_.add_compressed_pattern(compressed_pattern);
-  builder_.add_groups(groups);
-  builder_.add_pattern(pattern);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Regex> CreateRegexDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *pattern = nullptr,
-    const std::vector<int32_t> *groups = nullptr,
-    flatbuffers::Offset<libtextclassifier2::CompressedBuffer> compressed_pattern = 0) {
-  return libtextclassifier2::DatetimeModelPattern_::CreateRegex(
-      _fbb,
-      pattern ? _fbb.CreateString(pattern) : 0,
-      groups ? _fbb.CreateVector<int32_t>(*groups) : 0,
-      compressed_pattern);
-}
-
-flatbuffers::Offset<Regex> CreateRegex(flatbuffers::FlatBufferBuilder &_fbb, const RegexT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-}  // namespace DatetimeModelPattern_
-
-struct DatetimeModelPatternT : public flatbuffers::NativeTable {
-  typedef DatetimeModelPattern TableType;
-  std::vector<std::unique_ptr<libtextclassifier2::DatetimeModelPattern_::RegexT>> regexes;
-  std::vector<int32_t> locales;
-  float target_classification_score;
-  float priority_score;
-  ModeFlag enabled_modes;
-  DatetimeModelPatternT()
-      : target_classification_score(1.0f),
-        priority_score(0.0f),
-        enabled_modes(ModeFlag_ALL) {
-  }
-};
-
-struct DatetimeModelPattern FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DatetimeModelPatternT NativeTableType;
-  enum {
-    VT_REGEXES = 4,
-    VT_LOCALES = 6,
-    VT_TARGET_CLASSIFICATION_SCORE = 8,
-    VT_PRIORITY_SCORE = 10,
-    VT_ENABLED_MODES = 12
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>> *regexes() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>> *>(VT_REGEXES);
-  }
-  const flatbuffers::Vector<int32_t> *locales() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_LOCALES);
-  }
-  float target_classification_score() const {
-    return GetField<float>(VT_TARGET_CLASSIFICATION_SCORE, 1.0f);
-  }
-  float priority_score() const {
-    return GetField<float>(VT_PRIORITY_SCORE, 0.0f);
-  }
-  ModeFlag enabled_modes() const {
-    return static_cast<ModeFlag>(GetField<int32_t>(VT_ENABLED_MODES, 7));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_REGEXES) &&
-           verifier.Verify(regexes()) &&
-           verifier.VerifyVectorOfTables(regexes()) &&
-           VerifyOffset(verifier, VT_LOCALES) &&
-           verifier.Verify(locales()) &&
-           VerifyField<float>(verifier, VT_TARGET_CLASSIFICATION_SCORE) &&
-           VerifyField<float>(verifier, VT_PRIORITY_SCORE) &&
-           VerifyField<int32_t>(verifier, VT_ENABLED_MODES) &&
-           verifier.EndTable();
-  }
-  DatetimeModelPatternT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DatetimeModelPatternT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DatetimeModelPattern> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelPatternT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DatetimeModelPatternBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_regexes(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>>> regexes) {
-    fbb_.AddOffset(DatetimeModelPattern::VT_REGEXES, regexes);
-  }
-  void add_locales(flatbuffers::Offset<flatbuffers::Vector<int32_t>> locales) {
-    fbb_.AddOffset(DatetimeModelPattern::VT_LOCALES, locales);
-  }
-  void add_target_classification_score(float target_classification_score) {
-    fbb_.AddElement<float>(DatetimeModelPattern::VT_TARGET_CLASSIFICATION_SCORE, target_classification_score, 1.0f);
-  }
-  void add_priority_score(float priority_score) {
-    fbb_.AddElement<float>(DatetimeModelPattern::VT_PRIORITY_SCORE, priority_score, 0.0f);
-  }
-  void add_enabled_modes(ModeFlag enabled_modes) {
-    fbb_.AddElement<int32_t>(DatetimeModelPattern::VT_ENABLED_MODES, static_cast<int32_t>(enabled_modes), 7);
-  }
-  explicit DatetimeModelPatternBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DatetimeModelPatternBuilder &operator=(const DatetimeModelPatternBuilder &);
-  flatbuffers::Offset<DatetimeModelPattern> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DatetimeModelPattern>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DatetimeModelPattern> CreateDatetimeModelPattern(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>>> regexes = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> locales = 0,
-    float target_classification_score = 1.0f,
-    float priority_score = 0.0f,
-    ModeFlag enabled_modes = ModeFlag_ALL) {
-  DatetimeModelPatternBuilder builder_(_fbb);
-  builder_.add_enabled_modes(enabled_modes);
-  builder_.add_priority_score(priority_score);
-  builder_.add_target_classification_score(target_classification_score);
-  builder_.add_locales(locales);
-  builder_.add_regexes(regexes);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<DatetimeModelPattern> CreateDatetimeModelPatternDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>> *regexes = nullptr,
-    const std::vector<int32_t> *locales = nullptr,
-    float target_classification_score = 1.0f,
-    float priority_score = 0.0f,
-    ModeFlag enabled_modes = ModeFlag_ALL) {
-  return libtextclassifier2::CreateDatetimeModelPattern(
-      _fbb,
-      regexes ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>>(*regexes) : 0,
-      locales ? _fbb.CreateVector<int32_t>(*locales) : 0,
-      target_classification_score,
-      priority_score,
-      enabled_modes);
-}
-
-flatbuffers::Offset<DatetimeModelPattern> CreateDatetimeModelPattern(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelPatternT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DatetimeModelExtractorT : public flatbuffers::NativeTable {
-  typedef DatetimeModelExtractor TableType;
-  DatetimeExtractorType extractor;
-  std::string pattern;
-  std::vector<int32_t> locales;
-  std::unique_ptr<CompressedBufferT> compressed_pattern;
-  DatetimeModelExtractorT()
-      : extractor(DatetimeExtractorType_UNKNOWN_DATETIME_EXTRACTOR_TYPE) {
-  }
-};
-
-struct DatetimeModelExtractor FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DatetimeModelExtractorT NativeTableType;
-  enum {
-    VT_EXTRACTOR = 4,
-    VT_PATTERN = 6,
-    VT_LOCALES = 8,
-    VT_COMPRESSED_PATTERN = 10
-  };
-  DatetimeExtractorType extractor() const {
-    return static_cast<DatetimeExtractorType>(GetField<int32_t>(VT_EXTRACTOR, 0));
-  }
-  const flatbuffers::String *pattern() const {
-    return GetPointer<const flatbuffers::String *>(VT_PATTERN);
-  }
-  const flatbuffers::Vector<int32_t> *locales() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_LOCALES);
-  }
-  const CompressedBuffer *compressed_pattern() const {
-    return GetPointer<const CompressedBuffer *>(VT_COMPRESSED_PATTERN);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_EXTRACTOR) &&
-           VerifyOffset(verifier, VT_PATTERN) &&
-           verifier.Verify(pattern()) &&
-           VerifyOffset(verifier, VT_LOCALES) &&
-           verifier.Verify(locales()) &&
-           VerifyOffset(verifier, VT_COMPRESSED_PATTERN) &&
-           verifier.VerifyTable(compressed_pattern()) &&
-           verifier.EndTable();
-  }
-  DatetimeModelExtractorT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DatetimeModelExtractorT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DatetimeModelExtractor> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelExtractorT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DatetimeModelExtractorBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_extractor(DatetimeExtractorType extractor) {
-    fbb_.AddElement<int32_t>(DatetimeModelExtractor::VT_EXTRACTOR, static_cast<int32_t>(extractor), 0);
-  }
-  void add_pattern(flatbuffers::Offset<flatbuffers::String> pattern) {
-    fbb_.AddOffset(DatetimeModelExtractor::VT_PATTERN, pattern);
-  }
-  void add_locales(flatbuffers::Offset<flatbuffers::Vector<int32_t>> locales) {
-    fbb_.AddOffset(DatetimeModelExtractor::VT_LOCALES, locales);
-  }
-  void add_compressed_pattern(flatbuffers::Offset<CompressedBuffer> compressed_pattern) {
-    fbb_.AddOffset(DatetimeModelExtractor::VT_COMPRESSED_PATTERN, compressed_pattern);
-  }
-  explicit DatetimeModelExtractorBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DatetimeModelExtractorBuilder &operator=(const DatetimeModelExtractorBuilder &);
-  flatbuffers::Offset<DatetimeModelExtractor> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DatetimeModelExtractor>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DatetimeModelExtractor> CreateDatetimeModelExtractor(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    DatetimeExtractorType extractor = DatetimeExtractorType_UNKNOWN_DATETIME_EXTRACTOR_TYPE,
-    flatbuffers::Offset<flatbuffers::String> pattern = 0,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> locales = 0,
-    flatbuffers::Offset<CompressedBuffer> compressed_pattern = 0) {
-  DatetimeModelExtractorBuilder builder_(_fbb);
-  builder_.add_compressed_pattern(compressed_pattern);
-  builder_.add_locales(locales);
-  builder_.add_pattern(pattern);
-  builder_.add_extractor(extractor);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<DatetimeModelExtractor> CreateDatetimeModelExtractorDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    DatetimeExtractorType extractor = DatetimeExtractorType_UNKNOWN_DATETIME_EXTRACTOR_TYPE,
-    const char *pattern = nullptr,
-    const std::vector<int32_t> *locales = nullptr,
-    flatbuffers::Offset<CompressedBuffer> compressed_pattern = 0) {
-  return libtextclassifier2::CreateDatetimeModelExtractor(
-      _fbb,
-      extractor,
-      pattern ? _fbb.CreateString(pattern) : 0,
-      locales ? _fbb.CreateVector<int32_t>(*locales) : 0,
-      compressed_pattern);
-}
-
-flatbuffers::Offset<DatetimeModelExtractor> CreateDatetimeModelExtractor(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelExtractorT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct DatetimeModelT : public flatbuffers::NativeTable {
-  typedef DatetimeModel TableType;
-  std::vector<std::string> locales;
-  std::vector<std::unique_ptr<DatetimeModelPatternT>> patterns;
-  std::vector<std::unique_ptr<DatetimeModelExtractorT>> extractors;
-  bool use_extractors_for_locating;
-  std::vector<int32_t> default_locales;
-  DatetimeModelT()
-      : use_extractors_for_locating(true) {
-  }
-};
-
-struct DatetimeModel FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DatetimeModelT NativeTableType;
-  enum {
-    VT_LOCALES = 4,
-    VT_PATTERNS = 6,
-    VT_EXTRACTORS = 8,
-    VT_USE_EXTRACTORS_FOR_LOCATING = 10,
-    VT_DEFAULT_LOCALES = 12
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *locales() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_LOCALES);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<DatetimeModelPattern>> *patterns() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DatetimeModelPattern>> *>(VT_PATTERNS);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<DatetimeModelExtractor>> *extractors() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<DatetimeModelExtractor>> *>(VT_EXTRACTORS);
-  }
-  bool use_extractors_for_locating() const {
-    return GetField<uint8_t>(VT_USE_EXTRACTORS_FOR_LOCATING, 1) != 0;
-  }
-  const flatbuffers::Vector<int32_t> *default_locales() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_DEFAULT_LOCALES);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_LOCALES) &&
-           verifier.Verify(locales()) &&
-           verifier.VerifyVectorOfStrings(locales()) &&
-           VerifyOffset(verifier, VT_PATTERNS) &&
-           verifier.Verify(patterns()) &&
-           verifier.VerifyVectorOfTables(patterns()) &&
-           VerifyOffset(verifier, VT_EXTRACTORS) &&
-           verifier.Verify(extractors()) &&
-           verifier.VerifyVectorOfTables(extractors()) &&
-           VerifyField<uint8_t>(verifier, VT_USE_EXTRACTORS_FOR_LOCATING) &&
-           VerifyOffset(verifier, VT_DEFAULT_LOCALES) &&
-           verifier.Verify(default_locales()) &&
-           verifier.EndTable();
-  }
-  DatetimeModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DatetimeModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DatetimeModel> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DatetimeModelBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_locales(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> locales) {
-    fbb_.AddOffset(DatetimeModel::VT_LOCALES, locales);
-  }
-  void add_patterns(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DatetimeModelPattern>>> patterns) {
-    fbb_.AddOffset(DatetimeModel::VT_PATTERNS, patterns);
-  }
-  void add_extractors(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DatetimeModelExtractor>>> extractors) {
-    fbb_.AddOffset(DatetimeModel::VT_EXTRACTORS, extractors);
-  }
-  void add_use_extractors_for_locating(bool use_extractors_for_locating) {
-    fbb_.AddElement<uint8_t>(DatetimeModel::VT_USE_EXTRACTORS_FOR_LOCATING, static_cast<uint8_t>(use_extractors_for_locating), 1);
-  }
-  void add_default_locales(flatbuffers::Offset<flatbuffers::Vector<int32_t>> default_locales) {
-    fbb_.AddOffset(DatetimeModel::VT_DEFAULT_LOCALES, default_locales);
-  }
-  explicit DatetimeModelBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DatetimeModelBuilder &operator=(const DatetimeModelBuilder &);
-  flatbuffers::Offset<DatetimeModel> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DatetimeModel>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DatetimeModel> CreateDatetimeModel(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> locales = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DatetimeModelPattern>>> patterns = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<DatetimeModelExtractor>>> extractors = 0,
-    bool use_extractors_for_locating = true,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> default_locales = 0) {
-  DatetimeModelBuilder builder_(_fbb);
-  builder_.add_default_locales(default_locales);
-  builder_.add_extractors(extractors);
-  builder_.add_patterns(patterns);
-  builder_.add_locales(locales);
-  builder_.add_use_extractors_for_locating(use_extractors_for_locating);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<DatetimeModel> CreateDatetimeModelDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *locales = nullptr,
-    const std::vector<flatbuffers::Offset<DatetimeModelPattern>> *patterns = nullptr,
-    const std::vector<flatbuffers::Offset<DatetimeModelExtractor>> *extractors = nullptr,
-    bool use_extractors_for_locating = true,
-    const std::vector<int32_t> *default_locales = nullptr) {
-  return libtextclassifier2::CreateDatetimeModel(
-      _fbb,
-      locales ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*locales) : 0,
-      patterns ? _fbb.CreateVector<flatbuffers::Offset<DatetimeModelPattern>>(*patterns) : 0,
-      extractors ? _fbb.CreateVector<flatbuffers::Offset<DatetimeModelExtractor>>(*extractors) : 0,
-      use_extractors_for_locating,
-      default_locales ? _fbb.CreateVector<int32_t>(*default_locales) : 0);
-}
-
-flatbuffers::Offset<DatetimeModel> CreateDatetimeModel(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-namespace DatetimeModelLibrary_ {
-
-struct ItemT : public flatbuffers::NativeTable {
-  typedef Item TableType;
-  std::string key;
-  std::unique_ptr<libtextclassifier2::DatetimeModelT> value;
-  ItemT() {
-  }
-};
-
-struct Item FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ItemT NativeTableType;
-  enum {
-    VT_KEY = 4,
-    VT_VALUE = 6
-  };
-  const flatbuffers::String *key() const {
-    return GetPointer<const flatbuffers::String *>(VT_KEY);
-  }
-  const libtextclassifier2::DatetimeModel *value() const {
-    return GetPointer<const libtextclassifier2::DatetimeModel *>(VT_VALUE);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_KEY) &&
-           verifier.Verify(key()) &&
-           VerifyOffset(verifier, VT_VALUE) &&
-           verifier.VerifyTable(value()) &&
-           verifier.EndTable();
-  }
-  ItemT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ItemT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Item> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ItemT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ItemBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_key(flatbuffers::Offset<flatbuffers::String> key) {
-    fbb_.AddOffset(Item::VT_KEY, key);
-  }
-  void add_value(flatbuffers::Offset<libtextclassifier2::DatetimeModel> value) {
-    fbb_.AddOffset(Item::VT_VALUE, value);
-  }
-  explicit ItemBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ItemBuilder &operator=(const ItemBuilder &);
-  flatbuffers::Offset<Item> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Item>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Item> CreateItem(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> key = 0,
-    flatbuffers::Offset<libtextclassifier2::DatetimeModel> value = 0) {
-  ItemBuilder builder_(_fbb);
-  builder_.add_value(value);
-  builder_.add_key(key);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Item> CreateItemDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *key = nullptr,
-    flatbuffers::Offset<libtextclassifier2::DatetimeModel> value = 0) {
-  return libtextclassifier2::DatetimeModelLibrary_::CreateItem(
-      _fbb,
-      key ? _fbb.CreateString(key) : 0,
-      value);
-}
-
-flatbuffers::Offset<Item> CreateItem(flatbuffers::FlatBufferBuilder &_fbb, const ItemT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-}  // namespace DatetimeModelLibrary_
-
-struct DatetimeModelLibraryT : public flatbuffers::NativeTable {
-  typedef DatetimeModelLibrary TableType;
-  std::vector<std::unique_ptr<libtextclassifier2::DatetimeModelLibrary_::ItemT>> models;
-  DatetimeModelLibraryT() {
-  }
-};
-
-struct DatetimeModelLibrary FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef DatetimeModelLibraryT NativeTableType;
-  enum {
-    VT_MODELS = 4
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>> *models() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>> *>(VT_MODELS);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_MODELS) &&
-           verifier.Verify(models()) &&
-           verifier.VerifyVectorOfTables(models()) &&
-           verifier.EndTable();
-  }
-  DatetimeModelLibraryT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(DatetimeModelLibraryT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<DatetimeModelLibrary> Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelLibraryT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct DatetimeModelLibraryBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_models(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>>> models) {
-    fbb_.AddOffset(DatetimeModelLibrary::VT_MODELS, models);
-  }
-  explicit DatetimeModelLibraryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  DatetimeModelLibraryBuilder &operator=(const DatetimeModelLibraryBuilder &);
-  flatbuffers::Offset<DatetimeModelLibrary> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<DatetimeModelLibrary>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<DatetimeModelLibrary> CreateDatetimeModelLibrary(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>>> models = 0) {
-  DatetimeModelLibraryBuilder builder_(_fbb);
-  builder_.add_models(models);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<DatetimeModelLibrary> CreateDatetimeModelLibraryDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>> *models = nullptr) {
-  return libtextclassifier2::CreateDatetimeModelLibrary(
-      _fbb,
-      models ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>>(*models) : 0);
-}
-
-flatbuffers::Offset<DatetimeModelLibrary> CreateDatetimeModelLibrary(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelLibraryT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ModelTriggeringOptionsT : public flatbuffers::NativeTable {
-  typedef ModelTriggeringOptions TableType;
-  float min_annotate_confidence;
-  ModeFlag enabled_modes;
-  ModelTriggeringOptionsT()
-      : min_annotate_confidence(0.0f),
-        enabled_modes(ModeFlag_ALL) {
-  }
-};
-
-struct ModelTriggeringOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ModelTriggeringOptionsT NativeTableType;
-  enum {
-    VT_MIN_ANNOTATE_CONFIDENCE = 4,
-    VT_ENABLED_MODES = 6
-  };
-  float min_annotate_confidence() const {
-    return GetField<float>(VT_MIN_ANNOTATE_CONFIDENCE, 0.0f);
-  }
-  ModeFlag enabled_modes() const {
-    return static_cast<ModeFlag>(GetField<int32_t>(VT_ENABLED_MODES, 7));
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<float>(verifier, VT_MIN_ANNOTATE_CONFIDENCE) &&
-           VerifyField<int32_t>(verifier, VT_ENABLED_MODES) &&
-           verifier.EndTable();
-  }
-  ModelTriggeringOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ModelTriggeringOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<ModelTriggeringOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelTriggeringOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ModelTriggeringOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_min_annotate_confidence(float min_annotate_confidence) {
-    fbb_.AddElement<float>(ModelTriggeringOptions::VT_MIN_ANNOTATE_CONFIDENCE, min_annotate_confidence, 0.0f);
-  }
-  void add_enabled_modes(ModeFlag enabled_modes) {
-    fbb_.AddElement<int32_t>(ModelTriggeringOptions::VT_ENABLED_MODES, static_cast<int32_t>(enabled_modes), 7);
-  }
-  explicit ModelTriggeringOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ModelTriggeringOptionsBuilder &operator=(const ModelTriggeringOptionsBuilder &);
-  flatbuffers::Offset<ModelTriggeringOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<ModelTriggeringOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<ModelTriggeringOptions> CreateModelTriggeringOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    float min_annotate_confidence = 0.0f,
-    ModeFlag enabled_modes = ModeFlag_ALL) {
-  ModelTriggeringOptionsBuilder builder_(_fbb);
-  builder_.add_enabled_modes(enabled_modes);
-  builder_.add_min_annotate_confidence(min_annotate_confidence);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<ModelTriggeringOptions> CreateModelTriggeringOptions(flatbuffers::FlatBufferBuilder &_fbb, const ModelTriggeringOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct OutputOptionsT : public flatbuffers::NativeTable {
-  typedef OutputOptions TableType;
-  std::vector<std::string> filtered_collections_annotation;
-  std::vector<std::string> filtered_collections_classification;
-  std::vector<std::string> filtered_collections_selection;
-  OutputOptionsT() {
-  }
-};
-
-struct OutputOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef OutputOptionsT NativeTableType;
-  enum {
-    VT_FILTERED_COLLECTIONS_ANNOTATION = 4,
-    VT_FILTERED_COLLECTIONS_CLASSIFICATION = 6,
-    VT_FILTERED_COLLECTIONS_SELECTION = 8
-  };
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *filtered_collections_annotation() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_FILTERED_COLLECTIONS_ANNOTATION);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *filtered_collections_classification() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_FILTERED_COLLECTIONS_CLASSIFICATION);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *filtered_collections_selection() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_FILTERED_COLLECTIONS_SELECTION);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_FILTERED_COLLECTIONS_ANNOTATION) &&
-           verifier.Verify(filtered_collections_annotation()) &&
-           verifier.VerifyVectorOfStrings(filtered_collections_annotation()) &&
-           VerifyOffset(verifier, VT_FILTERED_COLLECTIONS_CLASSIFICATION) &&
-           verifier.Verify(filtered_collections_classification()) &&
-           verifier.VerifyVectorOfStrings(filtered_collections_classification()) &&
-           VerifyOffset(verifier, VT_FILTERED_COLLECTIONS_SELECTION) &&
-           verifier.Verify(filtered_collections_selection()) &&
-           verifier.VerifyVectorOfStrings(filtered_collections_selection()) &&
-           verifier.EndTable();
-  }
-  OutputOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(OutputOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<OutputOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const OutputOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct OutputOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_filtered_collections_annotation(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> filtered_collections_annotation) {
-    fbb_.AddOffset(OutputOptions::VT_FILTERED_COLLECTIONS_ANNOTATION, filtered_collections_annotation);
-  }
-  void add_filtered_collections_classification(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> filtered_collections_classification) {
-    fbb_.AddOffset(OutputOptions::VT_FILTERED_COLLECTIONS_CLASSIFICATION, filtered_collections_classification);
-  }
-  void add_filtered_collections_selection(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> filtered_collections_selection) {
-    fbb_.AddOffset(OutputOptions::VT_FILTERED_COLLECTIONS_SELECTION, filtered_collections_selection);
-  }
-  explicit OutputOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  OutputOptionsBuilder &operator=(const OutputOptionsBuilder &);
-  flatbuffers::Offset<OutputOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<OutputOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<OutputOptions> CreateOutputOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> filtered_collections_annotation = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> filtered_collections_classification = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> filtered_collections_selection = 0) {
-  OutputOptionsBuilder builder_(_fbb);
-  builder_.add_filtered_collections_selection(filtered_collections_selection);
-  builder_.add_filtered_collections_classification(filtered_collections_classification);
-  builder_.add_filtered_collections_annotation(filtered_collections_annotation);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<OutputOptions> CreateOutputOptionsDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *filtered_collections_annotation = nullptr,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *filtered_collections_classification = nullptr,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *filtered_collections_selection = nullptr) {
-  return libtextclassifier2::CreateOutputOptions(
-      _fbb,
-      filtered_collections_annotation ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*filtered_collections_annotation) : 0,
-      filtered_collections_classification ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*filtered_collections_classification) : 0,
-      filtered_collections_selection ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*filtered_collections_selection) : 0);
-}
-
-flatbuffers::Offset<OutputOptions> CreateOutputOptions(flatbuffers::FlatBufferBuilder &_fbb, const OutputOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct ModelT : public flatbuffers::NativeTable {
-  typedef Model TableType;
-  std::string locales;
-  int32_t version;
-  std::string name;
-  std::unique_ptr<FeatureProcessorOptionsT> selection_feature_options;
-  std::unique_ptr<FeatureProcessorOptionsT> classification_feature_options;
-  std::vector<uint8_t> selection_model;
-  std::vector<uint8_t> classification_model;
-  std::vector<uint8_t> embedding_model;
-  std::unique_ptr<SelectionModelOptionsT> selection_options;
-  std::unique_ptr<ClassificationModelOptionsT> classification_options;
-  std::unique_ptr<RegexModelT> regex_model;
-  std::unique_ptr<DatetimeModelT> datetime_model;
-  std::unique_ptr<ModelTriggeringOptionsT> triggering_options;
-  ModeFlag enabled_modes;
-  bool snap_whitespace_selections;
-  std::unique_ptr<OutputOptionsT> output_options;
-  ModelT()
-      : version(0),
-        enabled_modes(ModeFlag_ALL),
-        snap_whitespace_selections(true) {
-  }
-};
-
-struct Model FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef ModelT NativeTableType;
-  enum {
-    VT_LOCALES = 4,
-    VT_VERSION = 6,
-    VT_NAME = 8,
-    VT_SELECTION_FEATURE_OPTIONS = 10,
-    VT_CLASSIFICATION_FEATURE_OPTIONS = 12,
-    VT_SELECTION_MODEL = 14,
-    VT_CLASSIFICATION_MODEL = 16,
-    VT_EMBEDDING_MODEL = 18,
-    VT_SELECTION_OPTIONS = 20,
-    VT_CLASSIFICATION_OPTIONS = 22,
-    VT_REGEX_MODEL = 24,
-    VT_DATETIME_MODEL = 26,
-    VT_TRIGGERING_OPTIONS = 28,
-    VT_ENABLED_MODES = 30,
-    VT_SNAP_WHITESPACE_SELECTIONS = 32,
-    VT_OUTPUT_OPTIONS = 34
-  };
-  const flatbuffers::String *locales() const {
-    return GetPointer<const flatbuffers::String *>(VT_LOCALES);
-  }
-  int32_t version() const {
-    return GetField<int32_t>(VT_VERSION, 0);
-  }
-  const flatbuffers::String *name() const {
-    return GetPointer<const flatbuffers::String *>(VT_NAME);
-  }
-  const FeatureProcessorOptions *selection_feature_options() const {
-    return GetPointer<const FeatureProcessorOptions *>(VT_SELECTION_FEATURE_OPTIONS);
-  }
-  const FeatureProcessorOptions *classification_feature_options() const {
-    return GetPointer<const FeatureProcessorOptions *>(VT_CLASSIFICATION_FEATURE_OPTIONS);
-  }
-  const flatbuffers::Vector<uint8_t> *selection_model() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_SELECTION_MODEL);
-  }
-  const flatbuffers::Vector<uint8_t> *classification_model() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CLASSIFICATION_MODEL);
-  }
-  const flatbuffers::Vector<uint8_t> *embedding_model() const {
-    return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_EMBEDDING_MODEL);
-  }
-  const SelectionModelOptions *selection_options() const {
-    return GetPointer<const SelectionModelOptions *>(VT_SELECTION_OPTIONS);
-  }
-  const ClassificationModelOptions *classification_options() const {
-    return GetPointer<const ClassificationModelOptions *>(VT_CLASSIFICATION_OPTIONS);
-  }
-  const RegexModel *regex_model() const {
-    return GetPointer<const RegexModel *>(VT_REGEX_MODEL);
-  }
-  const DatetimeModel *datetime_model() const {
-    return GetPointer<const DatetimeModel *>(VT_DATETIME_MODEL);
-  }
-  const ModelTriggeringOptions *triggering_options() const {
-    return GetPointer<const ModelTriggeringOptions *>(VT_TRIGGERING_OPTIONS);
-  }
-  ModeFlag enabled_modes() const {
-    return static_cast<ModeFlag>(GetField<int32_t>(VT_ENABLED_MODES, 7));
-  }
-  bool snap_whitespace_selections() const {
-    return GetField<uint8_t>(VT_SNAP_WHITESPACE_SELECTIONS, 1) != 0;
-  }
-  const OutputOptions *output_options() const {
-    return GetPointer<const OutputOptions *>(VT_OUTPUT_OPTIONS);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_LOCALES) &&
-           verifier.Verify(locales()) &&
-           VerifyField<int32_t>(verifier, VT_VERSION) &&
-           VerifyOffset(verifier, VT_NAME) &&
-           verifier.Verify(name()) &&
-           VerifyOffset(verifier, VT_SELECTION_FEATURE_OPTIONS) &&
-           verifier.VerifyTable(selection_feature_options()) &&
-           VerifyOffset(verifier, VT_CLASSIFICATION_FEATURE_OPTIONS) &&
-           verifier.VerifyTable(classification_feature_options()) &&
-           VerifyOffset(verifier, VT_SELECTION_MODEL) &&
-           verifier.Verify(selection_model()) &&
-           VerifyOffset(verifier, VT_CLASSIFICATION_MODEL) &&
-           verifier.Verify(classification_model()) &&
-           VerifyOffset(verifier, VT_EMBEDDING_MODEL) &&
-           verifier.Verify(embedding_model()) &&
-           VerifyOffset(verifier, VT_SELECTION_OPTIONS) &&
-           verifier.VerifyTable(selection_options()) &&
-           VerifyOffset(verifier, VT_CLASSIFICATION_OPTIONS) &&
-           verifier.VerifyTable(classification_options()) &&
-           VerifyOffset(verifier, VT_REGEX_MODEL) &&
-           verifier.VerifyTable(regex_model()) &&
-           VerifyOffset(verifier, VT_DATETIME_MODEL) &&
-           verifier.VerifyTable(datetime_model()) &&
-           VerifyOffset(verifier, VT_TRIGGERING_OPTIONS) &&
-           verifier.VerifyTable(triggering_options()) &&
-           VerifyField<int32_t>(verifier, VT_ENABLED_MODES) &&
-           VerifyField<uint8_t>(verifier, VT_SNAP_WHITESPACE_SELECTIONS) &&
-           VerifyOffset(verifier, VT_OUTPUT_OPTIONS) &&
-           verifier.VerifyTable(output_options()) &&
-           verifier.EndTable();
-  }
-  ModelT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<Model> Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct ModelBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_locales(flatbuffers::Offset<flatbuffers::String> locales) {
-    fbb_.AddOffset(Model::VT_LOCALES, locales);
-  }
-  void add_version(int32_t version) {
-    fbb_.AddElement<int32_t>(Model::VT_VERSION, version, 0);
-  }
-  void add_name(flatbuffers::Offset<flatbuffers::String> name) {
-    fbb_.AddOffset(Model::VT_NAME, name);
-  }
-  void add_selection_feature_options(flatbuffers::Offset<FeatureProcessorOptions> selection_feature_options) {
-    fbb_.AddOffset(Model::VT_SELECTION_FEATURE_OPTIONS, selection_feature_options);
-  }
-  void add_classification_feature_options(flatbuffers::Offset<FeatureProcessorOptions> classification_feature_options) {
-    fbb_.AddOffset(Model::VT_CLASSIFICATION_FEATURE_OPTIONS, classification_feature_options);
-  }
-  void add_selection_model(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> selection_model) {
-    fbb_.AddOffset(Model::VT_SELECTION_MODEL, selection_model);
-  }
-  void add_classification_model(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> classification_model) {
-    fbb_.AddOffset(Model::VT_CLASSIFICATION_MODEL, classification_model);
-  }
-  void add_embedding_model(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> embedding_model) {
-    fbb_.AddOffset(Model::VT_EMBEDDING_MODEL, embedding_model);
-  }
-  void add_selection_options(flatbuffers::Offset<SelectionModelOptions> selection_options) {
-    fbb_.AddOffset(Model::VT_SELECTION_OPTIONS, selection_options);
-  }
-  void add_classification_options(flatbuffers::Offset<ClassificationModelOptions> classification_options) {
-    fbb_.AddOffset(Model::VT_CLASSIFICATION_OPTIONS, classification_options);
-  }
-  void add_regex_model(flatbuffers::Offset<RegexModel> regex_model) {
-    fbb_.AddOffset(Model::VT_REGEX_MODEL, regex_model);
-  }
-  void add_datetime_model(flatbuffers::Offset<DatetimeModel> datetime_model) {
-    fbb_.AddOffset(Model::VT_DATETIME_MODEL, datetime_model);
-  }
-  void add_triggering_options(flatbuffers::Offset<ModelTriggeringOptions> triggering_options) {
-    fbb_.AddOffset(Model::VT_TRIGGERING_OPTIONS, triggering_options);
-  }
-  void add_enabled_modes(ModeFlag enabled_modes) {
-    fbb_.AddElement<int32_t>(Model::VT_ENABLED_MODES, static_cast<int32_t>(enabled_modes), 7);
-  }
-  void add_snap_whitespace_selections(bool snap_whitespace_selections) {
-    fbb_.AddElement<uint8_t>(Model::VT_SNAP_WHITESPACE_SELECTIONS, static_cast<uint8_t>(snap_whitespace_selections), 1);
-  }
-  void add_output_options(flatbuffers::Offset<OutputOptions> output_options) {
-    fbb_.AddOffset(Model::VT_OUTPUT_OPTIONS, output_options);
-  }
-  explicit ModelBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  ModelBuilder &operator=(const ModelBuilder &);
-  flatbuffers::Offset<Model> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<Model>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<Model> CreateModel(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> locales = 0,
-    int32_t version = 0,
-    flatbuffers::Offset<flatbuffers::String> name = 0,
-    flatbuffers::Offset<FeatureProcessorOptions> selection_feature_options = 0,
-    flatbuffers::Offset<FeatureProcessorOptions> classification_feature_options = 0,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> selection_model = 0,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> classification_model = 0,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> embedding_model = 0,
-    flatbuffers::Offset<SelectionModelOptions> selection_options = 0,
-    flatbuffers::Offset<ClassificationModelOptions> classification_options = 0,
-    flatbuffers::Offset<RegexModel> regex_model = 0,
-    flatbuffers::Offset<DatetimeModel> datetime_model = 0,
-    flatbuffers::Offset<ModelTriggeringOptions> triggering_options = 0,
-    ModeFlag enabled_modes = ModeFlag_ALL,
-    bool snap_whitespace_selections = true,
-    flatbuffers::Offset<OutputOptions> output_options = 0) {
-  ModelBuilder builder_(_fbb);
-  builder_.add_output_options(output_options);
-  builder_.add_enabled_modes(enabled_modes);
-  builder_.add_triggering_options(triggering_options);
-  builder_.add_datetime_model(datetime_model);
-  builder_.add_regex_model(regex_model);
-  builder_.add_classification_options(classification_options);
-  builder_.add_selection_options(selection_options);
-  builder_.add_embedding_model(embedding_model);
-  builder_.add_classification_model(classification_model);
-  builder_.add_selection_model(selection_model);
-  builder_.add_classification_feature_options(classification_feature_options);
-  builder_.add_selection_feature_options(selection_feature_options);
-  builder_.add_name(name);
-  builder_.add_version(version);
-  builder_.add_locales(locales);
-  builder_.add_snap_whitespace_selections(snap_whitespace_selections);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<Model> CreateModelDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *locales = nullptr,
-    int32_t version = 0,
-    const char *name = nullptr,
-    flatbuffers::Offset<FeatureProcessorOptions> selection_feature_options = 0,
-    flatbuffers::Offset<FeatureProcessorOptions> classification_feature_options = 0,
-    const std::vector<uint8_t> *selection_model = nullptr,
-    const std::vector<uint8_t> *classification_model = nullptr,
-    const std::vector<uint8_t> *embedding_model = nullptr,
-    flatbuffers::Offset<SelectionModelOptions> selection_options = 0,
-    flatbuffers::Offset<ClassificationModelOptions> classification_options = 0,
-    flatbuffers::Offset<RegexModel> regex_model = 0,
-    flatbuffers::Offset<DatetimeModel> datetime_model = 0,
-    flatbuffers::Offset<ModelTriggeringOptions> triggering_options = 0,
-    ModeFlag enabled_modes = ModeFlag_ALL,
-    bool snap_whitespace_selections = true,
-    flatbuffers::Offset<OutputOptions> output_options = 0) {
-  return libtextclassifier2::CreateModel(
-      _fbb,
-      locales ? _fbb.CreateString(locales) : 0,
-      version,
-      name ? _fbb.CreateString(name) : 0,
-      selection_feature_options,
-      classification_feature_options,
-      selection_model ? _fbb.CreateVector<uint8_t>(*selection_model) : 0,
-      classification_model ? _fbb.CreateVector<uint8_t>(*classification_model) : 0,
-      embedding_model ? _fbb.CreateVector<uint8_t>(*embedding_model) : 0,
-      selection_options,
-      classification_options,
-      regex_model,
-      datetime_model,
-      triggering_options,
-      enabled_modes,
-      snap_whitespace_selections,
-      output_options);
-}
-
-flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct TokenizationCodepointRangeT : public flatbuffers::NativeTable {
-  typedef TokenizationCodepointRange TableType;
-  int32_t start;
-  int32_t end;
-  libtextclassifier2::TokenizationCodepointRange_::Role role;
-  int32_t script_id;
-  TokenizationCodepointRangeT()
-      : start(0),
-        end(0),
-        role(libtextclassifier2::TokenizationCodepointRange_::Role_DEFAULT_ROLE),
-        script_id(0) {
-  }
-};
-
-struct TokenizationCodepointRange FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TokenizationCodepointRangeT NativeTableType;
-  enum {
-    VT_START = 4,
-    VT_END = 6,
-    VT_ROLE = 8,
-    VT_SCRIPT_ID = 10
-  };
-  int32_t start() const {
-    return GetField<int32_t>(VT_START, 0);
-  }
-  int32_t end() const {
-    return GetField<int32_t>(VT_END, 0);
-  }
-  libtextclassifier2::TokenizationCodepointRange_::Role role() const {
-    return static_cast<libtextclassifier2::TokenizationCodepointRange_::Role>(GetField<int32_t>(VT_ROLE, 0));
-  }
-  int32_t script_id() const {
-    return GetField<int32_t>(VT_SCRIPT_ID, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_START) &&
-           VerifyField<int32_t>(verifier, VT_END) &&
-           VerifyField<int32_t>(verifier, VT_ROLE) &&
-           VerifyField<int32_t>(verifier, VT_SCRIPT_ID) &&
-           verifier.EndTable();
-  }
-  TokenizationCodepointRangeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TokenizationCodepointRangeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TokenizationCodepointRange> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TokenizationCodepointRangeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TokenizationCodepointRangeBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_start(int32_t start) {
-    fbb_.AddElement<int32_t>(TokenizationCodepointRange::VT_START, start, 0);
-  }
-  void add_end(int32_t end) {
-    fbb_.AddElement<int32_t>(TokenizationCodepointRange::VT_END, end, 0);
-  }
-  void add_role(libtextclassifier2::TokenizationCodepointRange_::Role role) {
-    fbb_.AddElement<int32_t>(TokenizationCodepointRange::VT_ROLE, static_cast<int32_t>(role), 0);
-  }
-  void add_script_id(int32_t script_id) {
-    fbb_.AddElement<int32_t>(TokenizationCodepointRange::VT_SCRIPT_ID, script_id, 0);
-  }
-  explicit TokenizationCodepointRangeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TokenizationCodepointRangeBuilder &operator=(const TokenizationCodepointRangeBuilder &);
-  flatbuffers::Offset<TokenizationCodepointRange> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TokenizationCodepointRange>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TokenizationCodepointRange> CreateTokenizationCodepointRange(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t start = 0,
-    int32_t end = 0,
-    libtextclassifier2::TokenizationCodepointRange_::Role role = libtextclassifier2::TokenizationCodepointRange_::Role_DEFAULT_ROLE,
-    int32_t script_id = 0) {
-  TokenizationCodepointRangeBuilder builder_(_fbb);
-  builder_.add_script_id(script_id);
-  builder_.add_role(role);
-  builder_.add_end(end);
-  builder_.add_start(start);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<TokenizationCodepointRange> CreateTokenizationCodepointRange(flatbuffers::FlatBufferBuilder &_fbb, const TokenizationCodepointRangeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-namespace FeatureProcessorOptions_ {
-
-struct CodepointRangeT : public flatbuffers::NativeTable {
-  typedef CodepointRange TableType;
-  int32_t start;
-  int32_t end;
-  CodepointRangeT()
-      : start(0),
-        end(0) {
-  }
-};
-
-struct CodepointRange FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef CodepointRangeT NativeTableType;
-  enum {
-    VT_START = 4,
-    VT_END = 6
-  };
-  int32_t start() const {
-    return GetField<int32_t>(VT_START, 0);
-  }
-  int32_t end() const {
-    return GetField<int32_t>(VT_END, 0);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_START) &&
-           VerifyField<int32_t>(verifier, VT_END) &&
-           verifier.EndTable();
-  }
-  CodepointRangeT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(CodepointRangeT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<CodepointRange> Pack(flatbuffers::FlatBufferBuilder &_fbb, const CodepointRangeT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct CodepointRangeBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_start(int32_t start) {
-    fbb_.AddElement<int32_t>(CodepointRange::VT_START, start, 0);
-  }
-  void add_end(int32_t end) {
-    fbb_.AddElement<int32_t>(CodepointRange::VT_END, end, 0);
-  }
-  explicit CodepointRangeBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  CodepointRangeBuilder &operator=(const CodepointRangeBuilder &);
-  flatbuffers::Offset<CodepointRange> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<CodepointRange>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<CodepointRange> CreateCodepointRange(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t start = 0,
-    int32_t end = 0) {
-  CodepointRangeBuilder builder_(_fbb);
-  builder_.add_end(end);
-  builder_.add_start(start);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<CodepointRange> CreateCodepointRange(flatbuffers::FlatBufferBuilder &_fbb, const CodepointRangeT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct BoundsSensitiveFeaturesT : public flatbuffers::NativeTable {
-  typedef BoundsSensitiveFeatures TableType;
-  bool enabled;
-  int32_t num_tokens_before;
-  int32_t num_tokens_inside_left;
-  int32_t num_tokens_inside_right;
-  int32_t num_tokens_after;
-  bool include_inside_bag;
-  bool include_inside_length;
-  bool score_single_token_spans_as_zero;
-  BoundsSensitiveFeaturesT()
-      : enabled(false),
-        num_tokens_before(0),
-        num_tokens_inside_left(0),
-        num_tokens_inside_right(0),
-        num_tokens_after(0),
-        include_inside_bag(false),
-        include_inside_length(false),
-        score_single_token_spans_as_zero(false) {
-  }
-};
-
-struct BoundsSensitiveFeatures FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef BoundsSensitiveFeaturesT NativeTableType;
-  enum {
-    VT_ENABLED = 4,
-    VT_NUM_TOKENS_BEFORE = 6,
-    VT_NUM_TOKENS_INSIDE_LEFT = 8,
-    VT_NUM_TOKENS_INSIDE_RIGHT = 10,
-    VT_NUM_TOKENS_AFTER = 12,
-    VT_INCLUDE_INSIDE_BAG = 14,
-    VT_INCLUDE_INSIDE_LENGTH = 16,
-    VT_SCORE_SINGLE_TOKEN_SPANS_AS_ZERO = 18
-  };
-  bool enabled() const {
-    return GetField<uint8_t>(VT_ENABLED, 0) != 0;
-  }
-  int32_t num_tokens_before() const {
-    return GetField<int32_t>(VT_NUM_TOKENS_BEFORE, 0);
-  }
-  int32_t num_tokens_inside_left() const {
-    return GetField<int32_t>(VT_NUM_TOKENS_INSIDE_LEFT, 0);
-  }
-  int32_t num_tokens_inside_right() const {
-    return GetField<int32_t>(VT_NUM_TOKENS_INSIDE_RIGHT, 0);
-  }
-  int32_t num_tokens_after() const {
-    return GetField<int32_t>(VT_NUM_TOKENS_AFTER, 0);
-  }
-  bool include_inside_bag() const {
-    return GetField<uint8_t>(VT_INCLUDE_INSIDE_BAG, 0) != 0;
-  }
-  bool include_inside_length() const {
-    return GetField<uint8_t>(VT_INCLUDE_INSIDE_LENGTH, 0) != 0;
-  }
-  bool score_single_token_spans_as_zero() const {
-    return GetField<uint8_t>(VT_SCORE_SINGLE_TOKEN_SPANS_AS_ZERO, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<uint8_t>(verifier, VT_ENABLED) &&
-           VerifyField<int32_t>(verifier, VT_NUM_TOKENS_BEFORE) &&
-           VerifyField<int32_t>(verifier, VT_NUM_TOKENS_INSIDE_LEFT) &&
-           VerifyField<int32_t>(verifier, VT_NUM_TOKENS_INSIDE_RIGHT) &&
-           VerifyField<int32_t>(verifier, VT_NUM_TOKENS_AFTER) &&
-           VerifyField<uint8_t>(verifier, VT_INCLUDE_INSIDE_BAG) &&
-           VerifyField<uint8_t>(verifier, VT_INCLUDE_INSIDE_LENGTH) &&
-           VerifyField<uint8_t>(verifier, VT_SCORE_SINGLE_TOKEN_SPANS_AS_ZERO) &&
-           verifier.EndTable();
-  }
-  BoundsSensitiveFeaturesT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(BoundsSensitiveFeaturesT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<BoundsSensitiveFeatures> Pack(flatbuffers::FlatBufferBuilder &_fbb, const BoundsSensitiveFeaturesT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct BoundsSensitiveFeaturesBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_enabled(bool enabled) {
-    fbb_.AddElement<uint8_t>(BoundsSensitiveFeatures::VT_ENABLED, static_cast<uint8_t>(enabled), 0);
-  }
-  void add_num_tokens_before(int32_t num_tokens_before) {
-    fbb_.AddElement<int32_t>(BoundsSensitiveFeatures::VT_NUM_TOKENS_BEFORE, num_tokens_before, 0);
-  }
-  void add_num_tokens_inside_left(int32_t num_tokens_inside_left) {
-    fbb_.AddElement<int32_t>(BoundsSensitiveFeatures::VT_NUM_TOKENS_INSIDE_LEFT, num_tokens_inside_left, 0);
-  }
-  void add_num_tokens_inside_right(int32_t num_tokens_inside_right) {
-    fbb_.AddElement<int32_t>(BoundsSensitiveFeatures::VT_NUM_TOKENS_INSIDE_RIGHT, num_tokens_inside_right, 0);
-  }
-  void add_num_tokens_after(int32_t num_tokens_after) {
-    fbb_.AddElement<int32_t>(BoundsSensitiveFeatures::VT_NUM_TOKENS_AFTER, num_tokens_after, 0);
-  }
-  void add_include_inside_bag(bool include_inside_bag) {
-    fbb_.AddElement<uint8_t>(BoundsSensitiveFeatures::VT_INCLUDE_INSIDE_BAG, static_cast<uint8_t>(include_inside_bag), 0);
-  }
-  void add_include_inside_length(bool include_inside_length) {
-    fbb_.AddElement<uint8_t>(BoundsSensitiveFeatures::VT_INCLUDE_INSIDE_LENGTH, static_cast<uint8_t>(include_inside_length), 0);
-  }
-  void add_score_single_token_spans_as_zero(bool score_single_token_spans_as_zero) {
-    fbb_.AddElement<uint8_t>(BoundsSensitiveFeatures::VT_SCORE_SINGLE_TOKEN_SPANS_AS_ZERO, static_cast<uint8_t>(score_single_token_spans_as_zero), 0);
-  }
-  explicit BoundsSensitiveFeaturesBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  BoundsSensitiveFeaturesBuilder &operator=(const BoundsSensitiveFeaturesBuilder &);
-  flatbuffers::Offset<BoundsSensitiveFeatures> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<BoundsSensitiveFeatures>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<BoundsSensitiveFeatures> CreateBoundsSensitiveFeatures(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    bool enabled = false,
-    int32_t num_tokens_before = 0,
-    int32_t num_tokens_inside_left = 0,
-    int32_t num_tokens_inside_right = 0,
-    int32_t num_tokens_after = 0,
-    bool include_inside_bag = false,
-    bool include_inside_length = false,
-    bool score_single_token_spans_as_zero = false) {
-  BoundsSensitiveFeaturesBuilder builder_(_fbb);
-  builder_.add_num_tokens_after(num_tokens_after);
-  builder_.add_num_tokens_inside_right(num_tokens_inside_right);
-  builder_.add_num_tokens_inside_left(num_tokens_inside_left);
-  builder_.add_num_tokens_before(num_tokens_before);
-  builder_.add_score_single_token_spans_as_zero(score_single_token_spans_as_zero);
-  builder_.add_include_inside_length(include_inside_length);
-  builder_.add_include_inside_bag(include_inside_bag);
-  builder_.add_enabled(enabled);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<BoundsSensitiveFeatures> CreateBoundsSensitiveFeatures(flatbuffers::FlatBufferBuilder &_fbb, const BoundsSensitiveFeaturesT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-struct AlternativeCollectionMapEntryT : public flatbuffers::NativeTable {
-  typedef AlternativeCollectionMapEntry TableType;
-  std::string key;
-  std::string value;
-  AlternativeCollectionMapEntryT() {
-  }
-};
-
-struct AlternativeCollectionMapEntry FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef AlternativeCollectionMapEntryT NativeTableType;
-  enum {
-    VT_KEY = 4,
-    VT_VALUE = 6
-  };
-  const flatbuffers::String *key() const {
-    return GetPointer<const flatbuffers::String *>(VT_KEY);
-  }
-  const flatbuffers::String *value() const {
-    return GetPointer<const flatbuffers::String *>(VT_VALUE);
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyOffset(verifier, VT_KEY) &&
-           verifier.Verify(key()) &&
-           VerifyOffset(verifier, VT_VALUE) &&
-           verifier.Verify(value()) &&
-           verifier.EndTable();
-  }
-  AlternativeCollectionMapEntryT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(AlternativeCollectionMapEntryT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<AlternativeCollectionMapEntry> Pack(flatbuffers::FlatBufferBuilder &_fbb, const AlternativeCollectionMapEntryT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct AlternativeCollectionMapEntryBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_key(flatbuffers::Offset<flatbuffers::String> key) {
-    fbb_.AddOffset(AlternativeCollectionMapEntry::VT_KEY, key);
-  }
-  void add_value(flatbuffers::Offset<flatbuffers::String> value) {
-    fbb_.AddOffset(AlternativeCollectionMapEntry::VT_VALUE, value);
-  }
-  explicit AlternativeCollectionMapEntryBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  AlternativeCollectionMapEntryBuilder &operator=(const AlternativeCollectionMapEntryBuilder &);
-  flatbuffers::Offset<AlternativeCollectionMapEntry> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<AlternativeCollectionMapEntry>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<AlternativeCollectionMapEntry> CreateAlternativeCollectionMapEntry(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    flatbuffers::Offset<flatbuffers::String> key = 0,
-    flatbuffers::Offset<flatbuffers::String> value = 0) {
-  AlternativeCollectionMapEntryBuilder builder_(_fbb);
-  builder_.add_value(value);
-  builder_.add_key(key);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<AlternativeCollectionMapEntry> CreateAlternativeCollectionMapEntryDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    const char *key = nullptr,
-    const char *value = nullptr) {
-  return libtextclassifier2::FeatureProcessorOptions_::CreateAlternativeCollectionMapEntry(
-      _fbb,
-      key ? _fbb.CreateString(key) : 0,
-      value ? _fbb.CreateString(value) : 0);
-}
-
-flatbuffers::Offset<AlternativeCollectionMapEntry> CreateAlternativeCollectionMapEntry(flatbuffers::FlatBufferBuilder &_fbb, const AlternativeCollectionMapEntryT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-}  // namespace FeatureProcessorOptions_
-
-struct FeatureProcessorOptionsT : public flatbuffers::NativeTable {
-  typedef FeatureProcessorOptions TableType;
-  int32_t num_buckets;
-  int32_t embedding_size;
-  int32_t embedding_quantization_bits;
-  int32_t context_size;
-  int32_t max_selection_span;
-  std::vector<int32_t> chargram_orders;
-  int32_t max_word_length;
-  bool unicode_aware_features;
-  bool extract_case_feature;
-  bool extract_selection_mask_feature;
-  std::vector<std::string> regexp_feature;
-  bool remap_digits;
-  bool lowercase_tokens;
-  bool selection_reduced_output_space;
-  std::vector<std::string> collections;
-  int32_t default_collection;
-  bool only_use_line_with_click;
-  bool split_tokens_on_selection_boundaries;
-  std::vector<std::unique_ptr<TokenizationCodepointRangeT>> tokenization_codepoint_config;
-  libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod center_token_selection_method;
-  bool snap_label_span_boundaries_to_containing_tokens;
-  std::vector<std::unique_ptr<libtextclassifier2::FeatureProcessorOptions_::CodepointRangeT>> supported_codepoint_ranges;
-  std::vector<std::unique_ptr<libtextclassifier2::FeatureProcessorOptions_::CodepointRangeT>> internal_tokenizer_codepoint_ranges;
-  float min_supported_codepoint_ratio;
-  int32_t feature_version;
-  libtextclassifier2::FeatureProcessorOptions_::TokenizationType tokenization_type;
-  bool icu_preserve_whitespace_tokens;
-  std::vector<int32_t> ignored_span_boundary_codepoints;
-  std::unique_ptr<libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeaturesT> bounds_sensitive_features;
-  std::vector<std::string> allowed_chargrams;
-  bool tokenize_on_script_change;
-  FeatureProcessorOptionsT()
-      : num_buckets(-1),
-        embedding_size(-1),
-        embedding_quantization_bits(8),
-        context_size(-1),
-        max_selection_span(-1),
-        max_word_length(20),
-        unicode_aware_features(false),
-        extract_case_feature(false),
-        extract_selection_mask_feature(false),
-        remap_digits(false),
-        lowercase_tokens(false),
-        selection_reduced_output_space(true),
-        default_collection(-1),
-        only_use_line_with_click(false),
-        split_tokens_on_selection_boundaries(false),
-        center_token_selection_method(libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod_DEFAULT_CENTER_TOKEN_METHOD),
-        snap_label_span_boundaries_to_containing_tokens(false),
-        min_supported_codepoint_ratio(0.0f),
-        feature_version(0),
-        tokenization_type(libtextclassifier2::FeatureProcessorOptions_::TokenizationType_INTERNAL_TOKENIZER),
-        icu_preserve_whitespace_tokens(false),
-        tokenize_on_script_change(false) {
-  }
-};
-
-struct FeatureProcessorOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef FeatureProcessorOptionsT NativeTableType;
-  enum {
-    VT_NUM_BUCKETS = 4,
-    VT_EMBEDDING_SIZE = 6,
-    VT_EMBEDDING_QUANTIZATION_BITS = 8,
-    VT_CONTEXT_SIZE = 10,
-    VT_MAX_SELECTION_SPAN = 12,
-    VT_CHARGRAM_ORDERS = 14,
-    VT_MAX_WORD_LENGTH = 16,
-    VT_UNICODE_AWARE_FEATURES = 18,
-    VT_EXTRACT_CASE_FEATURE = 20,
-    VT_EXTRACT_SELECTION_MASK_FEATURE = 22,
-    VT_REGEXP_FEATURE = 24,
-    VT_REMAP_DIGITS = 26,
-    VT_LOWERCASE_TOKENS = 28,
-    VT_SELECTION_REDUCED_OUTPUT_SPACE = 30,
-    VT_COLLECTIONS = 32,
-    VT_DEFAULT_COLLECTION = 34,
-    VT_ONLY_USE_LINE_WITH_CLICK = 36,
-    VT_SPLIT_TOKENS_ON_SELECTION_BOUNDARIES = 38,
-    VT_TOKENIZATION_CODEPOINT_CONFIG = 40,
-    VT_CENTER_TOKEN_SELECTION_METHOD = 42,
-    VT_SNAP_LABEL_SPAN_BOUNDARIES_TO_CONTAINING_TOKENS = 44,
-    VT_SUPPORTED_CODEPOINT_RANGES = 46,
-    VT_INTERNAL_TOKENIZER_CODEPOINT_RANGES = 48,
-    VT_MIN_SUPPORTED_CODEPOINT_RATIO = 50,
-    VT_FEATURE_VERSION = 52,
-    VT_TOKENIZATION_TYPE = 54,
-    VT_ICU_PRESERVE_WHITESPACE_TOKENS = 56,
-    VT_IGNORED_SPAN_BOUNDARY_CODEPOINTS = 58,
-    VT_BOUNDS_SENSITIVE_FEATURES = 60,
-    VT_ALLOWED_CHARGRAMS = 62,
-    VT_TOKENIZE_ON_SCRIPT_CHANGE = 64
-  };
-  int32_t num_buckets() const {
-    return GetField<int32_t>(VT_NUM_BUCKETS, -1);
-  }
-  int32_t embedding_size() const {
-    return GetField<int32_t>(VT_EMBEDDING_SIZE, -1);
-  }
-  int32_t embedding_quantization_bits() const {
-    return GetField<int32_t>(VT_EMBEDDING_QUANTIZATION_BITS, 8);
-  }
-  int32_t context_size() const {
-    return GetField<int32_t>(VT_CONTEXT_SIZE, -1);
-  }
-  int32_t max_selection_span() const {
-    return GetField<int32_t>(VT_MAX_SELECTION_SPAN, -1);
-  }
-  const flatbuffers::Vector<int32_t> *chargram_orders() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_CHARGRAM_ORDERS);
-  }
-  int32_t max_word_length() const {
-    return GetField<int32_t>(VT_MAX_WORD_LENGTH, 20);
-  }
-  bool unicode_aware_features() const {
-    return GetField<uint8_t>(VT_UNICODE_AWARE_FEATURES, 0) != 0;
-  }
-  bool extract_case_feature() const {
-    return GetField<uint8_t>(VT_EXTRACT_CASE_FEATURE, 0) != 0;
-  }
-  bool extract_selection_mask_feature() const {
-    return GetField<uint8_t>(VT_EXTRACT_SELECTION_MASK_FEATURE, 0) != 0;
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *regexp_feature() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_REGEXP_FEATURE);
-  }
-  bool remap_digits() const {
-    return GetField<uint8_t>(VT_REMAP_DIGITS, 0) != 0;
-  }
-  bool lowercase_tokens() const {
-    return GetField<uint8_t>(VT_LOWERCASE_TOKENS, 0) != 0;
-  }
-  bool selection_reduced_output_space() const {
-    return GetField<uint8_t>(VT_SELECTION_REDUCED_OUTPUT_SPACE, 1) != 0;
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *collections() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_COLLECTIONS);
-  }
-  int32_t default_collection() const {
-    return GetField<int32_t>(VT_DEFAULT_COLLECTION, -1);
-  }
-  bool only_use_line_with_click() const {
-    return GetField<uint8_t>(VT_ONLY_USE_LINE_WITH_CLICK, 0) != 0;
-  }
-  bool split_tokens_on_selection_boundaries() const {
-    return GetField<uint8_t>(VT_SPLIT_TOKENS_ON_SELECTION_BOUNDARIES, 0) != 0;
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<TokenizationCodepointRange>> *tokenization_codepoint_config() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<TokenizationCodepointRange>> *>(VT_TOKENIZATION_CODEPOINT_CONFIG);
-  }
-  libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod center_token_selection_method() const {
-    return static_cast<libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod>(GetField<int32_t>(VT_CENTER_TOKEN_SELECTION_METHOD, 0));
-  }
-  bool snap_label_span_boundaries_to_containing_tokens() const {
-    return GetField<uint8_t>(VT_SNAP_LABEL_SPAN_BOUNDARIES_TO_CONTAINING_TOKENS, 0) != 0;
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> *supported_codepoint_ranges() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> *>(VT_SUPPORTED_CODEPOINT_RANGES);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> *internal_tokenizer_codepoint_ranges() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> *>(VT_INTERNAL_TOKENIZER_CODEPOINT_RANGES);
-  }
-  float min_supported_codepoint_ratio() const {
-    return GetField<float>(VT_MIN_SUPPORTED_CODEPOINT_RATIO, 0.0f);
-  }
-  int32_t feature_version() const {
-    return GetField<int32_t>(VT_FEATURE_VERSION, 0);
-  }
-  libtextclassifier2::FeatureProcessorOptions_::TokenizationType tokenization_type() const {
-    return static_cast<libtextclassifier2::FeatureProcessorOptions_::TokenizationType>(GetField<int32_t>(VT_TOKENIZATION_TYPE, 1));
-  }
-  bool icu_preserve_whitespace_tokens() const {
-    return GetField<uint8_t>(VT_ICU_PRESERVE_WHITESPACE_TOKENS, 0) != 0;
-  }
-  const flatbuffers::Vector<int32_t> *ignored_span_boundary_codepoints() const {
-    return GetPointer<const flatbuffers::Vector<int32_t> *>(VT_IGNORED_SPAN_BOUNDARY_CODEPOINTS);
-  }
-  const libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeatures *bounds_sensitive_features() const {
-    return GetPointer<const libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeatures *>(VT_BOUNDS_SENSITIVE_FEATURES);
-  }
-  const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *allowed_chargrams() const {
-    return GetPointer<const flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>> *>(VT_ALLOWED_CHARGRAMS);
-  }
-  bool tokenize_on_script_change() const {
-    return GetField<uint8_t>(VT_TOKENIZE_ON_SCRIPT_CHANGE, 0) != 0;
-  }
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           VerifyField<int32_t>(verifier, VT_NUM_BUCKETS) &&
-           VerifyField<int32_t>(verifier, VT_EMBEDDING_SIZE) &&
-           VerifyField<int32_t>(verifier, VT_EMBEDDING_QUANTIZATION_BITS) &&
-           VerifyField<int32_t>(verifier, VT_CONTEXT_SIZE) &&
-           VerifyField<int32_t>(verifier, VT_MAX_SELECTION_SPAN) &&
-           VerifyOffset(verifier, VT_CHARGRAM_ORDERS) &&
-           verifier.Verify(chargram_orders()) &&
-           VerifyField<int32_t>(verifier, VT_MAX_WORD_LENGTH) &&
-           VerifyField<uint8_t>(verifier, VT_UNICODE_AWARE_FEATURES) &&
-           VerifyField<uint8_t>(verifier, VT_EXTRACT_CASE_FEATURE) &&
-           VerifyField<uint8_t>(verifier, VT_EXTRACT_SELECTION_MASK_FEATURE) &&
-           VerifyOffset(verifier, VT_REGEXP_FEATURE) &&
-           verifier.Verify(regexp_feature()) &&
-           verifier.VerifyVectorOfStrings(regexp_feature()) &&
-           VerifyField<uint8_t>(verifier, VT_REMAP_DIGITS) &&
-           VerifyField<uint8_t>(verifier, VT_LOWERCASE_TOKENS) &&
-           VerifyField<uint8_t>(verifier, VT_SELECTION_REDUCED_OUTPUT_SPACE) &&
-           VerifyOffset(verifier, VT_COLLECTIONS) &&
-           verifier.Verify(collections()) &&
-           verifier.VerifyVectorOfStrings(collections()) &&
-           VerifyField<int32_t>(verifier, VT_DEFAULT_COLLECTION) &&
-           VerifyField<uint8_t>(verifier, VT_ONLY_USE_LINE_WITH_CLICK) &&
-           VerifyField<uint8_t>(verifier, VT_SPLIT_TOKENS_ON_SELECTION_BOUNDARIES) &&
-           VerifyOffset(verifier, VT_TOKENIZATION_CODEPOINT_CONFIG) &&
-           verifier.Verify(tokenization_codepoint_config()) &&
-           verifier.VerifyVectorOfTables(tokenization_codepoint_config()) &&
-           VerifyField<int32_t>(verifier, VT_CENTER_TOKEN_SELECTION_METHOD) &&
-           VerifyField<uint8_t>(verifier, VT_SNAP_LABEL_SPAN_BOUNDARIES_TO_CONTAINING_TOKENS) &&
-           VerifyOffset(verifier, VT_SUPPORTED_CODEPOINT_RANGES) &&
-           verifier.Verify(supported_codepoint_ranges()) &&
-           verifier.VerifyVectorOfTables(supported_codepoint_ranges()) &&
-           VerifyOffset(verifier, VT_INTERNAL_TOKENIZER_CODEPOINT_RANGES) &&
-           verifier.Verify(internal_tokenizer_codepoint_ranges()) &&
-           verifier.VerifyVectorOfTables(internal_tokenizer_codepoint_ranges()) &&
-           VerifyField<float>(verifier, VT_MIN_SUPPORTED_CODEPOINT_RATIO) &&
-           VerifyField<int32_t>(verifier, VT_FEATURE_VERSION) &&
-           VerifyField<int32_t>(verifier, VT_TOKENIZATION_TYPE) &&
-           VerifyField<uint8_t>(verifier, VT_ICU_PRESERVE_WHITESPACE_TOKENS) &&
-           VerifyOffset(verifier, VT_IGNORED_SPAN_BOUNDARY_CODEPOINTS) &&
-           verifier.Verify(ignored_span_boundary_codepoints()) &&
-           VerifyOffset(verifier, VT_BOUNDS_SENSITIVE_FEATURES) &&
-           verifier.VerifyTable(bounds_sensitive_features()) &&
-           VerifyOffset(verifier, VT_ALLOWED_CHARGRAMS) &&
-           verifier.Verify(allowed_chargrams()) &&
-           verifier.VerifyVectorOfStrings(allowed_chargrams()) &&
-           VerifyField<uint8_t>(verifier, VT_TOKENIZE_ON_SCRIPT_CHANGE) &&
-           verifier.EndTable();
-  }
-  FeatureProcessorOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(FeatureProcessorOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<FeatureProcessorOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const FeatureProcessorOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct FeatureProcessorOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  void add_num_buckets(int32_t num_buckets) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_NUM_BUCKETS, num_buckets, -1);
-  }
-  void add_embedding_size(int32_t embedding_size) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_EMBEDDING_SIZE, embedding_size, -1);
-  }
-  void add_embedding_quantization_bits(int32_t embedding_quantization_bits) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_EMBEDDING_QUANTIZATION_BITS, embedding_quantization_bits, 8);
-  }
-  void add_context_size(int32_t context_size) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_CONTEXT_SIZE, context_size, -1);
-  }
-  void add_max_selection_span(int32_t max_selection_span) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_MAX_SELECTION_SPAN, max_selection_span, -1);
-  }
-  void add_chargram_orders(flatbuffers::Offset<flatbuffers::Vector<int32_t>> chargram_orders) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_CHARGRAM_ORDERS, chargram_orders);
-  }
-  void add_max_word_length(int32_t max_word_length) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_MAX_WORD_LENGTH, max_word_length, 20);
-  }
-  void add_unicode_aware_features(bool unicode_aware_features) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_UNICODE_AWARE_FEATURES, static_cast<uint8_t>(unicode_aware_features), 0);
-  }
-  void add_extract_case_feature(bool extract_case_feature) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_EXTRACT_CASE_FEATURE, static_cast<uint8_t>(extract_case_feature), 0);
-  }
-  void add_extract_selection_mask_feature(bool extract_selection_mask_feature) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_EXTRACT_SELECTION_MASK_FEATURE, static_cast<uint8_t>(extract_selection_mask_feature), 0);
-  }
-  void add_regexp_feature(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> regexp_feature) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_REGEXP_FEATURE, regexp_feature);
-  }
-  void add_remap_digits(bool remap_digits) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_REMAP_DIGITS, static_cast<uint8_t>(remap_digits), 0);
-  }
-  void add_lowercase_tokens(bool lowercase_tokens) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_LOWERCASE_TOKENS, static_cast<uint8_t>(lowercase_tokens), 0);
-  }
-  void add_selection_reduced_output_space(bool selection_reduced_output_space) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_SELECTION_REDUCED_OUTPUT_SPACE, static_cast<uint8_t>(selection_reduced_output_space), 1);
-  }
-  void add_collections(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> collections) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_COLLECTIONS, collections);
-  }
-  void add_default_collection(int32_t default_collection) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_DEFAULT_COLLECTION, default_collection, -1);
-  }
-  void add_only_use_line_with_click(bool only_use_line_with_click) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_ONLY_USE_LINE_WITH_CLICK, static_cast<uint8_t>(only_use_line_with_click), 0);
-  }
-  void add_split_tokens_on_selection_boundaries(bool split_tokens_on_selection_boundaries) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_SPLIT_TOKENS_ON_SELECTION_BOUNDARIES, static_cast<uint8_t>(split_tokens_on_selection_boundaries), 0);
-  }
-  void add_tokenization_codepoint_config(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TokenizationCodepointRange>>> tokenization_codepoint_config) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_TOKENIZATION_CODEPOINT_CONFIG, tokenization_codepoint_config);
-  }
-  void add_center_token_selection_method(libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod center_token_selection_method) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_CENTER_TOKEN_SELECTION_METHOD, static_cast<int32_t>(center_token_selection_method), 0);
-  }
-  void add_snap_label_span_boundaries_to_containing_tokens(bool snap_label_span_boundaries_to_containing_tokens) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_SNAP_LABEL_SPAN_BOUNDARIES_TO_CONTAINING_TOKENS, static_cast<uint8_t>(snap_label_span_boundaries_to_containing_tokens), 0);
-  }
-  void add_supported_codepoint_ranges(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>>> supported_codepoint_ranges) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_SUPPORTED_CODEPOINT_RANGES, supported_codepoint_ranges);
-  }
-  void add_internal_tokenizer_codepoint_ranges(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>>> internal_tokenizer_codepoint_ranges) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_INTERNAL_TOKENIZER_CODEPOINT_RANGES, internal_tokenizer_codepoint_ranges);
-  }
-  void add_min_supported_codepoint_ratio(float min_supported_codepoint_ratio) {
-    fbb_.AddElement<float>(FeatureProcessorOptions::VT_MIN_SUPPORTED_CODEPOINT_RATIO, min_supported_codepoint_ratio, 0.0f);
-  }
-  void add_feature_version(int32_t feature_version) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_FEATURE_VERSION, feature_version, 0);
-  }
-  void add_tokenization_type(libtextclassifier2::FeatureProcessorOptions_::TokenizationType tokenization_type) {
-    fbb_.AddElement<int32_t>(FeatureProcessorOptions::VT_TOKENIZATION_TYPE, static_cast<int32_t>(tokenization_type), 1);
-  }
-  void add_icu_preserve_whitespace_tokens(bool icu_preserve_whitespace_tokens) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_ICU_PRESERVE_WHITESPACE_TOKENS, static_cast<uint8_t>(icu_preserve_whitespace_tokens), 0);
-  }
-  void add_ignored_span_boundary_codepoints(flatbuffers::Offset<flatbuffers::Vector<int32_t>> ignored_span_boundary_codepoints) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_IGNORED_SPAN_BOUNDARY_CODEPOINTS, ignored_span_boundary_codepoints);
-  }
-  void add_bounds_sensitive_features(flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeatures> bounds_sensitive_features) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_BOUNDS_SENSITIVE_FEATURES, bounds_sensitive_features);
-  }
-  void add_allowed_chargrams(flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> allowed_chargrams) {
-    fbb_.AddOffset(FeatureProcessorOptions::VT_ALLOWED_CHARGRAMS, allowed_chargrams);
-  }
-  void add_tokenize_on_script_change(bool tokenize_on_script_change) {
-    fbb_.AddElement<uint8_t>(FeatureProcessorOptions::VT_TOKENIZE_ON_SCRIPT_CHANGE, static_cast<uint8_t>(tokenize_on_script_change), 0);
-  }
-  explicit FeatureProcessorOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  FeatureProcessorOptionsBuilder &operator=(const FeatureProcessorOptionsBuilder &);
-  flatbuffers::Offset<FeatureProcessorOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<FeatureProcessorOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<FeatureProcessorOptions> CreateFeatureProcessorOptions(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_buckets = -1,
-    int32_t embedding_size = -1,
-    int32_t embedding_quantization_bits = 8,
-    int32_t context_size = -1,
-    int32_t max_selection_span = -1,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> chargram_orders = 0,
-    int32_t max_word_length = 20,
-    bool unicode_aware_features = false,
-    bool extract_case_feature = false,
-    bool extract_selection_mask_feature = false,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> regexp_feature = 0,
-    bool remap_digits = false,
-    bool lowercase_tokens = false,
-    bool selection_reduced_output_space = true,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> collections = 0,
-    int32_t default_collection = -1,
-    bool only_use_line_with_click = false,
-    bool split_tokens_on_selection_boundaries = false,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<TokenizationCodepointRange>>> tokenization_codepoint_config = 0,
-    libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod center_token_selection_method = libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod_DEFAULT_CENTER_TOKEN_METHOD,
-    bool snap_label_span_boundaries_to_containing_tokens = false,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>>> supported_codepoint_ranges = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>>> internal_tokenizer_codepoint_ranges = 0,
-    float min_supported_codepoint_ratio = 0.0f,
-    int32_t feature_version = 0,
-    libtextclassifier2::FeatureProcessorOptions_::TokenizationType tokenization_type = libtextclassifier2::FeatureProcessorOptions_::TokenizationType_INTERNAL_TOKENIZER,
-    bool icu_preserve_whitespace_tokens = false,
-    flatbuffers::Offset<flatbuffers::Vector<int32_t>> ignored_span_boundary_codepoints = 0,
-    flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeatures> bounds_sensitive_features = 0,
-    flatbuffers::Offset<flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>> allowed_chargrams = 0,
-    bool tokenize_on_script_change = false) {
-  FeatureProcessorOptionsBuilder builder_(_fbb);
-  builder_.add_allowed_chargrams(allowed_chargrams);
-  builder_.add_bounds_sensitive_features(bounds_sensitive_features);
-  builder_.add_ignored_span_boundary_codepoints(ignored_span_boundary_codepoints);
-  builder_.add_tokenization_type(tokenization_type);
-  builder_.add_feature_version(feature_version);
-  builder_.add_min_supported_codepoint_ratio(min_supported_codepoint_ratio);
-  builder_.add_internal_tokenizer_codepoint_ranges(internal_tokenizer_codepoint_ranges);
-  builder_.add_supported_codepoint_ranges(supported_codepoint_ranges);
-  builder_.add_center_token_selection_method(center_token_selection_method);
-  builder_.add_tokenization_codepoint_config(tokenization_codepoint_config);
-  builder_.add_default_collection(default_collection);
-  builder_.add_collections(collections);
-  builder_.add_regexp_feature(regexp_feature);
-  builder_.add_max_word_length(max_word_length);
-  builder_.add_chargram_orders(chargram_orders);
-  builder_.add_max_selection_span(max_selection_span);
-  builder_.add_context_size(context_size);
-  builder_.add_embedding_quantization_bits(embedding_quantization_bits);
-  builder_.add_embedding_size(embedding_size);
-  builder_.add_num_buckets(num_buckets);
-  builder_.add_tokenize_on_script_change(tokenize_on_script_change);
-  builder_.add_icu_preserve_whitespace_tokens(icu_preserve_whitespace_tokens);
-  builder_.add_snap_label_span_boundaries_to_containing_tokens(snap_label_span_boundaries_to_containing_tokens);
-  builder_.add_split_tokens_on_selection_boundaries(split_tokens_on_selection_boundaries);
-  builder_.add_only_use_line_with_click(only_use_line_with_click);
-  builder_.add_selection_reduced_output_space(selection_reduced_output_space);
-  builder_.add_lowercase_tokens(lowercase_tokens);
-  builder_.add_remap_digits(remap_digits);
-  builder_.add_extract_selection_mask_feature(extract_selection_mask_feature);
-  builder_.add_extract_case_feature(extract_case_feature);
-  builder_.add_unicode_aware_features(unicode_aware_features);
-  return builder_.Finish();
-}
-
-inline flatbuffers::Offset<FeatureProcessorOptions> CreateFeatureProcessorOptionsDirect(
-    flatbuffers::FlatBufferBuilder &_fbb,
-    int32_t num_buckets = -1,
-    int32_t embedding_size = -1,
-    int32_t embedding_quantization_bits = 8,
-    int32_t context_size = -1,
-    int32_t max_selection_span = -1,
-    const std::vector<int32_t> *chargram_orders = nullptr,
-    int32_t max_word_length = 20,
-    bool unicode_aware_features = false,
-    bool extract_case_feature = false,
-    bool extract_selection_mask_feature = false,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *regexp_feature = nullptr,
-    bool remap_digits = false,
-    bool lowercase_tokens = false,
-    bool selection_reduced_output_space = true,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *collections = nullptr,
-    int32_t default_collection = -1,
-    bool only_use_line_with_click = false,
-    bool split_tokens_on_selection_boundaries = false,
-    const std::vector<flatbuffers::Offset<TokenizationCodepointRange>> *tokenization_codepoint_config = nullptr,
-    libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod center_token_selection_method = libtextclassifier2::FeatureProcessorOptions_::CenterTokenSelectionMethod_DEFAULT_CENTER_TOKEN_METHOD,
-    bool snap_label_span_boundaries_to_containing_tokens = false,
-    const std::vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> *supported_codepoint_ranges = nullptr,
-    const std::vector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> *internal_tokenizer_codepoint_ranges = nullptr,
-    float min_supported_codepoint_ratio = 0.0f,
-    int32_t feature_version = 0,
-    libtextclassifier2::FeatureProcessorOptions_::TokenizationType tokenization_type = libtextclassifier2::FeatureProcessorOptions_::TokenizationType_INTERNAL_TOKENIZER,
-    bool icu_preserve_whitespace_tokens = false,
-    const std::vector<int32_t> *ignored_span_boundary_codepoints = nullptr,
-    flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeatures> bounds_sensitive_features = 0,
-    const std::vector<flatbuffers::Offset<flatbuffers::String>> *allowed_chargrams = nullptr,
-    bool tokenize_on_script_change = false) {
-  return libtextclassifier2::CreateFeatureProcessorOptions(
-      _fbb,
-      num_buckets,
-      embedding_size,
-      embedding_quantization_bits,
-      context_size,
-      max_selection_span,
-      chargram_orders ? _fbb.CreateVector<int32_t>(*chargram_orders) : 0,
-      max_word_length,
-      unicode_aware_features,
-      extract_case_feature,
-      extract_selection_mask_feature,
-      regexp_feature ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*regexp_feature) : 0,
-      remap_digits,
-      lowercase_tokens,
-      selection_reduced_output_space,
-      collections ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*collections) : 0,
-      default_collection,
-      only_use_line_with_click,
-      split_tokens_on_selection_boundaries,
-      tokenization_codepoint_config ? _fbb.CreateVector<flatbuffers::Offset<TokenizationCodepointRange>>(*tokenization_codepoint_config) : 0,
-      center_token_selection_method,
-      snap_label_span_boundaries_to_containing_tokens,
-      supported_codepoint_ranges ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>>(*supported_codepoint_ranges) : 0,
-      internal_tokenizer_codepoint_ranges ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>>(*internal_tokenizer_codepoint_ranges) : 0,
-      min_supported_codepoint_ratio,
-      feature_version,
-      tokenization_type,
-      icu_preserve_whitespace_tokens,
-      ignored_span_boundary_codepoints ? _fbb.CreateVector<int32_t>(*ignored_span_boundary_codepoints) : 0,
-      bounds_sensitive_features,
-      allowed_chargrams ? _fbb.CreateVector<flatbuffers::Offset<flatbuffers::String>>(*allowed_chargrams) : 0,
-      tokenize_on_script_change);
-}
-
-flatbuffers::Offset<FeatureProcessorOptions> CreateFeatureProcessorOptions(flatbuffers::FlatBufferBuilder &_fbb, const FeatureProcessorOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
-inline CompressedBufferT *CompressedBuffer::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CompressedBufferT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CompressedBuffer::UnPackTo(CompressedBufferT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = buffer(); if (_e) { _o->buffer.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->buffer[_i] = _e->Get(_i); } } };
-  { auto _e = uncompressed_size(); _o->uncompressed_size = _e; };
-}
-
-inline flatbuffers::Offset<CompressedBuffer> CompressedBuffer::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CompressedBufferT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCompressedBuffer(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CompressedBuffer> CreateCompressedBuffer(flatbuffers::FlatBufferBuilder &_fbb, const CompressedBufferT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CompressedBufferT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _buffer = _o->buffer.size() ? _fbb.CreateVector(_o->buffer) : 0;
-  auto _uncompressed_size = _o->uncompressed_size;
-  return libtextclassifier2::CreateCompressedBuffer(
-      _fbb,
-      _buffer,
-      _uncompressed_size);
-}
-
-inline SelectionModelOptionsT *SelectionModelOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new SelectionModelOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void SelectionModelOptions::UnPackTo(SelectionModelOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = strip_unpaired_brackets(); _o->strip_unpaired_brackets = _e; };
-  { auto _e = symmetry_context_size(); _o->symmetry_context_size = _e; };
-  { auto _e = batch_size(); _o->batch_size = _e; };
-  { auto _e = always_classify_suggested_selection(); _o->always_classify_suggested_selection = _e; };
-}
-
-inline flatbuffers::Offset<SelectionModelOptions> SelectionModelOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const SelectionModelOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateSelectionModelOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<SelectionModelOptions> CreateSelectionModelOptions(flatbuffers::FlatBufferBuilder &_fbb, const SelectionModelOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const SelectionModelOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _strip_unpaired_brackets = _o->strip_unpaired_brackets;
-  auto _symmetry_context_size = _o->symmetry_context_size;
-  auto _batch_size = _o->batch_size;
-  auto _always_classify_suggested_selection = _o->always_classify_suggested_selection;
-  return libtextclassifier2::CreateSelectionModelOptions(
-      _fbb,
-      _strip_unpaired_brackets,
-      _symmetry_context_size,
-      _batch_size,
-      _always_classify_suggested_selection);
-}
-
-inline ClassificationModelOptionsT *ClassificationModelOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ClassificationModelOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ClassificationModelOptions::UnPackTo(ClassificationModelOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = phone_min_num_digits(); _o->phone_min_num_digits = _e; };
-  { auto _e = phone_max_num_digits(); _o->phone_max_num_digits = _e; };
-  { auto _e = address_min_num_tokens(); _o->address_min_num_tokens = _e; };
-  { auto _e = max_num_tokens(); _o->max_num_tokens = _e; };
-}
-
-inline flatbuffers::Offset<ClassificationModelOptions> ClassificationModelOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ClassificationModelOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateClassificationModelOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ClassificationModelOptions> CreateClassificationModelOptions(flatbuffers::FlatBufferBuilder &_fbb, const ClassificationModelOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ClassificationModelOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _phone_min_num_digits = _o->phone_min_num_digits;
-  auto _phone_max_num_digits = _o->phone_max_num_digits;
-  auto _address_min_num_tokens = _o->address_min_num_tokens;
-  auto _max_num_tokens = _o->max_num_tokens;
-  return libtextclassifier2::CreateClassificationModelOptions(
-      _fbb,
-      _phone_min_num_digits,
-      _phone_max_num_digits,
-      _address_min_num_tokens,
-      _max_num_tokens);
-}
-
-namespace RegexModel_ {
-
-inline PatternT *Pattern::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new PatternT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Pattern::UnPackTo(PatternT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = collection_name(); if (_e) _o->collection_name = _e->str(); };
-  { auto _e = pattern(); if (_e) _o->pattern = _e->str(); };
-  { auto _e = enabled_modes(); _o->enabled_modes = _e; };
-  { auto _e = target_classification_score(); _o->target_classification_score = _e; };
-  { auto _e = priority_score(); _o->priority_score = _e; };
-  { auto _e = use_approximate_matching(); _o->use_approximate_matching = _e; };
-  { auto _e = compressed_pattern(); if (_e) _o->compressed_pattern = std::unique_ptr<libtextclassifier2::CompressedBufferT>(_e->UnPack(_resolver)); };
-}
-
-inline flatbuffers::Offset<Pattern> Pattern::Pack(flatbuffers::FlatBufferBuilder &_fbb, const PatternT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreatePattern(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Pattern> CreatePattern(flatbuffers::FlatBufferBuilder &_fbb, const PatternT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const PatternT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _collection_name = _o->collection_name.empty() ? 0 : _fbb.CreateString(_o->collection_name);
-  auto _pattern = _o->pattern.empty() ? 0 : _fbb.CreateString(_o->pattern);
-  auto _enabled_modes = _o->enabled_modes;
-  auto _target_classification_score = _o->target_classification_score;
-  auto _priority_score = _o->priority_score;
-  auto _use_approximate_matching = _o->use_approximate_matching;
-  auto _compressed_pattern = _o->compressed_pattern ? CreateCompressedBuffer(_fbb, _o->compressed_pattern.get(), _rehasher) : 0;
-  return libtextclassifier2::RegexModel_::CreatePattern(
-      _fbb,
-      _collection_name,
-      _pattern,
-      _enabled_modes,
-      _target_classification_score,
-      _priority_score,
-      _use_approximate_matching,
-      _compressed_pattern);
-}
-
-}  // namespace RegexModel_
-
-inline RegexModelT *RegexModel::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new RegexModelT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void RegexModel::UnPackTo(RegexModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = patterns(); if (_e) { _o->patterns.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->patterns[_i] = std::unique_ptr<libtextclassifier2::RegexModel_::PatternT>(_e->Get(_i)->UnPack(_resolver)); } } };
-}
-
-inline flatbuffers::Offset<RegexModel> RegexModel::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RegexModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateRegexModel(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<RegexModel> CreateRegexModel(flatbuffers::FlatBufferBuilder &_fbb, const RegexModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RegexModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _patterns = _o->patterns.size() ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::RegexModel_::Pattern>> (_o->patterns.size(), [](size_t i, _VectorArgs *__va) { return CreatePattern(*__va->__fbb, __va->__o->patterns[i].get(), __va->__rehasher); }, &_va ) : 0;
-  return libtextclassifier2::CreateRegexModel(
-      _fbb,
-      _patterns);
-}
-
-namespace DatetimeModelPattern_ {
-
-inline RegexT *Regex::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new RegexT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Regex::UnPackTo(RegexT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = pattern(); if (_e) _o->pattern = _e->str(); };
-  { auto _e = groups(); if (_e) { _o->groups.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->groups[_i] = (DatetimeGroupType)_e->Get(_i); } } };
-  { auto _e = compressed_pattern(); if (_e) _o->compressed_pattern = std::unique_ptr<libtextclassifier2::CompressedBufferT>(_e->UnPack(_resolver)); };
-}
-
-inline flatbuffers::Offset<Regex> Regex::Pack(flatbuffers::FlatBufferBuilder &_fbb, const RegexT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateRegex(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Regex> CreateRegex(flatbuffers::FlatBufferBuilder &_fbb, const RegexT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const RegexT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _pattern = _o->pattern.empty() ? 0 : _fbb.CreateString(_o->pattern);
-  auto _groups = _o->groups.size() ? _fbb.CreateVector((const int32_t*)_o->groups.data(), _o->groups.size()) : 0;
-  auto _compressed_pattern = _o->compressed_pattern ? CreateCompressedBuffer(_fbb, _o->compressed_pattern.get(), _rehasher) : 0;
-  return libtextclassifier2::DatetimeModelPattern_::CreateRegex(
-      _fbb,
-      _pattern,
-      _groups,
-      _compressed_pattern);
-}
-
-}  // namespace DatetimeModelPattern_
-
-inline DatetimeModelPatternT *DatetimeModelPattern::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DatetimeModelPatternT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DatetimeModelPattern::UnPackTo(DatetimeModelPatternT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = regexes(); if (_e) { _o->regexes.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->regexes[_i] = std::unique_ptr<libtextclassifier2::DatetimeModelPattern_::RegexT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = locales(); if (_e) { _o->locales.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->locales[_i] = _e->Get(_i); } } };
-  { auto _e = target_classification_score(); _o->target_classification_score = _e; };
-  { auto _e = priority_score(); _o->priority_score = _e; };
-  { auto _e = enabled_modes(); _o->enabled_modes = _e; };
-}
-
-inline flatbuffers::Offset<DatetimeModelPattern> DatetimeModelPattern::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelPatternT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDatetimeModelPattern(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DatetimeModelPattern> CreateDatetimeModelPattern(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelPatternT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DatetimeModelPatternT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _regexes = _o->regexes.size() ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::DatetimeModelPattern_::Regex>> (_o->regexes.size(), [](size_t i, _VectorArgs *__va) { return CreateRegex(*__va->__fbb, __va->__o->regexes[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _locales = _o->locales.size() ? _fbb.CreateVector(_o->locales) : 0;
-  auto _target_classification_score = _o->target_classification_score;
-  auto _priority_score = _o->priority_score;
-  auto _enabled_modes = _o->enabled_modes;
-  return libtextclassifier2::CreateDatetimeModelPattern(
-      _fbb,
-      _regexes,
-      _locales,
-      _target_classification_score,
-      _priority_score,
-      _enabled_modes);
-}
-
-inline DatetimeModelExtractorT *DatetimeModelExtractor::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DatetimeModelExtractorT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DatetimeModelExtractor::UnPackTo(DatetimeModelExtractorT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = extractor(); _o->extractor = _e; };
-  { auto _e = pattern(); if (_e) _o->pattern = _e->str(); };
-  { auto _e = locales(); if (_e) { _o->locales.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->locales[_i] = _e->Get(_i); } } };
-  { auto _e = compressed_pattern(); if (_e) _o->compressed_pattern = std::unique_ptr<CompressedBufferT>(_e->UnPack(_resolver)); };
-}
-
-inline flatbuffers::Offset<DatetimeModelExtractor> DatetimeModelExtractor::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelExtractorT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDatetimeModelExtractor(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DatetimeModelExtractor> CreateDatetimeModelExtractor(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelExtractorT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DatetimeModelExtractorT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _extractor = _o->extractor;
-  auto _pattern = _o->pattern.empty() ? 0 : _fbb.CreateString(_o->pattern);
-  auto _locales = _o->locales.size() ? _fbb.CreateVector(_o->locales) : 0;
-  auto _compressed_pattern = _o->compressed_pattern ? CreateCompressedBuffer(_fbb, _o->compressed_pattern.get(), _rehasher) : 0;
-  return libtextclassifier2::CreateDatetimeModelExtractor(
-      _fbb,
-      _extractor,
-      _pattern,
-      _locales,
-      _compressed_pattern);
-}
-
-inline DatetimeModelT *DatetimeModel::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DatetimeModelT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DatetimeModel::UnPackTo(DatetimeModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = locales(); if (_e) { _o->locales.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->locales[_i] = _e->Get(_i)->str(); } } };
-  { auto _e = patterns(); if (_e) { _o->patterns.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->patterns[_i] = std::unique_ptr<DatetimeModelPatternT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = extractors(); if (_e) { _o->extractors.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->extractors[_i] = std::unique_ptr<DatetimeModelExtractorT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = use_extractors_for_locating(); _o->use_extractors_for_locating = _e; };
-  { auto _e = default_locales(); if (_e) { _o->default_locales.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->default_locales[_i] = _e->Get(_i); } } };
-}
-
-inline flatbuffers::Offset<DatetimeModel> DatetimeModel::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDatetimeModel(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DatetimeModel> CreateDatetimeModel(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DatetimeModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _locales = _o->locales.size() ? _fbb.CreateVectorOfStrings(_o->locales) : 0;
-  auto _patterns = _o->patterns.size() ? _fbb.CreateVector<flatbuffers::Offset<DatetimeModelPattern>> (_o->patterns.size(), [](size_t i, _VectorArgs *__va) { return CreateDatetimeModelPattern(*__va->__fbb, __va->__o->patterns[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _extractors = _o->extractors.size() ? _fbb.CreateVector<flatbuffers::Offset<DatetimeModelExtractor>> (_o->extractors.size(), [](size_t i, _VectorArgs *__va) { return CreateDatetimeModelExtractor(*__va->__fbb, __va->__o->extractors[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _use_extractors_for_locating = _o->use_extractors_for_locating;
-  auto _default_locales = _o->default_locales.size() ? _fbb.CreateVector(_o->default_locales) : 0;
-  return libtextclassifier2::CreateDatetimeModel(
-      _fbb,
-      _locales,
-      _patterns,
-      _extractors,
-      _use_extractors_for_locating,
-      _default_locales);
-}
-
-namespace DatetimeModelLibrary_ {
-
-inline ItemT *Item::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ItemT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Item::UnPackTo(ItemT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = key(); if (_e) _o->key = _e->str(); };
-  { auto _e = value(); if (_e) _o->value = std::unique_ptr<libtextclassifier2::DatetimeModelT>(_e->UnPack(_resolver)); };
-}
-
-inline flatbuffers::Offset<Item> Item::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ItemT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateItem(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Item> CreateItem(flatbuffers::FlatBufferBuilder &_fbb, const ItemT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ItemT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _key = _o->key.empty() ? 0 : _fbb.CreateString(_o->key);
-  auto _value = _o->value ? CreateDatetimeModel(_fbb, _o->value.get(), _rehasher) : 0;
-  return libtextclassifier2::DatetimeModelLibrary_::CreateItem(
-      _fbb,
-      _key,
-      _value);
-}
-
-}  // namespace DatetimeModelLibrary_
-
-inline DatetimeModelLibraryT *DatetimeModelLibrary::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new DatetimeModelLibraryT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void DatetimeModelLibrary::UnPackTo(DatetimeModelLibraryT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = models(); if (_e) { _o->models.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->models[_i] = std::unique_ptr<libtextclassifier2::DatetimeModelLibrary_::ItemT>(_e->Get(_i)->UnPack(_resolver)); } } };
-}
-
-inline flatbuffers::Offset<DatetimeModelLibrary> DatetimeModelLibrary::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelLibraryT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateDatetimeModelLibrary(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<DatetimeModelLibrary> CreateDatetimeModelLibrary(flatbuffers::FlatBufferBuilder &_fbb, const DatetimeModelLibraryT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DatetimeModelLibraryT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _models = _o->models.size() ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::DatetimeModelLibrary_::Item>> (_o->models.size(), [](size_t i, _VectorArgs *__va) { return CreateItem(*__va->__fbb, __va->__o->models[i].get(), __va->__rehasher); }, &_va ) : 0;
-  return libtextclassifier2::CreateDatetimeModelLibrary(
-      _fbb,
-      _models);
-}
-
-inline ModelTriggeringOptionsT *ModelTriggeringOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ModelTriggeringOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void ModelTriggeringOptions::UnPackTo(ModelTriggeringOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = min_annotate_confidence(); _o->min_annotate_confidence = _e; };
-  { auto _e = enabled_modes(); _o->enabled_modes = _e; };
-}
-
-inline flatbuffers::Offset<ModelTriggeringOptions> ModelTriggeringOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelTriggeringOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateModelTriggeringOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<ModelTriggeringOptions> CreateModelTriggeringOptions(flatbuffers::FlatBufferBuilder &_fbb, const ModelTriggeringOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelTriggeringOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _min_annotate_confidence = _o->min_annotate_confidence;
-  auto _enabled_modes = _o->enabled_modes;
-  return libtextclassifier2::CreateModelTriggeringOptions(
-      _fbb,
-      _min_annotate_confidence,
-      _enabled_modes);
-}
-
-inline OutputOptionsT *OutputOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new OutputOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void OutputOptions::UnPackTo(OutputOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = filtered_collections_annotation(); if (_e) { _o->filtered_collections_annotation.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->filtered_collections_annotation[_i] = _e->Get(_i)->str(); } } };
-  { auto _e = filtered_collections_classification(); if (_e) { _o->filtered_collections_classification.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->filtered_collections_classification[_i] = _e->Get(_i)->str(); } } };
-  { auto _e = filtered_collections_selection(); if (_e) { _o->filtered_collections_selection.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->filtered_collections_selection[_i] = _e->Get(_i)->str(); } } };
-}
-
-inline flatbuffers::Offset<OutputOptions> OutputOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const OutputOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateOutputOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<OutputOptions> CreateOutputOptions(flatbuffers::FlatBufferBuilder &_fbb, const OutputOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const OutputOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _filtered_collections_annotation = _o->filtered_collections_annotation.size() ? _fbb.CreateVectorOfStrings(_o->filtered_collections_annotation) : 0;
-  auto _filtered_collections_classification = _o->filtered_collections_classification.size() ? _fbb.CreateVectorOfStrings(_o->filtered_collections_classification) : 0;
-  auto _filtered_collections_selection = _o->filtered_collections_selection.size() ? _fbb.CreateVectorOfStrings(_o->filtered_collections_selection) : 0;
-  return libtextclassifier2::CreateOutputOptions(
-      _fbb,
-      _filtered_collections_annotation,
-      _filtered_collections_classification,
-      _filtered_collections_selection);
-}
-
-inline ModelT *Model::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new ModelT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void Model::UnPackTo(ModelT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = locales(); if (_e) _o->locales = _e->str(); };
-  { auto _e = version(); _o->version = _e; };
-  { auto _e = name(); if (_e) _o->name = _e->str(); };
-  { auto _e = selection_feature_options(); if (_e) _o->selection_feature_options = std::unique_ptr<FeatureProcessorOptionsT>(_e->UnPack(_resolver)); };
-  { auto _e = classification_feature_options(); if (_e) _o->classification_feature_options = std::unique_ptr<FeatureProcessorOptionsT>(_e->UnPack(_resolver)); };
-  { auto _e = selection_model(); if (_e) { _o->selection_model.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->selection_model[_i] = _e->Get(_i); } } };
-  { auto _e = classification_model(); if (_e) { _o->classification_model.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->classification_model[_i] = _e->Get(_i); } } };
-  { auto _e = embedding_model(); if (_e) { _o->embedding_model.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->embedding_model[_i] = _e->Get(_i); } } };
-  { auto _e = selection_options(); if (_e) _o->selection_options = std::unique_ptr<SelectionModelOptionsT>(_e->UnPack(_resolver)); };
-  { auto _e = classification_options(); if (_e) _o->classification_options = std::unique_ptr<ClassificationModelOptionsT>(_e->UnPack(_resolver)); };
-  { auto _e = regex_model(); if (_e) _o->regex_model = std::unique_ptr<RegexModelT>(_e->UnPack(_resolver)); };
-  { auto _e = datetime_model(); if (_e) _o->datetime_model = std::unique_ptr<DatetimeModelT>(_e->UnPack(_resolver)); };
-  { auto _e = triggering_options(); if (_e) _o->triggering_options = std::unique_ptr<ModelTriggeringOptionsT>(_e->UnPack(_resolver)); };
-  { auto _e = enabled_modes(); _o->enabled_modes = _e; };
-  { auto _e = snap_whitespace_selections(); _o->snap_whitespace_selections = _e; };
-  { auto _e = output_options(); if (_e) _o->output_options = std::unique_ptr<OutputOptionsT>(_e->UnPack(_resolver)); };
-}
-
-inline flatbuffers::Offset<Model> Model::Pack(flatbuffers::FlatBufferBuilder &_fbb, const ModelT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateModel(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<Model> CreateModel(flatbuffers::FlatBufferBuilder &_fbb, const ModelT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const ModelT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _locales = _o->locales.empty() ? 0 : _fbb.CreateString(_o->locales);
-  auto _version = _o->version;
-  auto _name = _o->name.empty() ? 0 : _fbb.CreateString(_o->name);
-  auto _selection_feature_options = _o->selection_feature_options ? CreateFeatureProcessorOptions(_fbb, _o->selection_feature_options.get(), _rehasher) : 0;
-  auto _classification_feature_options = _o->classification_feature_options ? CreateFeatureProcessorOptions(_fbb, _o->classification_feature_options.get(), _rehasher) : 0;
-  auto _selection_model = _o->selection_model.size() ? _fbb.CreateVector(_o->selection_model) : 0;
-  auto _classification_model = _o->classification_model.size() ? _fbb.CreateVector(_o->classification_model) : 0;
-  auto _embedding_model = _o->embedding_model.size() ? _fbb.CreateVector(_o->embedding_model) : 0;
-  auto _selection_options = _o->selection_options ? CreateSelectionModelOptions(_fbb, _o->selection_options.get(), _rehasher) : 0;
-  auto _classification_options = _o->classification_options ? CreateClassificationModelOptions(_fbb, _o->classification_options.get(), _rehasher) : 0;
-  auto _regex_model = _o->regex_model ? CreateRegexModel(_fbb, _o->regex_model.get(), _rehasher) : 0;
-  auto _datetime_model = _o->datetime_model ? CreateDatetimeModel(_fbb, _o->datetime_model.get(), _rehasher) : 0;
-  auto _triggering_options = _o->triggering_options ? CreateModelTriggeringOptions(_fbb, _o->triggering_options.get(), _rehasher) : 0;
-  auto _enabled_modes = _o->enabled_modes;
-  auto _snap_whitespace_selections = _o->snap_whitespace_selections;
-  auto _output_options = _o->output_options ? CreateOutputOptions(_fbb, _o->output_options.get(), _rehasher) : 0;
-  return libtextclassifier2::CreateModel(
-      _fbb,
-      _locales,
-      _version,
-      _name,
-      _selection_feature_options,
-      _classification_feature_options,
-      _selection_model,
-      _classification_model,
-      _embedding_model,
-      _selection_options,
-      _classification_options,
-      _regex_model,
-      _datetime_model,
-      _triggering_options,
-      _enabled_modes,
-      _snap_whitespace_selections,
-      _output_options);
-}
-
-inline TokenizationCodepointRangeT *TokenizationCodepointRange::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TokenizationCodepointRangeT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void TokenizationCodepointRange::UnPackTo(TokenizationCodepointRangeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = start(); _o->start = _e; };
-  { auto _e = end(); _o->end = _e; };
-  { auto _e = role(); _o->role = _e; };
-  { auto _e = script_id(); _o->script_id = _e; };
-}
-
-inline flatbuffers::Offset<TokenizationCodepointRange> TokenizationCodepointRange::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TokenizationCodepointRangeT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTokenizationCodepointRange(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<TokenizationCodepointRange> CreateTokenizationCodepointRange(flatbuffers::FlatBufferBuilder &_fbb, const TokenizationCodepointRangeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TokenizationCodepointRangeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _start = _o->start;
-  auto _end = _o->end;
-  auto _role = _o->role;
-  auto _script_id = _o->script_id;
-  return libtextclassifier2::CreateTokenizationCodepointRange(
-      _fbb,
-      _start,
-      _end,
-      _role,
-      _script_id);
-}
-
-namespace FeatureProcessorOptions_ {
-
-inline CodepointRangeT *CodepointRange::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new CodepointRangeT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void CodepointRange::UnPackTo(CodepointRangeT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = start(); _o->start = _e; };
-  { auto _e = end(); _o->end = _e; };
-}
-
-inline flatbuffers::Offset<CodepointRange> CodepointRange::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CodepointRangeT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateCodepointRange(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<CodepointRange> CreateCodepointRange(flatbuffers::FlatBufferBuilder &_fbb, const CodepointRangeT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CodepointRangeT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _start = _o->start;
-  auto _end = _o->end;
-  return libtextclassifier2::FeatureProcessorOptions_::CreateCodepointRange(
-      _fbb,
-      _start,
-      _end);
-}
-
-inline BoundsSensitiveFeaturesT *BoundsSensitiveFeatures::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new BoundsSensitiveFeaturesT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void BoundsSensitiveFeatures::UnPackTo(BoundsSensitiveFeaturesT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = enabled(); _o->enabled = _e; };
-  { auto _e = num_tokens_before(); _o->num_tokens_before = _e; };
-  { auto _e = num_tokens_inside_left(); _o->num_tokens_inside_left = _e; };
-  { auto _e = num_tokens_inside_right(); _o->num_tokens_inside_right = _e; };
-  { auto _e = num_tokens_after(); _o->num_tokens_after = _e; };
-  { auto _e = include_inside_bag(); _o->include_inside_bag = _e; };
-  { auto _e = include_inside_length(); _o->include_inside_length = _e; };
-  { auto _e = score_single_token_spans_as_zero(); _o->score_single_token_spans_as_zero = _e; };
-}
-
-inline flatbuffers::Offset<BoundsSensitiveFeatures> BoundsSensitiveFeatures::Pack(flatbuffers::FlatBufferBuilder &_fbb, const BoundsSensitiveFeaturesT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateBoundsSensitiveFeatures(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<BoundsSensitiveFeatures> CreateBoundsSensitiveFeatures(flatbuffers::FlatBufferBuilder &_fbb, const BoundsSensitiveFeaturesT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const BoundsSensitiveFeaturesT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _enabled = _o->enabled;
-  auto _num_tokens_before = _o->num_tokens_before;
-  auto _num_tokens_inside_left = _o->num_tokens_inside_left;
-  auto _num_tokens_inside_right = _o->num_tokens_inside_right;
-  auto _num_tokens_after = _o->num_tokens_after;
-  auto _include_inside_bag = _o->include_inside_bag;
-  auto _include_inside_length = _o->include_inside_length;
-  auto _score_single_token_spans_as_zero = _o->score_single_token_spans_as_zero;
-  return libtextclassifier2::FeatureProcessorOptions_::CreateBoundsSensitiveFeatures(
-      _fbb,
-      _enabled,
-      _num_tokens_before,
-      _num_tokens_inside_left,
-      _num_tokens_inside_right,
-      _num_tokens_after,
-      _include_inside_bag,
-      _include_inside_length,
-      _score_single_token_spans_as_zero);
-}
-
-inline AlternativeCollectionMapEntryT *AlternativeCollectionMapEntry::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new AlternativeCollectionMapEntryT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void AlternativeCollectionMapEntry::UnPackTo(AlternativeCollectionMapEntryT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = key(); if (_e) _o->key = _e->str(); };
-  { auto _e = value(); if (_e) _o->value = _e->str(); };
-}
-
-inline flatbuffers::Offset<AlternativeCollectionMapEntry> AlternativeCollectionMapEntry::Pack(flatbuffers::FlatBufferBuilder &_fbb, const AlternativeCollectionMapEntryT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateAlternativeCollectionMapEntry(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<AlternativeCollectionMapEntry> CreateAlternativeCollectionMapEntry(flatbuffers::FlatBufferBuilder &_fbb, const AlternativeCollectionMapEntryT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const AlternativeCollectionMapEntryT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _key = _o->key.empty() ? 0 : _fbb.CreateString(_o->key);
-  auto _value = _o->value.empty() ? 0 : _fbb.CreateString(_o->value);
-  return libtextclassifier2::FeatureProcessorOptions_::CreateAlternativeCollectionMapEntry(
-      _fbb,
-      _key,
-      _value);
-}
-
-}  // namespace FeatureProcessorOptions_
-
-inline FeatureProcessorOptionsT *FeatureProcessorOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new FeatureProcessorOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void FeatureProcessorOptions::UnPackTo(FeatureProcessorOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-  { auto _e = num_buckets(); _o->num_buckets = _e; };
-  { auto _e = embedding_size(); _o->embedding_size = _e; };
-  { auto _e = embedding_quantization_bits(); _o->embedding_quantization_bits = _e; };
-  { auto _e = context_size(); _o->context_size = _e; };
-  { auto _e = max_selection_span(); _o->max_selection_span = _e; };
-  { auto _e = chargram_orders(); if (_e) { _o->chargram_orders.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->chargram_orders[_i] = _e->Get(_i); } } };
-  { auto _e = max_word_length(); _o->max_word_length = _e; };
-  { auto _e = unicode_aware_features(); _o->unicode_aware_features = _e; };
-  { auto _e = extract_case_feature(); _o->extract_case_feature = _e; };
-  { auto _e = extract_selection_mask_feature(); _o->extract_selection_mask_feature = _e; };
-  { auto _e = regexp_feature(); if (_e) { _o->regexp_feature.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->regexp_feature[_i] = _e->Get(_i)->str(); } } };
-  { auto _e = remap_digits(); _o->remap_digits = _e; };
-  { auto _e = lowercase_tokens(); _o->lowercase_tokens = _e; };
-  { auto _e = selection_reduced_output_space(); _o->selection_reduced_output_space = _e; };
-  { auto _e = collections(); if (_e) { _o->collections.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->collections[_i] = _e->Get(_i)->str(); } } };
-  { auto _e = default_collection(); _o->default_collection = _e; };
-  { auto _e = only_use_line_with_click(); _o->only_use_line_with_click = _e; };
-  { auto _e = split_tokens_on_selection_boundaries(); _o->split_tokens_on_selection_boundaries = _e; };
-  { auto _e = tokenization_codepoint_config(); if (_e) { _o->tokenization_codepoint_config.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->tokenization_codepoint_config[_i] = std::unique_ptr<TokenizationCodepointRangeT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = center_token_selection_method(); _o->center_token_selection_method = _e; };
-  { auto _e = snap_label_span_boundaries_to_containing_tokens(); _o->snap_label_span_boundaries_to_containing_tokens = _e; };
-  { auto _e = supported_codepoint_ranges(); if (_e) { _o->supported_codepoint_ranges.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->supported_codepoint_ranges[_i] = std::unique_ptr<libtextclassifier2::FeatureProcessorOptions_::CodepointRangeT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = internal_tokenizer_codepoint_ranges(); if (_e) { _o->internal_tokenizer_codepoint_ranges.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->internal_tokenizer_codepoint_ranges[_i] = std::unique_ptr<libtextclassifier2::FeatureProcessorOptions_::CodepointRangeT>(_e->Get(_i)->UnPack(_resolver)); } } };
-  { auto _e = min_supported_codepoint_ratio(); _o->min_supported_codepoint_ratio = _e; };
-  { auto _e = feature_version(); _o->feature_version = _e; };
-  { auto _e = tokenization_type(); _o->tokenization_type = _e; };
-  { auto _e = icu_preserve_whitespace_tokens(); _o->icu_preserve_whitespace_tokens = _e; };
-  { auto _e = ignored_span_boundary_codepoints(); if (_e) { _o->ignored_span_boundary_codepoints.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->ignored_span_boundary_codepoints[_i] = _e->Get(_i); } } };
-  { auto _e = bounds_sensitive_features(); if (_e) _o->bounds_sensitive_features = std::unique_ptr<libtextclassifier2::FeatureProcessorOptions_::BoundsSensitiveFeaturesT>(_e->UnPack(_resolver)); };
-  { auto _e = allowed_chargrams(); if (_e) { _o->allowed_chargrams.resize(_e->size()); for (flatbuffers::uoffset_t _i = 0; _i < _e->size(); _i++) { _o->allowed_chargrams[_i] = _e->Get(_i)->str(); } } };
-  { auto _e = tokenize_on_script_change(); _o->tokenize_on_script_change = _e; };
-}
-
-inline flatbuffers::Offset<FeatureProcessorOptions> FeatureProcessorOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const FeatureProcessorOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateFeatureProcessorOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<FeatureProcessorOptions> CreateFeatureProcessorOptions(flatbuffers::FlatBufferBuilder &_fbb, const FeatureProcessorOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const FeatureProcessorOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  auto _num_buckets = _o->num_buckets;
-  auto _embedding_size = _o->embedding_size;
-  auto _embedding_quantization_bits = _o->embedding_quantization_bits;
-  auto _context_size = _o->context_size;
-  auto _max_selection_span = _o->max_selection_span;
-  auto _chargram_orders = _o->chargram_orders.size() ? _fbb.CreateVector(_o->chargram_orders) : 0;
-  auto _max_word_length = _o->max_word_length;
-  auto _unicode_aware_features = _o->unicode_aware_features;
-  auto _extract_case_feature = _o->extract_case_feature;
-  auto _extract_selection_mask_feature = _o->extract_selection_mask_feature;
-  auto _regexp_feature = _o->regexp_feature.size() ? _fbb.CreateVectorOfStrings(_o->regexp_feature) : 0;
-  auto _remap_digits = _o->remap_digits;
-  auto _lowercase_tokens = _o->lowercase_tokens;
-  auto _selection_reduced_output_space = _o->selection_reduced_output_space;
-  auto _collections = _o->collections.size() ? _fbb.CreateVectorOfStrings(_o->collections) : 0;
-  auto _default_collection = _o->default_collection;
-  auto _only_use_line_with_click = _o->only_use_line_with_click;
-  auto _split_tokens_on_selection_boundaries = _o->split_tokens_on_selection_boundaries;
-  auto _tokenization_codepoint_config = _o->tokenization_codepoint_config.size() ? _fbb.CreateVector<flatbuffers::Offset<TokenizationCodepointRange>> (_o->tokenization_codepoint_config.size(), [](size_t i, _VectorArgs *__va) { return CreateTokenizationCodepointRange(*__va->__fbb, __va->__o->tokenization_codepoint_config[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _center_token_selection_method = _o->center_token_selection_method;
-  auto _snap_label_span_boundaries_to_containing_tokens = _o->snap_label_span_boundaries_to_containing_tokens;
-  auto _supported_codepoint_ranges = _o->supported_codepoint_ranges.size() ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> (_o->supported_codepoint_ranges.size(), [](size_t i, _VectorArgs *__va) { return CreateCodepointRange(*__va->__fbb, __va->__o->supported_codepoint_ranges[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _internal_tokenizer_codepoint_ranges = _o->internal_tokenizer_codepoint_ranges.size() ? _fbb.CreateVector<flatbuffers::Offset<libtextclassifier2::FeatureProcessorOptions_::CodepointRange>> (_o->internal_tokenizer_codepoint_ranges.size(), [](size_t i, _VectorArgs *__va) { return CreateCodepointRange(*__va->__fbb, __va->__o->internal_tokenizer_codepoint_ranges[i].get(), __va->__rehasher); }, &_va ) : 0;
-  auto _min_supported_codepoint_ratio = _o->min_supported_codepoint_ratio;
-  auto _feature_version = _o->feature_version;
-  auto _tokenization_type = _o->tokenization_type;
-  auto _icu_preserve_whitespace_tokens = _o->icu_preserve_whitespace_tokens;
-  auto _ignored_span_boundary_codepoints = _o->ignored_span_boundary_codepoints.size() ? _fbb.CreateVector(_o->ignored_span_boundary_codepoints) : 0;
-  auto _bounds_sensitive_features = _o->bounds_sensitive_features ? CreateBoundsSensitiveFeatures(_fbb, _o->bounds_sensitive_features.get(), _rehasher) : 0;
-  auto _allowed_chargrams = _o->allowed_chargrams.size() ? _fbb.CreateVectorOfStrings(_o->allowed_chargrams) : 0;
-  auto _tokenize_on_script_change = _o->tokenize_on_script_change;
-  return libtextclassifier2::CreateFeatureProcessorOptions(
-      _fbb,
-      _num_buckets,
-      _embedding_size,
-      _embedding_quantization_bits,
-      _context_size,
-      _max_selection_span,
-      _chargram_orders,
-      _max_word_length,
-      _unicode_aware_features,
-      _extract_case_feature,
-      _extract_selection_mask_feature,
-      _regexp_feature,
-      _remap_digits,
-      _lowercase_tokens,
-      _selection_reduced_output_space,
-      _collections,
-      _default_collection,
-      _only_use_line_with_click,
-      _split_tokens_on_selection_boundaries,
-      _tokenization_codepoint_config,
-      _center_token_selection_method,
-      _snap_label_span_boundaries_to_containing_tokens,
-      _supported_codepoint_ranges,
-      _internal_tokenizer_codepoint_ranges,
-      _min_supported_codepoint_ratio,
-      _feature_version,
-      _tokenization_type,
-      _icu_preserve_whitespace_tokens,
-      _ignored_span_boundary_codepoints,
-      _bounds_sensitive_features,
-      _allowed_chargrams,
-      _tokenize_on_script_change);
-}
-
-inline const libtextclassifier2::Model *GetModel(const void *buf) {
-  return flatbuffers::GetRoot<libtextclassifier2::Model>(buf);
-}
-
-inline const char *ModelIdentifier() {
-  return "TC2 ";
-}
-
-inline bool ModelBufferHasIdentifier(const void *buf) {
-  return flatbuffers::BufferHasIdentifier(
-      buf, ModelIdentifier());
-}
-
-inline bool VerifyModelBuffer(
-    flatbuffers::Verifier &verifier) {
-  return verifier.VerifyBuffer<libtextclassifier2::Model>(ModelIdentifier());
-}
-
-inline void FinishModelBuffer(
-    flatbuffers::FlatBufferBuilder &fbb,
-    flatbuffers::Offset<libtextclassifier2::Model> root) {
-  fbb.Finish(root, ModelIdentifier());
-}
-
-inline std::unique_ptr<ModelT> UnPackModel(
-    const void *buf,
-    const flatbuffers::resolver_function_t *res = nullptr) {
-  return std::unique_ptr<ModelT>(GetModel(buf)->UnPack(res));
-}
-
-}  // namespace libtextclassifier2
-
-#endif  // FLATBUFFERS_GENERATED_MODEL_LIBTEXTCLASSIFIER2_H_
diff --git a/models/actions_suggestions.model b/models/actions_suggestions.model
new file mode 100644
index 0000000..956eced
--- /dev/null
+++ b/models/actions_suggestions.model
Binary files differ
diff --git a/models/lang_id.model b/models/lang_id.model
new file mode 100644
index 0000000..e577a69
--- /dev/null
+++ b/models/lang_id.model
Binary files differ
diff --git a/models/textclassifier.ar.model b/models/textclassifier.ar.model
index 2342daa..4153026 100644
--- a/models/textclassifier.ar.model
+++ b/models/textclassifier.ar.model
Binary files differ
diff --git a/models/textclassifier.en.model b/models/textclassifier.en.model
index a40f940..887d1df 100644
--- a/models/textclassifier.en.model
+++ b/models/textclassifier.en.model
Binary files differ
diff --git a/models/textclassifier.es.model b/models/textclassifier.es.model
index 7de4e5d..2093b41 100644
--- a/models/textclassifier.es.model
+++ b/models/textclassifier.es.model
Binary files differ
diff --git a/models/textclassifier.fr.model b/models/textclassifier.fr.model
index 1072041..b54345b 100644
--- a/models/textclassifier.fr.model
+++ b/models/textclassifier.fr.model
Binary files differ
diff --git a/models/textclassifier.it.model b/models/textclassifier.it.model
index 5bc98ae..e05d2db 100644
--- a/models/textclassifier.it.model
+++ b/models/textclassifier.it.model
Binary files differ
diff --git a/models/textclassifier.ja.model b/models/textclassifier.ja.model
index 9f60b8a..de10271 100644
--- a/models/textclassifier.ja.model
+++ b/models/textclassifier.ja.model
Binary files differ
diff --git a/models/textclassifier.ko.model b/models/textclassifier.ko.model
index 451df45..00d1bf3 100644
--- a/models/textclassifier.ko.model
+++ b/models/textclassifier.ko.model
Binary files differ
diff --git a/models/textclassifier.nl.model b/models/textclassifier.nl.model
index 07ea076..a733938 100644
--- a/models/textclassifier.nl.model
+++ b/models/textclassifier.nl.model
Binary files differ
diff --git a/models/textclassifier.pl.model b/models/textclassifier.pl.model
index 6cf62a5..3947dc2 100644
--- a/models/textclassifier.pl.model
+++ b/models/textclassifier.pl.model
Binary files differ
diff --git a/models/textclassifier.pt.model b/models/textclassifier.pt.model
index a745d58..b7bb298 100644
--- a/models/textclassifier.pt.model
+++ b/models/textclassifier.pt.model
Binary files differ
diff --git a/models/textclassifier.ru.model b/models/textclassifier.ru.model
index aa97ebc..377f73f 100644
--- a/models/textclassifier.ru.model
+++ b/models/textclassifier.ru.model
Binary files differ
diff --git a/models/textclassifier.th.model b/models/textclassifier.th.model
index 37339b7..41a3a3b 100644
--- a/models/textclassifier.th.model
+++ b/models/textclassifier.th.model
Binary files differ
diff --git a/models/textclassifier.tr.model b/models/textclassifier.tr.model
index 2405d9e..e284388 100644
--- a/models/textclassifier.tr.model
+++ b/models/textclassifier.tr.model
Binary files differ
diff --git a/models/textclassifier.universal.model b/models/textclassifier.universal.model
index 5c4220f..7856747 100644
--- a/models/textclassifier.universal.model
+++ b/models/textclassifier.universal.model
Binary files differ
diff --git a/models/textclassifier.zh-Hant.model b/models/textclassifier.zh-Hant.model
index 32edfe4..dd04f09 100644
--- a/models/textclassifier.zh-Hant.model
+++ b/models/textclassifier.zh-Hant.model
Binary files differ
diff --git a/models/textclassifier.zh.model b/models/textclassifier.zh.model
index eb1ff61..4e5f525 100644
--- a/models/textclassifier.zh.model
+++ b/models/textclassifier.zh.model
Binary files differ
diff --git a/test_data/wrong_embeddings.fb b/test_data/wrong_embeddings.fb
deleted file mode 100644
index e1aa3ea..0000000
--- a/test_data/wrong_embeddings.fb
+++ /dev/null
Binary files differ
diff --git a/textclassifier_jni.cc b/textclassifier_jni.cc
deleted file mode 100644
index 29cf745..0000000
--- a/textclassifier_jni.cc
+++ /dev/null
@@ -1,496 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// JNI wrapper for the TextClassifier.
-
-#include "textclassifier_jni.h"
-
-#include <jni.h>
-#include <type_traits>
-#include <vector>
-
-#include "text-classifier.h"
-#include "util/base/integral_types.h"
-#include "util/java/scoped_local_ref.h"
-#include "util/java/string_utils.h"
-#include "util/memory/mmap.h"
-#include "util/utf8/unilib.h"
-
-using libtextclassifier2::AnnotatedSpan;
-using libtextclassifier2::AnnotationOptions;
-using libtextclassifier2::ClassificationOptions;
-using libtextclassifier2::ClassificationResult;
-using libtextclassifier2::CodepointSpan;
-using libtextclassifier2::JStringToUtf8String;
-using libtextclassifier2::Model;
-using libtextclassifier2::ScopedLocalRef;
-using libtextclassifier2::SelectionOptions;
-using libtextclassifier2::TextClassifier;
-#ifdef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
-using libtextclassifier2::UniLib;
-#endif
-
-namespace libtextclassifier2 {
-
-using libtextclassifier2::CodepointSpan;
-
-namespace {
-
-std::string ToStlString(JNIEnv* env, const jstring& str) {
-  std::string result;
-  JStringToUtf8String(env, str, &result);
-  return result;
-}
-
-jobjectArray ClassificationResultsToJObjectArray(
-    JNIEnv* env,
-    const std::vector<ClassificationResult>& classification_result) {
-  const ScopedLocalRef<jclass> result_class(
-      env->FindClass(TC_PACKAGE_PATH TC_CLASS_NAME_STR "$ClassificationResult"),
-      env);
-  if (!result_class) {
-    TC_LOG(ERROR) << "Couldn't find ClassificationResult class.";
-    return nullptr;
-  }
-  const ScopedLocalRef<jclass> datetime_parse_class(
-      env->FindClass(TC_PACKAGE_PATH TC_CLASS_NAME_STR "$DatetimeResult"), env);
-  if (!datetime_parse_class) {
-    TC_LOG(ERROR) << "Couldn't find DatetimeResult class.";
-    return nullptr;
-  }
-
-  const jmethodID result_class_constructor =
-      env->GetMethodID(result_class.get(), "<init>",
-                       "(Ljava/lang/String;FL" TC_PACKAGE_PATH TC_CLASS_NAME_STR
-                       "$DatetimeResult;)V");
-  const jmethodID datetime_parse_class_constructor =
-      env->GetMethodID(datetime_parse_class.get(), "<init>", "(JI)V");
-
-  const jobjectArray results = env->NewObjectArray(classification_result.size(),
-                                                   result_class.get(), nullptr);
-  for (int i = 0; i < classification_result.size(); i++) {
-    jstring row_string =
-        env->NewStringUTF(classification_result[i].collection.c_str());
-    jobject row_datetime_parse = nullptr;
-    if (classification_result[i].datetime_parse_result.IsSet()) {
-      row_datetime_parse = env->NewObject(
-          datetime_parse_class.get(), datetime_parse_class_constructor,
-          classification_result[i].datetime_parse_result.time_ms_utc,
-          classification_result[i].datetime_parse_result.granularity);
-    }
-    jobject result =
-        env->NewObject(result_class.get(), result_class_constructor, row_string,
-                       static_cast<jfloat>(classification_result[i].score),
-                       row_datetime_parse);
-    env->SetObjectArrayElement(results, i, result);
-    env->DeleteLocalRef(result);
-  }
-  return results;
-}
-
-template <typename T, typename F>
-std::pair<bool, T> CallJniMethod0(JNIEnv* env, jobject object,
-                                  jclass class_object, F function,
-                                  const std::string& method_name,
-                                  const std::string& return_java_type) {
-  const jmethodID method = env->GetMethodID(class_object, method_name.c_str(),
-                                            ("()" + return_java_type).c_str());
-  if (!method) {
-    return std::make_pair(false, T());
-  }
-  return std::make_pair(true, (env->*function)(object, method));
-}
-
-SelectionOptions FromJavaSelectionOptions(JNIEnv* env, jobject joptions) {
-  if (!joptions) {
-    return {};
-  }
-
-  const ScopedLocalRef<jclass> options_class(
-      env->FindClass(TC_PACKAGE_PATH TC_CLASS_NAME_STR "$SelectionOptions"),
-      env);
-  const std::pair<bool, jobject> status_or_locales = CallJniMethod0<jobject>(
-      env, joptions, options_class.get(), &JNIEnv::CallObjectMethod,
-      "getLocales", "Ljava/lang/String;");
-  if (!status_or_locales.first) {
-    return {};
-  }
-
-  SelectionOptions options;
-  options.locales =
-      ToStlString(env, reinterpret_cast<jstring>(status_or_locales.second));
-
-  return options;
-}
-
-template <typename T>
-T FromJavaOptionsInternal(JNIEnv* env, jobject joptions,
-                          const std::string& class_name) {
-  if (!joptions) {
-    return {};
-  }
-
-  const ScopedLocalRef<jclass> options_class(env->FindClass(class_name.c_str()),
-                                             env);
-  if (!options_class) {
-    return {};
-  }
-
-  const std::pair<bool, jobject> status_or_locales = CallJniMethod0<jobject>(
-      env, joptions, options_class.get(), &JNIEnv::CallObjectMethod,
-      "getLocale", "Ljava/lang/String;");
-  const std::pair<bool, jobject> status_or_reference_timezone =
-      CallJniMethod0<jobject>(env, joptions, options_class.get(),
-                              &JNIEnv::CallObjectMethod, "getReferenceTimezone",
-                              "Ljava/lang/String;");
-  const std::pair<bool, int64> status_or_reference_time_ms_utc =
-      CallJniMethod0<int64>(env, joptions, options_class.get(),
-                            &JNIEnv::CallLongMethod, "getReferenceTimeMsUtc",
-                            "J");
-
-  if (!status_or_locales.first || !status_or_reference_timezone.first ||
-      !status_or_reference_time_ms_utc.first) {
-    return {};
-  }
-
-  T options;
-  options.locales =
-      ToStlString(env, reinterpret_cast<jstring>(status_or_locales.second));
-  options.reference_timezone = ToStlString(
-      env, reinterpret_cast<jstring>(status_or_reference_timezone.second));
-  options.reference_time_ms_utc = status_or_reference_time_ms_utc.second;
-  return options;
-}
-
-ClassificationOptions FromJavaClassificationOptions(JNIEnv* env,
-                                                    jobject joptions) {
-  return FromJavaOptionsInternal<ClassificationOptions>(
-      env, joptions,
-      TC_PACKAGE_PATH TC_CLASS_NAME_STR "$ClassificationOptions");
-}
-
-AnnotationOptions FromJavaAnnotationOptions(JNIEnv* env, jobject joptions) {
-  return FromJavaOptionsInternal<AnnotationOptions>(
-      env, joptions, TC_PACKAGE_PATH TC_CLASS_NAME_STR "$AnnotationOptions");
-}
-
-CodepointSpan ConvertIndicesBMPUTF8(const std::string& utf8_str,
-                                    CodepointSpan orig_indices,
-                                    bool from_utf8) {
-  const libtextclassifier2::UnicodeText unicode_str =
-      libtextclassifier2::UTF8ToUnicodeText(utf8_str, /*do_copy=*/false);
-
-  int unicode_index = 0;
-  int bmp_index = 0;
-
-  const int* source_index;
-  const int* target_index;
-  if (from_utf8) {
-    source_index = &unicode_index;
-    target_index = &bmp_index;
-  } else {
-    source_index = &bmp_index;
-    target_index = &unicode_index;
-  }
-
-  CodepointSpan result{-1, -1};
-  std::function<void()> assign_indices_fn = [&result, &orig_indices,
-                                             &source_index, &target_index]() {
-    if (orig_indices.first == *source_index) {
-      result.first = *target_index;
-    }
-
-    if (orig_indices.second == *source_index) {
-      result.second = *target_index;
-    }
-  };
-
-  for (auto it = unicode_str.begin(); it != unicode_str.end();
-       ++it, ++unicode_index, ++bmp_index) {
-    assign_indices_fn();
-
-    // There is 1 extra character in the input for each UTF8 character > 0xFFFF.
-    if (*it > 0xFFFF) {
-      ++bmp_index;
-    }
-  }
-  assign_indices_fn();
-
-  return result;
-}
-
-}  // namespace
-
-CodepointSpan ConvertIndicesBMPToUTF8(const std::string& utf8_str,
-                                      CodepointSpan bmp_indices) {
-  return ConvertIndicesBMPUTF8(utf8_str, bmp_indices, /*from_utf8=*/false);
-}
-
-CodepointSpan ConvertIndicesUTF8ToBMP(const std::string& utf8_str,
-                                      CodepointSpan utf8_indices) {
-  return ConvertIndicesBMPUTF8(utf8_str, utf8_indices, /*from_utf8=*/true);
-}
-
-jint GetFdFromAssetFileDescriptor(JNIEnv* env, jobject afd) {
-  // Get system-level file descriptor from AssetFileDescriptor.
-  ScopedLocalRef<jclass> afd_class(
-      env->FindClass("android/content/res/AssetFileDescriptor"), env);
-  if (afd_class == nullptr) {
-    TC_LOG(ERROR) << "Couldn't find AssetFileDescriptor.";
-    return reinterpret_cast<jlong>(nullptr);
-  }
-  jmethodID afd_class_getFileDescriptor = env->GetMethodID(
-      afd_class.get(), "getFileDescriptor", "()Ljava/io/FileDescriptor;");
-  if (afd_class_getFileDescriptor == nullptr) {
-    TC_LOG(ERROR) << "Couldn't find getFileDescriptor.";
-    return reinterpret_cast<jlong>(nullptr);
-  }
-
-  ScopedLocalRef<jclass> fd_class(env->FindClass("java/io/FileDescriptor"),
-                                  env);
-  if (fd_class == nullptr) {
-    TC_LOG(ERROR) << "Couldn't find FileDescriptor.";
-    return reinterpret_cast<jlong>(nullptr);
-  }
-  jfieldID fd_class_descriptor =
-      env->GetFieldID(fd_class.get(), "descriptor", "I");
-  if (fd_class_descriptor == nullptr) {
-    TC_LOG(ERROR) << "Couldn't find descriptor.";
-    return reinterpret_cast<jlong>(nullptr);
-  }
-
-  jobject bundle_jfd = env->CallObjectMethod(afd, afd_class_getFileDescriptor);
-  return env->GetIntField(bundle_jfd, fd_class_descriptor);
-}
-
-jstring GetLocalesFromMmap(JNIEnv* env, libtextclassifier2::ScopedMmap* mmap) {
-  if (!mmap->handle().ok()) {
-    return env->NewStringUTF("");
-  }
-  const Model* model = libtextclassifier2::ViewModel(
-      mmap->handle().start(), mmap->handle().num_bytes());
-  if (!model || !model->locales()) {
-    return env->NewStringUTF("");
-  }
-  return env->NewStringUTF(model->locales()->c_str());
-}
-
-jint GetVersionFromMmap(JNIEnv* env, libtextclassifier2::ScopedMmap* mmap) {
-  if (!mmap->handle().ok()) {
-    return 0;
-  }
-  const Model* model = libtextclassifier2::ViewModel(
-      mmap->handle().start(), mmap->handle().num_bytes());
-  if (!model) {
-    return 0;
-  }
-  return model->version();
-}
-
-jstring GetNameFromMmap(JNIEnv* env, libtextclassifier2::ScopedMmap* mmap) {
-  if (!mmap->handle().ok()) {
-    return env->NewStringUTF("");
-  }
-  const Model* model = libtextclassifier2::ViewModel(
-      mmap->handle().start(), mmap->handle().num_bytes());
-  if (!model || !model->name()) {
-    return env->NewStringUTF("");
-  }
-  return env->NewStringUTF(model->name()->c_str());
-}
-
-}  // namespace libtextclassifier2
-
-using libtextclassifier2::ClassificationResultsToJObjectArray;
-using libtextclassifier2::ConvertIndicesBMPToUTF8;
-using libtextclassifier2::ConvertIndicesUTF8ToBMP;
-using libtextclassifier2::FromJavaAnnotationOptions;
-using libtextclassifier2::FromJavaClassificationOptions;
-using libtextclassifier2::FromJavaSelectionOptions;
-using libtextclassifier2::ToStlString;
-
-JNI_METHOD(jlong, TC_CLASS_NAME, nativeNew)
-(JNIEnv* env, jobject thiz, jint fd) {
-#ifdef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
-  return reinterpret_cast<jlong>(
-      TextClassifier::FromFileDescriptor(fd).release(), new UniLib(env));
-#else
-  return reinterpret_cast<jlong>(
-      TextClassifier::FromFileDescriptor(fd).release());
-#endif
-}
-
-JNI_METHOD(jlong, TC_CLASS_NAME, nativeNewFromPath)
-(JNIEnv* env, jobject thiz, jstring path) {
-  const std::string path_str = ToStlString(env, path);
-#ifdef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
-  return reinterpret_cast<jlong>(
-      TextClassifier::FromPath(path_str, new UniLib(env)).release());
-#else
-  return reinterpret_cast<jlong>(TextClassifier::FromPath(path_str).release());
-#endif
-}
-
-JNI_METHOD(jlong, TC_CLASS_NAME, nativeNewFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
-  const jint fd = libtextclassifier2::GetFdFromAssetFileDescriptor(env, afd);
-#ifdef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
-  return reinterpret_cast<jlong>(
-      TextClassifier::FromFileDescriptor(fd, offset, size, new UniLib(env))
-          .release());
-#else
-  return reinterpret_cast<jlong>(
-      TextClassifier::FromFileDescriptor(fd, offset, size).release());
-#endif
-}
-
-JNI_METHOD(jintArray, TC_CLASS_NAME, nativeSuggestSelection)
-(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
- jint selection_end, jobject options) {
-  if (!ptr) {
-    return nullptr;
-  }
-
-  TextClassifier* model = reinterpret_cast<TextClassifier*>(ptr);
-
-  const std::string context_utf8 = ToStlString(env, context);
-  CodepointSpan input_indices =
-      ConvertIndicesBMPToUTF8(context_utf8, {selection_begin, selection_end});
-  CodepointSpan selection = model->SuggestSelection(
-      context_utf8, input_indices, FromJavaSelectionOptions(env, options));
-  selection = ConvertIndicesUTF8ToBMP(context_utf8, selection);
-
-  jintArray result = env->NewIntArray(2);
-  env->SetIntArrayRegion(result, 0, 1, &(std::get<0>(selection)));
-  env->SetIntArrayRegion(result, 1, 1, &(std::get<1>(selection)));
-  return result;
-}
-
-JNI_METHOD(jobjectArray, TC_CLASS_NAME, nativeClassifyText)
-(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
- jint selection_end, jobject options) {
-  if (!ptr) {
-    return nullptr;
-  }
-  TextClassifier* ff_model = reinterpret_cast<TextClassifier*>(ptr);
-
-  const std::string context_utf8 = ToStlString(env, context);
-  const CodepointSpan input_indices =
-      ConvertIndicesBMPToUTF8(context_utf8, {selection_begin, selection_end});
-  const std::vector<ClassificationResult> classification_result =
-      ff_model->ClassifyText(context_utf8, input_indices,
-                             FromJavaClassificationOptions(env, options));
-
-  return ClassificationResultsToJObjectArray(env, classification_result);
-}
-
-JNI_METHOD(jobjectArray, TC_CLASS_NAME, nativeAnnotate)
-(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jobject options) {
-  if (!ptr) {
-    return nullptr;
-  }
-  TextClassifier* model = reinterpret_cast<TextClassifier*>(ptr);
-  std::string context_utf8 = ToStlString(env, context);
-  std::vector<AnnotatedSpan> annotations =
-      model->Annotate(context_utf8, FromJavaAnnotationOptions(env, options));
-
-  jclass result_class =
-      env->FindClass(TC_PACKAGE_PATH TC_CLASS_NAME_STR "$AnnotatedSpan");
-  if (!result_class) {
-    TC_LOG(ERROR) << "Couldn't find result class: "
-                  << TC_PACKAGE_PATH TC_CLASS_NAME_STR "$AnnotatedSpan";
-    return nullptr;
-  }
-
-  jmethodID result_class_constructor = env->GetMethodID(
-      result_class, "<init>",
-      "(II[L" TC_PACKAGE_PATH TC_CLASS_NAME_STR "$ClassificationResult;)V");
-
-  jobjectArray results =
-      env->NewObjectArray(annotations.size(), result_class, nullptr);
-
-  for (int i = 0; i < annotations.size(); ++i) {
-    CodepointSpan span_bmp =
-        ConvertIndicesUTF8ToBMP(context_utf8, annotations[i].span);
-    jobject result = env->NewObject(
-        result_class, result_class_constructor,
-        static_cast<jint>(span_bmp.first), static_cast<jint>(span_bmp.second),
-        ClassificationResultsToJObjectArray(env,
-
-                                            annotations[i].classification));
-    env->SetObjectArrayElement(results, i, result);
-    env->DeleteLocalRef(result);
-  }
-  env->DeleteLocalRef(result_class);
-  return results;
-}
-
-JNI_METHOD(void, TC_CLASS_NAME, nativeClose)
-(JNIEnv* env, jobject thiz, jlong ptr) {
-  TextClassifier* model = reinterpret_cast<TextClassifier*>(ptr);
-  delete model;
-}
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetLanguage)
-(JNIEnv* env, jobject clazz, jint fd) {
-  TC_LOG(WARNING) << "Using deprecated getLanguage().";
-  return JNI_METHOD_NAME(TC_CLASS_NAME, nativeGetLocales)(env, clazz, fd);
-}
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetLocales)
-(JNIEnv* env, jobject clazz, jint fd) {
-  const std::unique_ptr<libtextclassifier2::ScopedMmap> mmap(
-      new libtextclassifier2::ScopedMmap(fd));
-  return GetLocalesFromMmap(env, mmap.get());
-}
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetLocalesFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
-  const jint fd = libtextclassifier2::GetFdFromAssetFileDescriptor(env, afd);
-  const std::unique_ptr<libtextclassifier2::ScopedMmap> mmap(
-      new libtextclassifier2::ScopedMmap(fd, offset, size));
-  return GetLocalesFromMmap(env, mmap.get());
-}
-
-JNI_METHOD(jint, TC_CLASS_NAME, nativeGetVersion)
-(JNIEnv* env, jobject clazz, jint fd) {
-  const std::unique_ptr<libtextclassifier2::ScopedMmap> mmap(
-      new libtextclassifier2::ScopedMmap(fd));
-  return GetVersionFromMmap(env, mmap.get());
-}
-
-JNI_METHOD(jint, TC_CLASS_NAME, nativeGetVersionFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
-  const jint fd = libtextclassifier2::GetFdFromAssetFileDescriptor(env, afd);
-  const std::unique_ptr<libtextclassifier2::ScopedMmap> mmap(
-      new libtextclassifier2::ScopedMmap(fd, offset, size));
-  return GetVersionFromMmap(env, mmap.get());
-}
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetName)
-(JNIEnv* env, jobject clazz, jint fd) {
-  const std::unique_ptr<libtextclassifier2::ScopedMmap> mmap(
-      new libtextclassifier2::ScopedMmap(fd));
-  return GetNameFromMmap(env, mmap.get());
-}
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetNameFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size) {
-  const jint fd = libtextclassifier2::GetFdFromAssetFileDescriptor(env, afd);
-  const std::unique_ptr<libtextclassifier2::ScopedMmap> mmap(
-      new libtextclassifier2::ScopedMmap(fd, offset, size));
-  return GetNameFromMmap(env, mmap.get());
-}
diff --git a/textclassifier_jni.h b/textclassifier_jni.h
deleted file mode 100644
index d6e742e..0000000
--- a/textclassifier_jni.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_TEXTCLASSIFIER_JNI_H_
-#define LIBTEXTCLASSIFIER_TEXTCLASSIFIER_JNI_H_
-
-#include <jni.h>
-#include <string>
-
-#include "types.h"
-
-// When we use a macro as an argument for a macro, an additional level of
-// indirection is needed, if the macro argument is used with # or ##.
-#define ADD_QUOTES_HELPER(TOKEN) #TOKEN
-#define ADD_QUOTES(TOKEN) ADD_QUOTES_HELPER(TOKEN)
-
-#ifndef TC_PACKAGE_NAME
-#define TC_PACKAGE_NAME android_view_textclassifier
-#endif
-
-#ifndef TC_CLASS_NAME
-#define TC_CLASS_NAME TextClassifierImplNative
-#endif
-#define TC_CLASS_NAME_STR ADD_QUOTES(TC_CLASS_NAME)
-
-#ifndef TC_PACKAGE_PATH
-#define TC_PACKAGE_PATH "android/view/textclassifier/"
-#endif
-
-#define JNI_METHOD_NAME_INTERNAL(package_name, class_name, method_name) \
-  Java_##package_name##_##class_name##_##method_name
-
-#define JNI_METHOD_PRIMITIVE(return_type, package_name, class_name, \
-                             method_name)                           \
-  JNIEXPORT return_type JNICALL JNI_METHOD_NAME_INTERNAL(           \
-      package_name, class_name, method_name)
-
-// The indirection is needed to correctly expand the TC_PACKAGE_NAME macro.
-// See the explanation near ADD_QUOTES macro.
-#define JNI_METHOD2(return_type, package_name, class_name, method_name) \
-  JNI_METHOD_PRIMITIVE(return_type, package_name, class_name, method_name)
-
-#define JNI_METHOD(return_type, class_name, method_name) \
-  JNI_METHOD2(return_type, TC_PACKAGE_NAME, class_name, method_name)
-
-#define JNI_METHOD_NAME2(package_name, class_name, method_name) \
-  JNI_METHOD_NAME_INTERNAL(package_name, class_name, method_name)
-
-#define JNI_METHOD_NAME(class_name, method_name) \
-  JNI_METHOD_NAME2(TC_PACKAGE_NAME, class_name, method_name)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// SmartSelection.
-JNI_METHOD(jlong, TC_CLASS_NAME, nativeNew)
-(JNIEnv* env, jobject thiz, jint fd);
-
-JNI_METHOD(jlong, TC_CLASS_NAME, nativeNewFromPath)
-(JNIEnv* env, jobject thiz, jstring path);
-
-JNI_METHOD(jlong, TC_CLASS_NAME, nativeNewFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
-
-JNI_METHOD(jintArray, TC_CLASS_NAME, nativeSuggestSelection)
-(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
- jint selection_end, jobject options);
-
-JNI_METHOD(jobjectArray, TC_CLASS_NAME, nativeClassifyText)
-(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jint selection_begin,
- jint selection_end, jobject options);
-
-JNI_METHOD(jobjectArray, TC_CLASS_NAME, nativeAnnotate)
-(JNIEnv* env, jobject thiz, jlong ptr, jstring context, jobject options);
-
-JNI_METHOD(void, TC_CLASS_NAME, nativeClose)
-(JNIEnv* env, jobject thiz, jlong ptr);
-
-// DEPRECATED. Use nativeGetLocales instead.
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetLanguage)
-(JNIEnv* env, jobject clazz, jint fd);
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetLocales)
-(JNIEnv* env, jobject clazz, jint fd);
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetLocalesFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
-
-JNI_METHOD(jint, TC_CLASS_NAME, nativeGetVersion)
-(JNIEnv* env, jobject clazz, jint fd);
-
-JNI_METHOD(jint, TC_CLASS_NAME, nativeGetVersionFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetName)
-(JNIEnv* env, jobject clazz, jint fd);
-
-JNI_METHOD(jstring, TC_CLASS_NAME, nativeGetNameFromAssetFileDescriptor)
-(JNIEnv* env, jobject thiz, jobject afd, jlong offset, jlong size);
-
-#ifdef __cplusplus
-}
-#endif
-
-namespace libtextclassifier2 {
-
-// Given a utf8 string and a span expressed in Java BMP (basic multilingual
-// plane) codepoints, converts it to a span expressed in utf8 codepoints.
-libtextclassifier2::CodepointSpan ConvertIndicesBMPToUTF8(
-    const std::string& utf8_str, libtextclassifier2::CodepointSpan bmp_indices);
-
-// Given a utf8 string and a span expressed in utf8 codepoints, converts it to a
-// span expressed in Java BMP (basic multilingual plane) codepoints.
-libtextclassifier2::CodepointSpan ConvertIndicesUTF8ToBMP(
-    const std::string& utf8_str,
-    libtextclassifier2::CodepointSpan utf8_indices);
-
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_TEXTCLASSIFIER_JNI_H_
diff --git a/util/base/logging.h b/util/base/logging.h
deleted file mode 100644
index 4391d46..0000000
--- a/util/base/logging.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_H_
-
-#include <cassert>
-#include <string>
-
-#include "util/base/logging_levels.h"
-#include "util/base/port.h"
-
-
-namespace libtextclassifier2 {
-namespace logging {
-
-// A tiny code footprint string stream for assembling log messages.
-struct LoggingStringStream {
-  LoggingStringStream() {}
-  LoggingStringStream &stream() { return *this; }
-  // Needed for invocation in TC_CHECK macro.
-  explicit operator bool() const { return true; }
-
-  std::string message;
-};
-
-template <typename T>
-inline LoggingStringStream &operator<<(LoggingStringStream &stream,
-                                       const T &entry) {
-  stream.message.append(std::to_string(entry));
-  return stream;
-}
-
-inline LoggingStringStream &operator<<(LoggingStringStream &stream,
-                                       const char *message) {
-  stream.message.append(message);
-  return stream;
-}
-
-#if defined(HAS_GLOBAL_STRING)
-inline LoggingStringStream &operator<<(LoggingStringStream &stream,
-                                       const ::string &message) {
-  stream.message.append(message);
-  return stream;
-}
-#endif
-
-inline LoggingStringStream &operator<<(LoggingStringStream &stream,
-                                       const std::string &message) {
-  stream.message.append(message);
-  return stream;
-}
-
-// The class that does all the work behind our TC_LOG(severity) macros.  Each
-// TC_LOG(severity) << obj1 << obj2 << ...; logging statement creates a
-// LogMessage temporary object containing a stringstream.  Each operator<< adds
-// info to that stringstream and the LogMessage destructor performs the actual
-// logging.  The reason this works is that in C++, "all temporary objects are
-// destroyed as the last step in evaluating the full-expression that (lexically)
-// contains the point where they were created."  For more info, see
-// http://en.cppreference.com/w/cpp/language/lifetime.  Hence, the destructor is
-// invoked after the last << from that logging statement.
-class LogMessage {
- public:
-  LogMessage(LogSeverity severity, const char *file_name,
-             int line_number) TC_ATTRIBUTE_NOINLINE;
-
-  ~LogMessage() TC_ATTRIBUTE_NOINLINE;
-
-  // Returns the stream associated with the logger object.
-  LoggingStringStream &stream() { return stream_; }
-
- private:
-  const LogSeverity severity_;
-
-  // Stream that "prints" all info into a string (not to a file).  We construct
-  // here the entire logging message and next print it in one operation.
-  LoggingStringStream stream_;
-};
-
-// Pseudo-stream that "eats" the tokens <<-pumped into it, without printing
-// anything.
-class NullStream {
- public:
-  NullStream() {}
-  NullStream &stream() { return *this; }
-};
-template <typename T>
-inline NullStream &operator<<(NullStream &str, const T &) {
-  return str;
-}
-
-}  // namespace logging
-}  // namespace libtextclassifier2
-
-#define TC_LOG(severity)                                           \
-  ::libtextclassifier2::logging::LogMessage(                       \
-      ::libtextclassifier2::logging::severity, __FILE__, __LINE__) \
-      .stream()
-
-// If condition x is true, does nothing.  Otherwise, crashes the program (liek
-// LOG(FATAL)) with an informative message.  Can be continued with extra
-// messages, via <<, like any logging macro, e.g.,
-//
-// TC_CHECK(my_cond) << "I think we hit a problem";
-#define TC_CHECK(x)                                                           \
-  (x) || TC_LOG(FATAL) << __FILE__ << ":" << __LINE__ << ": check failed: \"" \
-                       << #x
-
-#define TC_CHECK_EQ(x, y) TC_CHECK((x) == (y))
-#define TC_CHECK_LT(x, y) TC_CHECK((x) < (y))
-#define TC_CHECK_GT(x, y) TC_CHECK((x) > (y))
-#define TC_CHECK_LE(x, y) TC_CHECK((x) <= (y))
-#define TC_CHECK_GE(x, y) TC_CHECK((x) >= (y))
-#define TC_CHECK_NE(x, y) TC_CHECK((x) != (y))
-
-#define TC_NULLSTREAM ::libtextclassifier2::logging::NullStream().stream()
-
-// Debug checks: a TC_DCHECK<suffix> macro should behave like TC_CHECK<suffix>
-// in debug mode an don't check / don't print anything in non-debug mode.
-#ifdef NDEBUG
-
-#define TC_DCHECK(x) TC_NULLSTREAM
-#define TC_DCHECK_EQ(x, y) TC_NULLSTREAM
-#define TC_DCHECK_LT(x, y) TC_NULLSTREAM
-#define TC_DCHECK_GT(x, y) TC_NULLSTREAM
-#define TC_DCHECK_LE(x, y) TC_NULLSTREAM
-#define TC_DCHECK_GE(x, y) TC_NULLSTREAM
-#define TC_DCHECK_NE(x, y) TC_NULLSTREAM
-
-#else  // NDEBUG
-
-// In debug mode, each TC_DCHECK<suffix> is equivalent to TC_CHECK<suffix>,
-// i.e., a real check that crashes when the condition is not true.
-#define TC_DCHECK(x) TC_CHECK(x)
-#define TC_DCHECK_EQ(x, y) TC_CHECK_EQ(x, y)
-#define TC_DCHECK_LT(x, y) TC_CHECK_LT(x, y)
-#define TC_DCHECK_GT(x, y) TC_CHECK_GT(x, y)
-#define TC_DCHECK_LE(x, y) TC_CHECK_LE(x, y)
-#define TC_DCHECK_GE(x, y) TC_CHECK_GE(x, y)
-#define TC_DCHECK_NE(x, y) TC_CHECK_NE(x, y)
-
-#endif  // NDEBUG
-
-#ifdef LIBTEXTCLASSIFIER_VLOG
-#define TC_VLOG(severity)                                      \
-  ::libtextclassifier2::logging::LogMessage(                   \
-      ::libtextclassifier2::logging::INFO, __FILE__, __LINE__) \
-      .stream()
-#else
-#define TC_VLOG(severity) TC_NULLSTREAM
-#endif
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_H_
diff --git a/util/calendar/calendar-icu.cc b/util/calendar/calendar-icu.cc
deleted file mode 100644
index 34ea22d..0000000
--- a/util/calendar/calendar-icu.cc
+++ /dev/null
@@ -1,436 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/calendar/calendar-icu.h"
-
-#include <memory>
-
-#include "util/base/macros.h"
-#include "unicode/gregocal.h"
-#include "unicode/timezone.h"
-#include "unicode/ucal.h"
-
-namespace libtextclassifier2 {
-namespace {
-int MapToDayOfWeekOrDefault(int relation_type, int default_value) {
-  switch (relation_type) {
-    case DateParseData::MONDAY:
-      return UCalendarDaysOfWeek::UCAL_MONDAY;
-    case DateParseData::TUESDAY:
-      return UCalendarDaysOfWeek::UCAL_TUESDAY;
-    case DateParseData::WEDNESDAY:
-      return UCalendarDaysOfWeek::UCAL_WEDNESDAY;
-    case DateParseData::THURSDAY:
-      return UCalendarDaysOfWeek::UCAL_THURSDAY;
-    case DateParseData::FRIDAY:
-      return UCalendarDaysOfWeek::UCAL_FRIDAY;
-    case DateParseData::SATURDAY:
-      return UCalendarDaysOfWeek::UCAL_SATURDAY;
-    case DateParseData::SUNDAY:
-      return UCalendarDaysOfWeek::UCAL_SUNDAY;
-    default:
-      return default_value;
-  }
-}
-
-bool DispatchToRecedeOrToLastDayOfWeek(icu::Calendar* date, int relation_type,
-                                       int distance) {
-  UErrorCode status = U_ZERO_ERROR;
-  switch (relation_type) {
-    case DateParseData::MONDAY:
-    case DateParseData::TUESDAY:
-    case DateParseData::WEDNESDAY:
-    case DateParseData::THURSDAY:
-    case DateParseData::FRIDAY:
-    case DateParseData::SATURDAY:
-    case DateParseData::SUNDAY:
-      for (int i = 0; i < distance; i++) {
-        do {
-          if (U_FAILURE(status)) {
-            TC_LOG(ERROR) << "error day of week";
-            return false;
-          }
-          date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1, status);
-          if (U_FAILURE(status)) {
-            TC_LOG(ERROR) << "error adding a day";
-            return false;
-          }
-        } while (date->get(UCalendarDateFields::UCAL_DAY_OF_WEEK, status) !=
-                 MapToDayOfWeekOrDefault(relation_type, 1));
-      }
-      return true;
-    case DateParseData::DAY:
-      date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, -1 * distance, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a day";
-        return false;
-      }
-
-      return true;
-    case DateParseData::WEEK:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_WEEK, 1);
-      date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, -7 * (distance - 1),
-                status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a week";
-        return false;
-      }
-
-      return true;
-    case DateParseData::MONTH:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1);
-      date->add(UCalendarDateFields::UCAL_MONTH, -1 * (distance - 1), status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a month";
-        return false;
-      }
-      return true;
-    case DateParseData::YEAR:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_YEAR, 1);
-      date->add(UCalendarDateFields::UCAL_YEAR, -1 * (distance - 1), status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a year";
-
-        return true;
-        default:
-          return false;
-      }
-      return false;
-  }
-}
-
-bool DispatchToAdvancerOrToNextOrSameDayOfWeek(icu::Calendar* date,
-                                               int relation_type) {
-  UErrorCode status = U_ZERO_ERROR;
-  switch (relation_type) {
-    case DateParseData::MONDAY:
-    case DateParseData::TUESDAY:
-    case DateParseData::WEDNESDAY:
-    case DateParseData::THURSDAY:
-    case DateParseData::FRIDAY:
-    case DateParseData::SATURDAY:
-    case DateParseData::SUNDAY:
-      while (date->get(UCalendarDateFields::UCAL_DAY_OF_WEEK, status) !=
-             MapToDayOfWeekOrDefault(relation_type, 1)) {
-        if (U_FAILURE(status)) {
-          TC_LOG(ERROR) << "error day of week";
-          return false;
-        }
-        date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1, status);
-        if (U_FAILURE(status)) {
-          TC_LOG(ERROR) << "error adding a day";
-          return false;
-        }
-      }
-      return true;
-    case DateParseData::DAY:
-      date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a day";
-        return false;
-      }
-
-      return true;
-    case DateParseData::WEEK:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_WEEK, 1);
-      date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 7, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a week";
-        return false;
-      }
-
-      return true;
-    case DateParseData::MONTH:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1);
-      date->add(UCalendarDateFields::UCAL_MONTH, 1, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a month";
-        return false;
-      }
-      return true;
-    case DateParseData::YEAR:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_YEAR, 1);
-      date->add(UCalendarDateFields::UCAL_YEAR, 1, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a year";
-
-        return true;
-        default:
-          return false;
-      }
-      return false;
-  }
-}
-
-bool DispatchToAdvancerOrToNextDayOfWeek(icu::Calendar* date, int relation_type,
-                                         int distance) {
-  UErrorCode status = U_ZERO_ERROR;
-  switch (relation_type) {
-    case DateParseData::MONDAY:
-    case DateParseData::TUESDAY:
-    case DateParseData::WEDNESDAY:
-    case DateParseData::THURSDAY:
-    case DateParseData::FRIDAY:
-    case DateParseData::SATURDAY:
-    case DateParseData::SUNDAY:
-      for (int i = 0; i < distance; i++) {
-        do {
-          if (U_FAILURE(status)) {
-            TC_LOG(ERROR) << "error day of week";
-            return false;
-          }
-          date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1, status);
-          if (U_FAILURE(status)) {
-            TC_LOG(ERROR) << "error adding a day";
-            return false;
-          }
-        } while (date->get(UCalendarDateFields::UCAL_DAY_OF_WEEK, status) !=
-                 MapToDayOfWeekOrDefault(relation_type, 1));
-      }
-      return true;
-    case DateParseData::DAY:
-      date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, distance, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a day";
-        return false;
-      }
-
-      return true;
-    case DateParseData::WEEK:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_WEEK, 1);
-      date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 7 * distance, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a week";
-        return false;
-      }
-
-      return true;
-    case DateParseData::MONTH:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1);
-      date->add(UCalendarDateFields::UCAL_MONTH, 1 * distance, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a month";
-        return false;
-      }
-      return true;
-    case DateParseData::YEAR:
-      date->set(UCalendarDateFields::UCAL_DAY_OF_YEAR, 1);
-      date->add(UCalendarDateFields::UCAL_YEAR, 1 * distance, status);
-      if (U_FAILURE(status)) {
-        TC_LOG(ERROR) << "error adding a year";
-
-        return true;
-        default:
-          return false;
-      }
-      return false;
-  }
-}
-
-bool RoundToGranularity(DatetimeGranularity granularity,
-                        icu::Calendar* calendar) {
-  // Force recomputation before doing the rounding.
-  UErrorCode status = U_ZERO_ERROR;
-  calendar->get(UCalendarDateFields::UCAL_DAY_OF_WEEK, status);
-  if (U_FAILURE(status)) {
-    TC_LOG(ERROR) << "Can't interpret date.";
-    return false;
-  }
-
-  switch (granularity) {
-    case GRANULARITY_YEAR:
-      calendar->set(UCalendarDateFields::UCAL_MONTH, 0);
-      TC_FALLTHROUGH_INTENDED;
-    case GRANULARITY_MONTH:
-      calendar->set(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1);
-      TC_FALLTHROUGH_INTENDED;
-    case GRANULARITY_DAY:
-      calendar->set(UCalendarDateFields::UCAL_HOUR, 0);
-      TC_FALLTHROUGH_INTENDED;
-    case GRANULARITY_HOUR:
-      calendar->set(UCalendarDateFields::UCAL_MINUTE, 0);
-      TC_FALLTHROUGH_INTENDED;
-    case GRANULARITY_MINUTE:
-      calendar->set(UCalendarDateFields::UCAL_SECOND, 0);
-      break;
-
-    case GRANULARITY_WEEK:
-      calendar->set(UCalendarDateFields::UCAL_DAY_OF_WEEK,
-                    calendar->getFirstDayOfWeek());
-      calendar->set(UCalendarDateFields::UCAL_HOUR, 0);
-      calendar->set(UCalendarDateFields::UCAL_MINUTE, 0);
-      calendar->set(UCalendarDateFields::UCAL_SECOND, 0);
-      break;
-
-    case GRANULARITY_UNKNOWN:
-    case GRANULARITY_SECOND:
-      break;
-  }
-
-  return true;
-}
-
-}  // namespace
-
-bool CalendarLib::InterpretParseData(const DateParseData& parse_data,
-                                     int64 reference_time_ms_utc,
-                                     const std::string& reference_timezone,
-                                     const std::string& reference_locale,
-                                     DatetimeGranularity granularity,
-                                     int64* interpreted_time_ms_utc) const {
-  UErrorCode status = U_ZERO_ERROR;
-
-  std::unique_ptr<icu::Calendar> date(icu::Calendar::createInstance(
-      icu::Locale::createFromName(reference_locale.c_str()), status));
-  if (U_FAILURE(status)) {
-    TC_LOG(ERROR) << "error getting calendar instance";
-    return false;
-  }
-
-  date->adoptTimeZone(icu::TimeZone::createTimeZone(
-      icu::UnicodeString::fromUTF8(reference_timezone)));
-  date->setTime(reference_time_ms_utc, status);
-
-  // By default, the parsed time is interpreted to be on the reference day. But
-  // a parsed date, should have time 0:00:00 unless specified.
-  date->set(UCalendarDateFields::UCAL_HOUR_OF_DAY, 0);
-  date->set(UCalendarDateFields::UCAL_MINUTE, 0);
-  date->set(UCalendarDateFields::UCAL_SECOND, 0);
-  date->set(UCalendarDateFields::UCAL_MILLISECOND, 0);
-
-  static const int64 kMillisInHour = 1000 * 60 * 60;
-  if (parse_data.field_set_mask & DateParseData::Fields::ZONE_OFFSET_FIELD) {
-    date->set(UCalendarDateFields::UCAL_ZONE_OFFSET,
-              parse_data.zone_offset * kMillisInHour);
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::DST_OFFSET_FIELD) {
-    // convert from hours to milliseconds
-    date->set(UCalendarDateFields::UCAL_DST_OFFSET,
-              parse_data.dst_offset * kMillisInHour);
-  }
-
-  if (parse_data.field_set_mask & DateParseData::Fields::RELATION_FIELD) {
-    switch (parse_data.relation) {
-      case DateParseData::Relation::NEXT:
-        if (parse_data.field_set_mask &
-            DateParseData::Fields::RELATION_TYPE_FIELD) {
-          if (!DispatchToAdvancerOrToNextDayOfWeek(
-                  date.get(), parse_data.relation_type, 1)) {
-            return false;
-          }
-        }
-        break;
-      case DateParseData::Relation::NEXT_OR_SAME:
-        if (parse_data.field_set_mask &
-            DateParseData::Fields::RELATION_TYPE_FIELD) {
-          if (!DispatchToAdvancerOrToNextOrSameDayOfWeek(
-                  date.get(), parse_data.relation_type)) {
-            return false;
-          }
-        }
-        break;
-      case DateParseData::Relation::LAST:
-        if (parse_data.field_set_mask &
-            DateParseData::Fields::RELATION_TYPE_FIELD) {
-          if (!DispatchToRecedeOrToLastDayOfWeek(date.get(),
-                                                 parse_data.relation_type, 1)) {
-            return false;
-          }
-        }
-        break;
-      case DateParseData::Relation::NOW:
-        // NOOP
-        break;
-      case DateParseData::Relation::TOMORROW:
-        date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, 1, status);
-        if (U_FAILURE(status)) {
-          TC_LOG(ERROR) << "error adding a day";
-          return false;
-        }
-        break;
-      case DateParseData::Relation::YESTERDAY:
-        date->add(UCalendarDateFields::UCAL_DAY_OF_MONTH, -1, status);
-        if (U_FAILURE(status)) {
-          TC_LOG(ERROR) << "error subtracting a day";
-          return false;
-        }
-        break;
-      case DateParseData::Relation::PAST:
-        if (parse_data.field_set_mask &
-            DateParseData::Fields::RELATION_TYPE_FIELD) {
-          if (parse_data.field_set_mask &
-              DateParseData::Fields::RELATION_DISTANCE_FIELD) {
-            if (!DispatchToRecedeOrToLastDayOfWeek(
-                    date.get(), parse_data.relation_type,
-                    parse_data.relation_distance)) {
-              return false;
-            }
-          }
-        }
-        break;
-      case DateParseData::Relation::FUTURE:
-        if (parse_data.field_set_mask &
-            DateParseData::Fields::RELATION_TYPE_FIELD) {
-          if (parse_data.field_set_mask &
-              DateParseData::Fields::RELATION_DISTANCE_FIELD) {
-            if (!DispatchToAdvancerOrToNextDayOfWeek(
-                    date.get(), parse_data.relation_type,
-                    parse_data.relation_distance)) {
-              return false;
-            }
-          }
-        }
-        break;
-    }
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::YEAR_FIELD) {
-    date->set(UCalendarDateFields::UCAL_YEAR, parse_data.year);
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::MONTH_FIELD) {
-    // NOTE: Java and ICU disagree on month formats
-    date->set(UCalendarDateFields::UCAL_MONTH, parse_data.month - 1);
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::DAY_FIELD) {
-    date->set(UCalendarDateFields::UCAL_DAY_OF_MONTH, parse_data.day_of_month);
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::HOUR_FIELD) {
-    if (parse_data.field_set_mask & DateParseData::Fields::AMPM_FIELD &&
-        parse_data.ampm == 1 && parse_data.hour < 12) {
-      date->set(UCalendarDateFields::UCAL_HOUR_OF_DAY, parse_data.hour + 12);
-    } else {
-      date->set(UCalendarDateFields::UCAL_HOUR_OF_DAY, parse_data.hour);
-    }
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::MINUTE_FIELD) {
-    date->set(UCalendarDateFields::UCAL_MINUTE, parse_data.minute);
-  }
-  if (parse_data.field_set_mask & DateParseData::Fields::SECOND_FIELD) {
-    date->set(UCalendarDateFields::UCAL_SECOND, parse_data.second);
-  }
-
-  if (!RoundToGranularity(granularity, date.get())) {
-    return false;
-  }
-
-  *interpreted_time_ms_utc = date->getTime(status);
-  if (U_FAILURE(status)) {
-    TC_LOG(ERROR) << "error getting time from instance";
-    return false;
-  }
-
-  return true;
-}
-}  // namespace libtextclassifier2
diff --git a/util/calendar/calendar-icu.h b/util/calendar/calendar-icu.h
deleted file mode 100644
index 8aae7ab..0000000
--- a/util/calendar/calendar-icu.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_ICU_H_
-#define LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_ICU_H_
-
-#include <string>
-
-#include "types.h"
-#include "util/base/integral_types.h"
-#include "util/base/logging.h"
-
-namespace libtextclassifier2 {
-
-class CalendarLib {
- public:
-  // Interprets parse_data as milliseconds since_epoch. Relative times are
-  // resolved against the current time (reference_time_ms_utc). Returns true if
-  // the interpratation was successful, false otherwise.
-  bool InterpretParseData(const DateParseData& parse_data,
-                          int64 reference_time_ms_utc,
-                          const std::string& reference_timezone,
-                          const std::string& reference_locale,
-                          DatetimeGranularity granularity,
-                          int64* interpreted_time_ms_utc) const;
-};
-}  // namespace libtextclassifier2
-#endif  // LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_ICU_H_
diff --git a/util/calendar/calendar_test.cc b/util/calendar/calendar_test.cc
deleted file mode 100644
index 1f29106..0000000
--- a/util/calendar/calendar_test.cc
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// This test serves the purpose of making sure all the different implementations
-// of the unspoken CalendarLib interface support the same methods.
-
-#include "util/calendar/calendar.h"
-#include "util/base/logging.h"
-
-#include "gtest/gtest.h"
-
-namespace libtextclassifier2 {
-namespace {
-
-TEST(CalendarTest, Interface) {
-  CalendarLib calendar;
-  int64 time;
-  std::string timezone;
-  bool result = calendar.InterpretParseData(
-      DateParseData{0l, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-                    static_cast<DateParseData::Relation>(0),
-                    static_cast<DateParseData::RelationType>(0), 0},
-      0L, "Zurich", "en-CH", GRANULARITY_UNKNOWN, &time);
-  TC_LOG(INFO) << result;
-}
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST(CalendarTest, RoundingToGranularity) {
-  CalendarLib calendar;
-  int64 time;
-  std::string timezone;
-  DateParseData data;
-  data.year = 2018;
-  data.month = 4;
-  data.day_of_month = 25;
-  data.hour = 9;
-  data.minute = 33;
-  data.second = 59;
-  data.field_set_mask = DateParseData::YEAR_FIELD | DateParseData::MONTH_FIELD |
-                        DateParseData::DAY_FIELD | DateParseData::HOUR_FIELD |
-                        DateParseData::MINUTE_FIELD |
-                        DateParseData::SECOND_FIELD;
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_YEAR, &time));
-  EXPECT_EQ(time, 1514761200000L /* Jan 01 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_MONTH, &time));
-  EXPECT_EQ(time, 1522533600000L /* Apr 01 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_WEEK, &time));
-  EXPECT_EQ(time, 1524434400000L /* Mon Apr 23 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"*-CH",
-      /*granularity=*/GRANULARITY_WEEK, &time));
-  EXPECT_EQ(time, 1524434400000L /* Mon Apr 23 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-US",
-      /*granularity=*/GRANULARITY_WEEK, &time));
-  EXPECT_EQ(time, 1524348000000L /* Sun Apr 22 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"*-US",
-      /*granularity=*/GRANULARITY_WEEK, &time));
-  EXPECT_EQ(time, 1524348000000L /* Sun Apr 22 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_DAY, &time));
-  EXPECT_EQ(time, 1524607200000L /* Apr 25 2018 00:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_HOUR, &time));
-  EXPECT_EQ(time, 1524639600000L /* Apr 25 2018 09:00:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_MINUTE, &time));
-  EXPECT_EQ(time, 1524641580000 /* Apr 25 2018 09:33:00 */);
-
-  ASSERT_TRUE(calendar.InterpretParseData(
-      data,
-      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
-      /*reference_locale=*/"en-CH",
-      /*granularity=*/GRANULARITY_SECOND, &time));
-  EXPECT_EQ(time, 1524641639000 /* Apr 25 2018 09:33:59 */);
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_DUMMY
-
-}  // namespace
-}  // namespace libtextclassifier2
diff --git a/util/gtl/map_util.h b/util/gtl/map_util.h
deleted file mode 100644
index bd020f8..0000000
--- a/util/gtl/map_util.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_GTL_MAP_UTIL_H_
-#define LIBTEXTCLASSIFIER_UTIL_GTL_MAP_UTIL_H_
-
-namespace libtextclassifier2 {
-
-// Returns a const reference to the value associated with the given key if it
-// exists, otherwise returns a const reference to the provided default value.
-//
-// WARNING: If a temporary object is passed as the default "value,"
-// this function will return a reference to that temporary object,
-// which will be destroyed at the end of the statement. A common
-// example: if you have a map with string values, and you pass a char*
-// as the default "value," either use the returned value immediately
-// or store it in a string (not string&).
-template <class Collection>
-const typename Collection::value_type::second_type& FindWithDefault(
-    const Collection& collection,
-    const typename Collection::value_type::first_type& key,
-    const typename Collection::value_type::second_type& value) {
-  typename Collection::const_iterator it = collection.find(key);
-  if (it == collection.end()) {
-    return value;
-  }
-  return it->second;
-}
-
-// Inserts the given key and value into the given collection if and only if the
-// given key did NOT already exist in the collection. If the key previously
-// existed in the collection, the value is not changed. Returns true if the
-// key-value pair was inserted; returns false if the key was already present.
-template <class Collection>
-bool InsertIfNotPresent(Collection* const collection,
-                        const typename Collection::value_type& vt) {
-  return collection->insert(vt).second;
-}
-
-// Same as above except the key and value are passed separately.
-template <class Collection>
-bool InsertIfNotPresent(
-    Collection* const collection,
-    const typename Collection::value_type::first_type& key,
-    const typename Collection::value_type::second_type& value) {
-  return InsertIfNotPresent(collection,
-                            typename Collection::value_type(key, value));
-}
-
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_GTL_MAP_UTIL_H_
diff --git a/util/gtl/stl_util.h b/util/gtl/stl_util.h
deleted file mode 100644
index 7b88e05..0000000
--- a/util/gtl/stl_util.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_GTL_STL_UTIL_H_
-#define LIBTEXTCLASSIFIER_UTIL_GTL_STL_UTIL_H_
-
-namespace libtextclassifier2 {
-
-// Deletes all the elements in an STL container and clears the container. This
-// function is suitable for use with a vector, set, hash_set, or any other STL
-// container which defines sensible begin(), end(), and clear() methods.
-// If container is NULL, this function is a no-op.
-template <typename T>
-void STLDeleteElements(T *container) {
-  if (!container) return;
-  auto it = container->begin();
-  while (it != container->end()) {
-    auto temp = it;
-    ++it;
-    delete *temp;
-  }
-  container->clear();
-}
-
-// Given an STL container consisting of (key, value) pairs, STLDeleteValues
-// deletes all the "value" components and clears the container. Does nothing in
-// the case it's given a nullptr.
-template <typename T>
-void STLDeleteValues(T *container) {
-  if (!container) return;
-  auto it = container->begin();
-  while (it != container->end()) {
-    auto temp = it;
-    ++it;
-    delete temp->second;
-  }
-  container->clear();
-}
-
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_GTL_STL_UTIL_H_
diff --git a/util/hash/hash.cc b/util/hash/hash.cc
index 9722ddc..eaa85ae 100644
--- a/util/hash/hash.cc
+++ b/util/hash/hash.cc
@@ -16,7 +16,7 @@
 
 #include "util/hash/hash.h"
 
-#include "util/base/macros.h"
+#include "utils/base/macros.h"
 
 namespace libtextclassifier2 {
 
@@ -59,10 +59,10 @@
   switch (n) {
     case 3:
       h ^= ByteAs32(data[2]) << 16;
-      TC_FALLTHROUGH_INTENDED;
+      TC3_FALLTHROUGH_INTENDED;
     case 2:
       h ^= ByteAs32(data[1]) << 8;
-      TC_FALLTHROUGH_INTENDED;
+      TC3_FALLTHROUGH_INTENDED;
     case 1:
       h ^= ByteAs32(data[0]);
       h *= m;
diff --git a/util/hash/hash.h b/util/hash/hash.h
index b7a3b53..9353e5f 100644
--- a/util/hash/hash.h
+++ b/util/hash/hash.h
@@ -19,10 +19,12 @@
 
 #include <string>
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 
 namespace libtextclassifier2 {
 
+using namespace libtextclassifier3;
+
 uint32 Hash32(const char *data, size_t n, uint32 seed);
 
 static inline uint32 Hash32WithDefaultSeed(const char *data, size_t n) {
diff --git a/util/java/string_utils.h b/util/java/string_utils.h
deleted file mode 100644
index 6a85856..0000000
--- a/util/java/string_utils.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_JAVA_STRING_UTILS_H_
-#define LIBTEXTCLASSIFIER_UTIL_JAVA_STRING_UTILS_H_
-
-#include <jni.h>
-#include <string>
-
-namespace libtextclassifier2 {
-
-bool JStringToUtf8String(JNIEnv* env, const jstring& jstr, std::string* result);
-
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_JAVA_STRING_UTILS_H_
diff --git a/util/strings/stringpiece.h b/util/strings/stringpiece.h
deleted file mode 100644
index cd07848..0000000
--- a/util/strings/stringpiece.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_STRINGS_STRINGPIECE_H_
-#define LIBTEXTCLASSIFIER_UTIL_STRINGS_STRINGPIECE_H_
-
-#include <stddef.h>
-
-#include <string>
-
-namespace libtextclassifier2 {
-
-// Read-only "view" of a piece of data.  Does not own the underlying data.
-class StringPiece {
- public:
-  StringPiece() : StringPiece(nullptr, 0) {}
-
-  StringPiece(const char *str)  // NOLINT(runtime/explicit)
-      : start_(str), size_(strlen(str)) {}
-
-  StringPiece(const char *start, size_t size)
-      : start_(start), size_(size) {}
-
-  // Intentionally no "explicit" keyword: in function calls, we want strings to
-  // be converted to StringPiece implicitly.
-  StringPiece(const std::string &s)  // NOLINT(runtime/explicit)
-      : StringPiece(s.data(), s.size()) {}
-
-  StringPiece(const std::string &s, int offset, int len)
-      : StringPiece(s.data() + offset, len) {}
-
-  char operator[](size_t i) const { return start_[i]; }
-
-  // Returns start address of underlying data.
-  const char *data() const { return start_; }
-
-  // Returns number of bytes of underlying data.
-  size_t size() const { return size_; }
-  size_t length() const { return size_; }
-
-  bool empty() const { return size_ == 0; }
-
-  // Returns a std::string containing a copy of the underlying data.
-  std::string ToString() const {
-    return std::string(data(), size());
-  }
-
- private:
-  const char *start_;  // Not owned.
-  size_t size_;
-};
-
-}  // namespace libtextclassifier2
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_STRINGS_STRINGPIECE_H_
diff --git a/util/strings/utf8.cc b/util/strings/utf8.cc
deleted file mode 100644
index 39dcb4e..0000000
--- a/util/strings/utf8.cc
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/strings/utf8.h"
-
-namespace libtextclassifier2 {
-bool IsValidUTF8(const char *src, int size) {
-  for (int i = 0; i < size;) {
-    // Unexpected trail byte.
-    if (IsTrailByte(src[i])) {
-      return false;
-    }
-
-    const int num_codepoint_bytes = GetNumBytesForUTF8Char(&src[i]);
-    if (num_codepoint_bytes <= 0 || i + num_codepoint_bytes > size) {
-      return false;
-    }
-
-    // Check that remaining bytes in the codepoint are trailing bytes.
-    i++;
-    for (int k = 1; k < num_codepoint_bytes; k++, i++) {
-      if (!IsTrailByte(src[i])) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-}  // namespace libtextclassifier2
diff --git a/util/utf8/unilib.h b/util/utf8/unilib.h
deleted file mode 100644
index 29b4575..0000000
--- a/util/utf8/unilib.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTIL_UTF8_UNILIB_H_
-#define LIBTEXTCLASSIFIER_UTIL_UTF8_UNILIB_H_
-
-#include "util/utf8/unilib-icu.h"
-#define CREATE_UNILIB_FOR_TESTING const UniLib unilib;
-
-#endif  // LIBTEXTCLASSIFIER_UTIL_UTF8_UNILIB_H_
diff --git a/util/utf8/unilib_test.cc b/util/utf8/unilib_test.cc
deleted file mode 100644
index 13b1347..0000000
--- a/util/utf8/unilib_test.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/utf8/unilib.h"
-
-#include "util/base/logging.h"
-#include "util/utf8/unicodetext.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
-
-namespace libtextclassifier2 {
-namespace {
-
-using ::testing::ElementsAre;
-
-TEST(UniLibTest, CharacterClassesAscii) {
-  CREATE_UNILIB_FOR_TESTING;
-  EXPECT_TRUE(unilib.IsOpeningBracket('('));
-  EXPECT_TRUE(unilib.IsClosingBracket(')'));
-  EXPECT_FALSE(unilib.IsWhitespace(')'));
-  EXPECT_TRUE(unilib.IsWhitespace(' '));
-  EXPECT_FALSE(unilib.IsDigit(')'));
-  EXPECT_TRUE(unilib.IsDigit('0'));
-  EXPECT_TRUE(unilib.IsDigit('9'));
-  EXPECT_FALSE(unilib.IsUpper(')'));
-  EXPECT_TRUE(unilib.IsUpper('A'));
-  EXPECT_TRUE(unilib.IsUpper('Z'));
-  EXPECT_EQ(unilib.ToLower('A'), 'a');
-  EXPECT_EQ(unilib.ToLower('Z'), 'z');
-  EXPECT_EQ(unilib.ToLower(')'), ')');
-  EXPECT_EQ(unilib.GetPairedBracket(')'), '(');
-  EXPECT_EQ(unilib.GetPairedBracket('}'), '{');
-}
-
-#ifndef LIBTEXTCLASSIFIER_UNILIB_DUMMY
-TEST(UniLibTest, CharacterClassesUnicode) {
-  CREATE_UNILIB_FOR_TESTING;
-  EXPECT_TRUE(unilib.IsOpeningBracket(0x0F3C));  // TIBET ANG KHANG GYON
-  EXPECT_TRUE(unilib.IsClosingBracket(0x0F3D));  // TIBET ANG KHANG GYAS
-  EXPECT_FALSE(unilib.IsWhitespace(0x23F0));     // ALARM CLOCK
-  EXPECT_TRUE(unilib.IsWhitespace(0x2003));      // EM SPACE
-  EXPECT_FALSE(unilib.IsDigit(0xA619));          // VAI SYMBOL JONG
-  EXPECT_TRUE(unilib.IsDigit(0xA620));           // VAI DIGIT ZERO
-  EXPECT_TRUE(unilib.IsDigit(0xA629));           // VAI DIGIT NINE
-  EXPECT_FALSE(unilib.IsDigit(0xA62A));          // VAI SYLLABLE NDOLE MA
-  EXPECT_FALSE(unilib.IsUpper(0x0211));          // SMALL R WITH DOUBLE GRAVE
-  EXPECT_TRUE(unilib.IsUpper(0x0212));           // CAPITAL R WITH DOUBLE GRAVE
-  EXPECT_TRUE(unilib.IsUpper(0x0391));           // GREEK CAPITAL ALPHA
-  EXPECT_TRUE(unilib.IsUpper(0x03AB));           // GREEK CAPITAL UPSILON W DIAL
-  EXPECT_FALSE(unilib.IsUpper(0x03AC));          // GREEK SMALL ALPHA WITH TONOS
-  EXPECT_EQ(unilib.ToLower(0x0391), 0x03B1);     // GREEK ALPHA
-  EXPECT_EQ(unilib.ToLower(0x03AB), 0x03CB);     // GREEK UPSILON WITH DIALYTIKA
-  EXPECT_EQ(unilib.ToLower(0x03C0), 0x03C0);     // GREEK SMALL PI
-
-  EXPECT_EQ(unilib.GetPairedBracket(0x0F3C), 0x0F3D);
-  EXPECT_EQ(unilib.GetPairedBracket(0x0F3D), 0x0F3C);
-}
-#endif  // ndef LIBTEXTCLASSIFIER_UNILIB_DUMMY
-
-TEST(UniLibTest, RegexInterface) {
-  CREATE_UNILIB_FOR_TESTING;
-  const UnicodeText regex_pattern =
-      UTF8ToUnicodeText("[0-9]+", /*do_copy=*/true);
-  std::unique_ptr<UniLib::RegexPattern> pattern =
-      unilib.CreateRegexPattern(regex_pattern);
-  const UnicodeText input = UTF8ToUnicodeText("hello 0123", /*do_copy=*/false);
-  int status;
-  std::unique_ptr<UniLib::RegexMatcher> matcher = pattern->Matcher(input);
-  TC_LOG(INFO) << matcher->Matches(&status);
-  TC_LOG(INFO) << matcher->Find(&status);
-  TC_LOG(INFO) << matcher->Start(0, &status);
-  TC_LOG(INFO) << matcher->End(0, &status);
-  TC_LOG(INFO) << matcher->Group(0, &status).size_codepoints();
-}
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST(UniLibTest, Regex) {
-  CREATE_UNILIB_FOR_TESTING;
-
-  // The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
-  // test the regex functionality with it to verify we are handling the indices
-  // correctly.
-  const UnicodeText regex_pattern =
-      UTF8ToUnicodeText("[0-9]+πŸ˜‹", /*do_copy=*/false);
-  std::unique_ptr<UniLib::RegexPattern> pattern =
-      unilib.CreateRegexPattern(regex_pattern);
-  int status;
-  std::unique_ptr<UniLib::RegexMatcher> matcher;
-
-  matcher = pattern->Matcher(UTF8ToUnicodeText("0123πŸ˜‹", /*do_copy=*/false));
-  EXPECT_TRUE(matcher->Matches(&status));
-  EXPECT_TRUE(matcher->ApproximatelyMatches(&status));
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_TRUE(matcher->Matches(&status));  // Check that the state is reset.
-  EXPECT_TRUE(matcher->ApproximatelyMatches(&status));
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-
-  matcher = pattern->Matcher(
-      UTF8ToUnicodeText("helloπŸ˜‹πŸ˜‹ 0123πŸ˜‹ world", /*do_copy=*/false));
-  EXPECT_FALSE(matcher->Matches(&status));
-  EXPECT_FALSE(matcher->ApproximatelyMatches(&status));
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-
-  matcher = pattern->Matcher(
-      UTF8ToUnicodeText("helloπŸ˜‹πŸ˜‹ 0123πŸ˜‹ world", /*do_copy=*/false));
-  EXPECT_TRUE(matcher->Find(&status));
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Start(0, &status), 8);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->End(0, &status), 13);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "0123πŸ˜‹");
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST(UniLibTest, RegexGroups) {
-  CREATE_UNILIB_FOR_TESTING;
-
-  // The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
-  // test the regex functionality with it to verify we are handling the indices
-  // correctly.
-  const UnicodeText regex_pattern = UTF8ToUnicodeText(
-      "(?<group1>[0-9])(?<group2>[0-9]+)πŸ˜‹", /*do_copy=*/false);
-  std::unique_ptr<UniLib::RegexPattern> pattern =
-      unilib.CreateRegexPattern(regex_pattern);
-  int status;
-  std::unique_ptr<UniLib::RegexMatcher> matcher;
-
-  matcher = pattern->Matcher(
-      UTF8ToUnicodeText("helloπŸ˜‹πŸ˜‹ 0123πŸ˜‹ world", /*do_copy=*/false));
-  EXPECT_TRUE(matcher->Find(&status));
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Start(0, &status), 8);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Start(1, &status), 8);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Start(2, &status), 9);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->End(0, &status), 13);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->End(1, &status), 9);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->End(2, &status), 12);
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "0123πŸ˜‹");
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Group(1, &status).ToUTF8String(), "0");
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-  EXPECT_EQ(matcher->Group(2, &status).ToUTF8String(), "123");
-  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-
-TEST(UniLibTest, BreakIterator) {
-  CREATE_UNILIB_FOR_TESTING;
-  const UnicodeText text = UTF8ToUnicodeText("some text", /*do_copy=*/false);
-  std::unique_ptr<UniLib::BreakIterator> iterator =
-      unilib.CreateBreakIterator(text);
-  std::vector<int> break_indices;
-  int break_index = 0;
-  while ((break_index = iterator->Next()) != UniLib::BreakIterator::kDone) {
-    break_indices.push_back(break_index);
-  }
-  EXPECT_THAT(break_indices, ElementsAre(4, 5, 9));
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST(UniLibTest, BreakIterator4ByteUTF8) {
-  CREATE_UNILIB_FOR_TESTING;
-  const UnicodeText text = UTF8ToUnicodeText("πŸ˜€πŸ˜‚πŸ˜‹", /*do_copy=*/false);
-  std::unique_ptr<UniLib::BreakIterator> iterator =
-      unilib.CreateBreakIterator(text);
-  std::vector<int> break_indices;
-  int break_index = 0;
-  while ((break_index = iterator->Next()) != UniLib::BreakIterator::kDone) {
-    break_indices.push_back(break_index);
-  }
-  EXPECT_THAT(break_indices, ElementsAre(1, 2, 3));
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
-
-#ifndef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
-TEST(UniLibTest, IntegerParse) {
-  CREATE_UNILIB_FOR_TESTING;
-  int result;
-  EXPECT_TRUE(
-      unilib.ParseInt32(UTF8ToUnicodeText("123", /*do_copy=*/false), &result));
-  EXPECT_EQ(result, 123);
-}
-#endif  // ndef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST(UniLibTest, IntegerParseFullWidth) {
-  CREATE_UNILIB_FOR_TESTING;
-  int result;
-  // The input string here is full width
-  EXPECT_TRUE(unilib.ParseInt32(UTF8ToUnicodeText("οΌ‘οΌ’οΌ“", /*do_copy=*/false),
-                                &result));
-  EXPECT_EQ(result, 123);
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
-
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
-TEST(UniLibTest, IntegerParseFullWidthWithAlpha) {
-  CREATE_UNILIB_FOR_TESTING;
-  int result;
-  // The input string here is full width
-  EXPECT_FALSE(unilib.ParseInt32(UTF8ToUnicodeText("οΌ‘aοΌ“", /*do_copy=*/false),
-                                 &result));
-}
-#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
-
-}  // namespace
-}  // namespace libtextclassifier2
diff --git a/util/base/casts.h b/utils/base/casts.h
similarity index 92%
rename from util/base/casts.h
rename to utils/base/casts.h
index a1d2056..175f56b 100644
--- a/util/base/casts.h
+++ b/utils/base/casts.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_CASTS_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_CASTS_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_CASTS_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_CASTS_H_
 
 #include <string.h>  // for memcpy
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // bit_cast<Dest, Source> is a template function that implements the equivalent
 // of "*reinterpret_cast<Dest*>(&source)".  We need this in very low-level
@@ -87,6 +87,6 @@
   return dest;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_CASTS_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_CASTS_H_
diff --git a/util/base/config.h b/utils/base/config.h
similarity index 84%
rename from util/base/config.h
rename to utils/base/config.h
index 8844b14..c476f13 100644
--- a/util/base/config.h
+++ b/utils/base/config.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,10 +16,10 @@
 
 // Define macros to indicate C++ standard / platform / etc we use.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_CONFIG_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_CONFIG_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_CONFIG_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_CONFIG_H_
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Define LANG_CXX11 to 1 if current compiler supports C++11.
 //
@@ -38,6 +38,6 @@
 #define LANG_CXX11 1
 #endif
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_CONFIG_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_CONFIG_H_
diff --git a/util/base/endian.h b/utils/base/endian.h
similarity index 92%
rename from util/base/endian.h
rename to utils/base/endian.h
index 2dfbfd6..9312704 100644
--- a/util/base/endian.h
+++ b/utils/base/endian.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_ENDIAN_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_ENDIAN_H_
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 #if defined OS_LINUX || defined OS_CYGWIN || defined OS_ANDROID || \
     defined(__ANDROID__)
@@ -133,6 +133,6 @@
 #endif /* ENDIAN */
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_ENDIAN_H_
diff --git a/util/base/integral_types.h b/utils/base/integral_types.h
similarity index 84%
rename from util/base/integral_types.h
rename to utils/base/integral_types.h
index f82c9cd..e3253de 100644
--- a/util/base/integral_types.h
+++ b/utils/base/integral_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,12 +16,12 @@
 
 // Basic integer type definitions.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_INTEGRAL_TYPES_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_INTEGRAL_TYPES_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_INTEGRAL_TYPES_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_INTEGRAL_TYPES_H_
 
-#include "util/base/config.h"
+#include "utils/base/config.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 typedef unsigned int uint32;
 typedef unsigned long long uint64;
@@ -56,6 +56,6 @@
 static_assert(sizeof(int64) == 8, "wrong size");
 #endif  // LANG_CXX11
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_INTEGRAL_TYPES_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_INTEGRAL_TYPES_H_
diff --git a/util/base/logging.cc b/utils/base/logging.cc
similarity index 90%
rename from util/base/logging.cc
rename to utils/base/logging.cc
index 919bb36..2ff09d7 100644
--- a/util/base/logging.cc
+++ b/utils/base/logging.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
 
 #include <stdlib.h>
 
 #include <iostream>
 
-#include "util/base/logging_raw.h"
+#include "utils/base/logging_raw.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace logging {
 
 namespace {
@@ -64,4 +64,4 @@
 }
 
 }  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/utils/base/logging.h b/utils/base/logging.h
new file mode 100644
index 0000000..e8bde39
--- /dev/null
+++ b/utils/base/logging.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_H_
+
+#include <cassert>
+#include <string>
+
+#include "utils/base/logging_levels.h"
+#include "utils/base/port.h"
+
+
+namespace libtextclassifier3 {
+namespace logging {
+
+// A tiny code footprint string stream for assembling log messages.
+struct LoggingStringStream {
+  LoggingStringStream() {}
+  LoggingStringStream &stream() { return *this; }
+  // Needed for invocation in TC3_CHECK macro.
+  explicit operator bool() const { return true; }
+
+  std::string message;
+};
+
+template <typename T>
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const T &entry) {
+  stream.message.append(std::to_string(entry));
+  return stream;
+}
+
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const char *message) {
+  stream.message.append(message);
+  return stream;
+}
+
+#if defined(HAS_GLOBAL_STRING)
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const ::string &message) {
+  stream.message.append(message);
+  return stream;
+}
+#endif
+
+inline LoggingStringStream &operator<<(LoggingStringStream &stream,
+                                       const std::string &message) {
+  stream.message.append(message);
+  return stream;
+}
+
+// The class that does all the work behind our TC3_LOG(severity) macros.  Each
+// TC3_LOG(severity) << obj1 << obj2 << ...; logging statement creates a
+// LogMessage temporary object containing a stringstream.  Each operator<< adds
+// info to that stringstream and the LogMessage destructor performs the actual
+// logging.  The reason this works is that in C++, "all temporary objects are
+// destroyed as the last step in evaluating the full-expression that (lexically)
+// contains the point where they were created."  For more info, see
+// http://en.cppreference.com/w/cpp/language/lifetime.  Hence, the destructor is
+// invoked after the last << from that logging statement.
+class LogMessage {
+ public:
+  LogMessage(LogSeverity severity, const char *file_name,
+             int line_number) TC3_ATTRIBUTE_NOINLINE;
+
+  ~LogMessage() TC3_ATTRIBUTE_NOINLINE;
+
+  // Returns the stream associated with the logger object.
+  LoggingStringStream &stream() { return stream_; }
+
+ private:
+  const LogSeverity severity_;
+
+  // Stream that "prints" all info into a string (not to a file).  We construct
+  // here the entire logging message and next print it in one operation.
+  LoggingStringStream stream_;
+};
+
+// Pseudo-stream that "eats" the tokens <<-pumped into it, without printing
+// anything.
+class NullStream {
+ public:
+  NullStream() {}
+  NullStream &stream() { return *this; }
+};
+template <typename T>
+inline NullStream &operator<<(NullStream &str, const T &) {
+  return str;
+}
+
+}  // namespace logging
+}  // namespace libtextclassifier3
+
+#define TC3_LOG(severity)                                          \
+  ::libtextclassifier3::logging::LogMessage(                       \
+      ::libtextclassifier3::logging::severity, __FILE__, __LINE__) \
+      .stream()
+
+// If condition x is true, does nothing.  Otherwise, crashes the program (like
+// LOG(FATAL)) with an informative message.  Can be continued with extra
+// messages, via <<, like any logging macro, e.g.,
+//
+// TC3_CHECK(my_cond) << "I think we hit a problem";
+#define TC3_CHECK(x)                                                           \
+  (x) || TC3_LOG(FATAL) << __FILE__ << ":" << __LINE__ << ": check failed: \"" \
+                        << #x
+
+#define TC3_CHECK_EQ(x, y) TC3_CHECK((x) == (y))
+#define TC3_CHECK_LT(x, y) TC3_CHECK((x) < (y))
+#define TC3_CHECK_GT(x, y) TC3_CHECK((x) > (y))
+#define TC3_CHECK_LE(x, y) TC3_CHECK((x) <= (y))
+#define TC3_CHECK_GE(x, y) TC3_CHECK((x) >= (y))
+#define TC3_CHECK_NE(x, y) TC3_CHECK((x) != (y))
+
+#define TC3_NULLSTREAM ::libtextclassifier3::logging::NullStream().stream()
+
+// Debug checks: a TC3_DCHECK<suffix> macro should behave like TC3_CHECK<suffix>
+// in debug mode and don't check / don't print anything in non-debug mode.
+#ifdef NDEBUG
+
+#define TC3_DCHECK(x) TC3_NULLSTREAM
+#define TC3_DCHECK_EQ(x, y) TC3_NULLSTREAM
+#define TC3_DCHECK_LT(x, y) TC3_NULLSTREAM
+#define TC3_DCHECK_GT(x, y) TC3_NULLSTREAM
+#define TC3_DCHECK_LE(x, y) TC3_NULLSTREAM
+#define TC3_DCHECK_GE(x, y) TC3_NULLSTREAM
+#define TC3_DCHECK_NE(x, y) TC3_NULLSTREAM
+
+#else  // NDEBUG
+
+// In debug mode, each TC3_DCHECK<suffix> is equivalent to TC3_CHECK<suffix>,
+// i.e., a real check that crashes when the condition is not true.
+#define TC3_DCHECK(x) TC3_CHECK(x)
+#define TC3_DCHECK_EQ(x, y) TC3_CHECK_EQ(x, y)
+#define TC3_DCHECK_LT(x, y) TC3_CHECK_LT(x, y)
+#define TC3_DCHECK_GT(x, y) TC3_CHECK_GT(x, y)
+#define TC3_DCHECK_LE(x, y) TC3_CHECK_LE(x, y)
+#define TC3_DCHECK_GE(x, y) TC3_CHECK_GE(x, y)
+#define TC3_DCHECK_NE(x, y) TC3_CHECK_NE(x, y)
+
+#endif  // NDEBUG
+
+#ifdef TC3_VLOG
+#define TC3_VLOG(severity)                                     \
+  ::libtextclassifier3::logging::LogMessage(                   \
+      ::libtextclassifier3::logging::INFO, __FILE__, __LINE__) \
+      .stream()
+#else
+#define TC3_VLOG(severity) TC3_NULLSTREAM
+#endif
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_H_
diff --git a/util/base/logging_levels.h b/utils/base/logging_levels.h
similarity index 70%
rename from util/base/logging_levels.h
rename to utils/base/logging_levels.h
index 17c882f..dfcb267 100644
--- a/util/base/logging_levels.h
+++ b/utils/base/logging_levels.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_LEVELS_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_LEVELS_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_LEVELS_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_LEVELS_H_
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace logging {
 
 enum LogSeverity {
@@ -28,6 +28,6 @@
 };
 
 }  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_LEVELS_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_LEVELS_H_
diff --git a/util/base/logging_raw.cc b/utils/base/logging_raw.cc
similarity index 90%
rename from util/base/logging_raw.cc
rename to utils/base/logging_raw.cc
index 6d97852..ccaef22 100644
--- a/util/base/logging_raw.cc
+++ b/utils/base/logging_raw.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "util/base/logging_raw.h"
+#include "utils/base/logging_raw.h"
 
 #include <stdio.h>
 #include <string>
@@ -26,7 +26,7 @@
 // Compiled as part of Android.
 #include <android/log.h>
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace logging {
 
 namespace {
@@ -50,7 +50,7 @@
 void LowLevelLogging(LogSeverity severity, const std::string& tag,
                      const std::string& message) {
   const int android_log_level = GetAndroidLogLevel(severity);
-#if !defined(TC_DEBUG_LOGGING)
+#if !defined(TC3_DEBUG_LOGGING)
   if (android_log_level != ANDROID_LOG_ERROR &&
       android_log_level != ANDROID_LOG_FATAL) {
     return;
@@ -60,12 +60,12 @@
 }
 
 }  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
 #else  // if defined(__ANDROID__)
 
 // Not on Android: implement LowLevelLogging to print to stderr (see below).
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace logging {
 
 namespace {
@@ -94,6 +94,6 @@
 }
 
 }  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
 #endif  // if defined(__ANDROID__)
diff --git a/util/base/logging_raw.h b/utils/base/logging_raw.h
similarity index 75%
rename from util/base/logging_raw.h
rename to utils/base/logging_raw.h
index e6265c7..be285ad 100644
--- a/util/base/logging_raw.h
+++ b/utils/base/logging_raw.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_RAW_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_RAW_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_RAW_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_RAW_H_
 
 #include <string>
 
-#include "util/base/logging_levels.h"
+#include "utils/base/logging_levels.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace logging {
 
 // Low-level logging primitive.  Logs a message, with the indicated log
@@ -31,6 +31,6 @@
                      const std::string &message);
 
 }  // namespace logging
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_LOGGING_RAW_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_LOGGING_RAW_H_
diff --git a/util/base/macros.h b/utils/base/macros.h
similarity index 62%
rename from util/base/macros.h
rename to utils/base/macros.h
index a021ab9..6739c0b 100644
--- a/util/base/macros.h
+++ b/utils/base/macros.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,28 +14,28 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_MACROS_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_MACROS_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_MACROS_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_MACROS_H_
 
-#include "util/base/config.h"
+#include "utils/base/config.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 #if LANG_CXX11
-#define TC_DISALLOW_COPY_AND_ASSIGN(TypeName) \
-  TypeName(const TypeName &) = delete;        \
+#define TC3_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName &) = delete;         \
   TypeName &operator=(const TypeName &) = delete
 #else  // C++98 case follows
 
 // Note that these C++98 implementations cannot completely disallow copying,
 // as members and friends can still accidentally make elided copies without
 // triggering a linker error.
-#define TC_DISALLOW_COPY_AND_ASSIGN(TypeName) \
-  TypeName(const TypeName &);                 \
+#define TC3_DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName &);                  \
   TypeName &operator=(const TypeName &)
 #endif  // LANG_CXX11
 
-// The TC_FALLTHROUGH_INTENDED macro can be used to annotate implicit
+// The TC3_FALLTHROUGH_INTENDED macro can be used to annotate implicit
 // fall-through between switch labels:
 //
 //  switch (x) {
@@ -43,7 +43,7 @@
 //    case 41:
 //      if (truth_is_out_there) {
 //        ++x;
-//        TC_FALLTHROUGH_INTENDED;  // Use instead of/along with annotations in
+//        TC3_FALLTHROUGH_INTENDED;  // Use instead of/along with annotations in
 //                                  // comments.
 //      } else {
 //        return x;
@@ -51,35 +51,37 @@
 //    case 42:
 //      ...
 //
-//  As shown in the example above, the TC_FALLTHROUGH_INTENDED macro should be
+//  As shown in the example above, the TC3_FALLTHROUGH_INTENDED macro should be
 //  followed by a semicolon. It is designed to mimic control-flow statements
 //  like 'break;', so it can be placed in most places where 'break;' can, but
 //  only if there are no statements on the execution path between it and the
 //  next switch label.
 //
-//  When compiled with clang in C++11 mode, the TC_FALLTHROUGH_INTENDED macro is
-//  expanded to [[clang::fallthrough]] attribute, which is analysed when
+//  When compiled with clang in C++11 mode, the TC3_FALLTHROUGH_INTENDED macro
+//  is expanded to [[clang::fallthrough]] attribute, which is analysed when
 //  performing switch labels fall-through diagnostic ('-Wimplicit-fallthrough').
 //  See clang documentation on language extensions for details:
 //  http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
 //
-//  When used with unsupported compilers, the TC_FALLTHROUGH_INTENDED macro has
+//  When used with unsupported compilers, the TC3_FALLTHROUGH_INTENDED macro has
 //  no effect on diagnostics.
 //
 //  In either case this macro has no effect on runtime behavior and performance
 //  of code.
 #if defined(__clang__) && defined(__has_warning)
 #if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")
-#define TC_FALLTHROUGH_INTENDED [[clang::fallthrough]]
+#define TC3_FALLTHROUGH_INTENDED [[clang::fallthrough]]
 #endif
 #elif defined(__GNUC__) && __GNUC__ >= 7
-#define TC_FALLTHROUGH_INTENDED [[gnu::fallthrough]]
+#define TC3_FALLTHROUGH_INTENDED [[gnu::fallthrough]]
 #endif
 
-#ifndef TC_FALLTHROUGH_INTENDED
-#define TC_FALLTHROUGH_INTENDED do { } while (0)
+#ifndef TC3_FALLTHROUGH_INTENDED
+#define TC3_FALLTHROUGH_INTENDED \
+  do {                           \
+  } while (0)
 #endif
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_MACROS_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_MACROS_H_
diff --git a/util/base/port.h b/utils/base/port.h
similarity index 66%
rename from util/base/port.h
rename to utils/base/port.h
index 90a2bce..24344a0 100644
--- a/util/base/port.h
+++ b/utils/base/port.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,30 +16,30 @@
 
 // Various portability macros, type definitions, and inline functions.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_PORT_H_
-#define LIBTEXTCLASSIFIER_UTIL_BASE_PORT_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_PORT_H_
+#define LIBTEXTCLASSIFIER_UTILS_BASE_PORT_H_
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 #if defined(__GNUC__) && \
     (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
 
 // For functions we want to force inline.
 // Introduced in gcc 3.1.
-#define TC_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
+#define TC3_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
 
 // For functions we don't want to inline, e.g., to keep code size small.
-#define TC_ATTRIBUTE_NOINLINE __attribute__((noinline))
+#define TC3_ATTRIBUTE_NOINLINE __attribute__((noinline))
 
 #elif defined(_MSC_VER)
-#define TC_ATTRIBUTE_ALWAYS_INLINE __forceinline
+#define TC3_ATTRIBUTE_ALWAYS_INLINE __forceinline
 #else
 
 // Other compilers will have to figure it out for themselves.
-#define TC_ATTRIBUTE_ALWAYS_INLINE
-#define TC_ATTRIBUTE_NOINLINE
+#define TC3_ATTRIBUTE_ALWAYS_INLINE
+#define TC3_ATTRIBUTE_NOINLINE
 #endif
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_PORT_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_PORT_H_
diff --git a/utils/calendar/calendar-common.h b/utils/calendar/calendar-common.h
new file mode 100644
index 0000000..7e606de
--- /dev/null
+++ b/utils/calendar/calendar-common.h
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_COMMON_H_
+#define LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_COMMON_H_
+
+#include "annotator/types.h"
+#include "utils/base/integral_types.h"
+#include "utils/base/logging.h"
+#include "utils/base/macros.h"
+
+namespace libtextclassifier3 {
+namespace calendar {
+
+// Macro to reduce the amount of boilerplate needed for propagating errors.
+#define TC3_CALENDAR_CHECK(EXPR) \
+  if (!(EXPR)) {                 \
+    return false;                \
+  }
+
+// An implementation of CalendarLib that is independent of the particular
+// calendar implementation used (implementation type is passed as template
+// argument).
+template <class TCalendar>
+class CalendarLibTempl {
+ public:
+  bool InterpretParseData(const DateParseData& parse_data,
+                          int64 reference_time_ms_utc,
+                          const std::string& reference_timezone,
+                          const std::string& reference_locale,
+                          DatetimeGranularity granularity,
+                          TCalendar* calendar) const;
+
+ private:
+  // Adjusts the calendar's time instant according to a relative date reference
+  // in the parsed data.
+  bool ApplyRelationField(const DateParseData& parse_data,
+                          TCalendar* calendar) const;
+
+  // Round the time instant's precision down to the given granularity.
+  bool RoundToGranularity(DatetimeGranularity granularity,
+                          TCalendar* calendar) const;
+
+  // Adjusts time in steps of relation_type, by distance steps.
+  // For example:
+  // - Adjusting by -2 MONTHS will return the beginning of the 1st
+  //   two weeks ago.
+  // - Adjusting by +4 Wednesdays will return the beginning of the next
+  //   Wednesday at least 4 weeks from now.
+  // If allow_today is true, the same day of the week may be kept
+  // if it already matches the relation type.
+  bool AdjustByRelation(DateParseData::RelationType relation_type, int distance,
+                        bool allow_today, TCalendar* calendar) const;
+};
+
+template <class TCalendar>
+bool CalendarLibTempl<TCalendar>::InterpretParseData(
+    const DateParseData& parse_data, int64 reference_time_ms_utc,
+    const std::string& reference_timezone, const std::string& reference_locale,
+    DatetimeGranularity granularity, TCalendar* calendar) const {
+  TC3_CALENDAR_CHECK(calendar->Initialize(reference_timezone, reference_locale,
+                                          reference_time_ms_utc))
+
+  // By default, the parsed time is interpreted to be on the reference day.
+  // But a parsed date should have time 0:00:00 unless specified.
+  TC3_CALENDAR_CHECK(calendar->SetHourOfDay(0))
+  TC3_CALENDAR_CHECK(calendar->SetMinute(0))
+  TC3_CALENDAR_CHECK(calendar->SetSecond(0))
+  TC3_CALENDAR_CHECK(calendar->SetMillisecond(0))
+
+  // Apply each of the parsed fields in order of increasing granularity.
+  static const int64 kMillisInHour = 1000 * 60 * 60;
+  if (parse_data.field_set_mask & DateParseData::Fields::ZONE_OFFSET_FIELD) {
+    TC3_CALENDAR_CHECK(
+        calendar->SetZoneOffset(parse_data.zone_offset * kMillisInHour))
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::DST_OFFSET_FIELD) {
+    TC3_CALENDAR_CHECK(
+        calendar->SetDstOffset(parse_data.dst_offset * kMillisInHour))
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::RELATION_FIELD) {
+    TC3_CALENDAR_CHECK(ApplyRelationField(parse_data, calendar));
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::YEAR_FIELD) {
+    TC3_CALENDAR_CHECK(calendar->SetYear(parse_data.year))
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::MONTH_FIELD) {
+    // ICU has months starting at 0, Java and Datetime parser at 1, so we
+    // need to subtract 1.
+    TC3_CALENDAR_CHECK(calendar->SetMonth(parse_data.month - 1))
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::DAY_FIELD) {
+    TC3_CALENDAR_CHECK(calendar->SetDayOfMonth(parse_data.day_of_month))
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::HOUR_FIELD) {
+    if (parse_data.field_set_mask & DateParseData::Fields::AMPM_FIELD &&
+        parse_data.ampm == DateParseData::AMPM::PM && parse_data.hour < 12) {
+      TC3_CALENDAR_CHECK(calendar->SetHourOfDay(parse_data.hour + 12))
+    } else {
+      TC3_CALENDAR_CHECK(calendar->SetHourOfDay(parse_data.hour))
+    }
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::MINUTE_FIELD) {
+    TC3_CALENDAR_CHECK(calendar->SetMinute(parse_data.minute))
+  }
+  if (parse_data.field_set_mask & DateParseData::Fields::SECOND_FIELD) {
+    TC3_CALENDAR_CHECK(calendar->SetSecond(parse_data.second))
+  }
+
+  TC3_CALENDAR_CHECK(RoundToGranularity(granularity, calendar))
+  return true;
+}
+
+template <class TCalendar>
+bool CalendarLibTempl<TCalendar>::ApplyRelationField(
+    const DateParseData& parse_data, TCalendar* calendar) const {
+  constexpr int relation_type_mask = DateParseData::Fields::RELATION_TYPE_FIELD;
+  constexpr int relation_distance_mask =
+      DateParseData::Fields::RELATION_DISTANCE_FIELD;
+  switch (parse_data.relation) {
+    case DateParseData::Relation::NEXT:
+      if (parse_data.field_set_mask & relation_type_mask) {
+        TC3_CALENDAR_CHECK(AdjustByRelation(parse_data.relation_type,
+                                            /*distance=*/1,
+                                            /*allow_today=*/false, calendar));
+      }
+      return true;
+    case DateParseData::Relation::NEXT_OR_SAME:
+      if (parse_data.field_set_mask & relation_type_mask) {
+        TC3_CALENDAR_CHECK(AdjustByRelation(parse_data.relation_type,
+                                            /*distance=*/1,
+                                            /*allow_today=*/true, calendar))
+      }
+      return true;
+    case DateParseData::Relation::LAST:
+      if (parse_data.field_set_mask & relation_type_mask) {
+        TC3_CALENDAR_CHECK(AdjustByRelation(parse_data.relation_type,
+                                            /*distance=*/-1,
+                                            /*allow_today=*/false, calendar))
+      }
+      return true;
+    case DateParseData::Relation::NOW:
+      return true;  // NOOP
+    case DateParseData::Relation::TOMORROW:
+      TC3_CALENDAR_CHECK(calendar->AddDayOfMonth(1));
+      return true;
+    case DateParseData::Relation::YESTERDAY:
+      TC3_CALENDAR_CHECK(calendar->AddDayOfMonth(-1));
+      return true;
+    case DateParseData::Relation::PAST:
+      if ((parse_data.field_set_mask & relation_type_mask) &&
+          (parse_data.field_set_mask & relation_distance_mask)) {
+        TC3_CALENDAR_CHECK(AdjustByRelation(parse_data.relation_type,
+                                            -parse_data.relation_distance,
+                                            /*allow_today=*/false, calendar))
+      }
+      return true;
+    case DateParseData::Relation::FUTURE:
+      if ((parse_data.field_set_mask & relation_type_mask) &&
+          (parse_data.field_set_mask & relation_distance_mask)) {
+        TC3_CALENDAR_CHECK(AdjustByRelation(parse_data.relation_type,
+                                            parse_data.relation_distance,
+                                            /*allow_today=*/false, calendar))
+      }
+      return true;
+  }
+  return false;
+}
+
+template <class TCalendar>
+bool CalendarLibTempl<TCalendar>::RoundToGranularity(
+    DatetimeGranularity granularity, TCalendar* calendar) const {
+  // Force recomputation before doing the rounding.
+  int unused;
+  TC3_CALENDAR_CHECK(calendar->GetDayOfWeek(&unused));
+
+  switch (granularity) {
+    case GRANULARITY_YEAR:
+      TC3_CALENDAR_CHECK(calendar->SetMonth(0));
+      TC3_FALLTHROUGH_INTENDED;
+    case GRANULARITY_MONTH:
+      TC3_CALENDAR_CHECK(calendar->SetDayOfMonth(1));
+      TC3_FALLTHROUGH_INTENDED;
+    case GRANULARITY_DAY:
+      TC3_CALENDAR_CHECK(calendar->SetHourOfDay(0));
+      TC3_FALLTHROUGH_INTENDED;
+    case GRANULARITY_HOUR:
+      TC3_CALENDAR_CHECK(calendar->SetMinute(0));
+      TC3_FALLTHROUGH_INTENDED;
+    case GRANULARITY_MINUTE:
+      TC3_CALENDAR_CHECK(calendar->SetSecond(0));
+      break;
+
+    case GRANULARITY_WEEK:
+      int first_day_of_week;
+      TC3_CALENDAR_CHECK(calendar->GetFirstDayOfWeek(&first_day_of_week));
+      TC3_CALENDAR_CHECK(calendar->SetDayOfWeek(first_day_of_week));
+      TC3_CALENDAR_CHECK(calendar->SetHourOfDay(0));
+      TC3_CALENDAR_CHECK(calendar->SetMinute(0));
+      TC3_CALENDAR_CHECK(calendar->SetSecond(0));
+      break;
+
+    case GRANULARITY_UNKNOWN:
+    case GRANULARITY_SECOND:
+      break;
+  }
+  return true;
+}
+
+template <class TCalendar>
+bool CalendarLibTempl<TCalendar>::AdjustByRelation(
+    DateParseData::RelationType relation_type, int distance, bool allow_today,
+    TCalendar* calendar) const {
+  const int distance_sign = distance < 0 ? -1 : 1;
+  switch (relation_type) {
+    case DateParseData::MONDAY:
+    case DateParseData::TUESDAY:
+    case DateParseData::WEDNESDAY:
+    case DateParseData::THURSDAY:
+    case DateParseData::FRIDAY:
+    case DateParseData::SATURDAY:
+    case DateParseData::SUNDAY:
+      if (!allow_today) {
+        // If we're not including the same day as the reference, skip it.
+        TC3_CALENDAR_CHECK(calendar->AddDayOfMonth(distance_sign))
+      }
+      // Step one day at a time (in the sign of distance) until we hit the desired day of the week.
+      while (distance != 0) {
+        int day_of_week;
+        TC3_CALENDAR_CHECK(calendar->GetDayOfWeek(&day_of_week))
+        if (day_of_week == relation_type) {
+          distance += -distance_sign;
+          if (distance == 0) break;
+        }
+        TC3_CALENDAR_CHECK(calendar->AddDayOfMonth(distance_sign))
+      }
+      return true;
+    case DateParseData::DAY:
+      TC3_CALENDAR_CHECK(calendar->AddDayOfMonth(distance));
+      return true;
+    case DateParseData::WEEK:
+      TC3_CALENDAR_CHECK(calendar->AddDayOfMonth(7 * distance))
+      TC3_CALENDAR_CHECK(calendar->SetDayOfWeek(1))
+      return true;
+    case DateParseData::MONTH:
+      TC3_CALENDAR_CHECK(calendar->AddMonth(distance))
+      TC3_CALENDAR_CHECK(calendar->SetDayOfMonth(1))
+      return true;
+    case DateParseData::YEAR:
+      TC3_CALENDAR_CHECK(calendar->AddYear(distance))
+      TC3_CALENDAR_CHECK(calendar->SetDayOfYear(1))
+      return true;
+    default:
+      return false;
+  }
+  return false;
+}
+
+};  // namespace calendar
+
+#undef TC3_CALENDAR_CHECK
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_COMMON_H_
diff --git a/utils/calendar/calendar-icu.cc b/utils/calendar/calendar-icu.cc
new file mode 100644
index 0000000..8c4978e
--- /dev/null
+++ b/utils/calendar/calendar-icu.cc
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/calendar/calendar-icu.h"
+
+#include <memory>
+
+#include "utils/base/macros.h"
+#include "utils/calendar/calendar-common.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Make sure the constants are compatible with ICU.
+#define TC3_CHECK_DAY_CONSTANT(NAME)                                  \
+  static_assert(static_cast<int>(UCalendarDaysOfWeek::UCAL_##NAME) == \
+                    static_cast<int>(DateParseData::NAME),            \
+                "Mismatching constant value for " #NAME);
+TC3_CHECK_DAY_CONSTANT(SUNDAY)
+TC3_CHECK_DAY_CONSTANT(MONDAY)
+TC3_CHECK_DAY_CONSTANT(TUESDAY)
+TC3_CHECK_DAY_CONSTANT(WEDNESDAY)
+TC3_CHECK_DAY_CONSTANT(THURSDAY)
+TC3_CHECK_DAY_CONSTANT(FRIDAY)
+TC3_CHECK_DAY_CONSTANT(SATURDAY)
+#undef TC3_CHECK_DAY_CONSTANT
+
+// Generic version of icu::Calendar::add with error checking.
+bool CalendarAdd(UCalendarDateFields field, int value,
+                 icu::Calendar* calendar) {
+  UErrorCode status = U_ZERO_ERROR;
+  calendar->add(field, value, status);
+  if (U_FAILURE(status)) {
+    TC3_LOG(ERROR) << "failed to add " << field;
+    return false;
+  }
+  return true;
+}
+
+// Generic version of icu::Calendar::get with error checking.
+bool CalendarGet(UCalendarDateFields field, int* value,
+                 icu::Calendar* calendar) {
+  UErrorCode status = U_ZERO_ERROR;
+  *value = calendar->get(field, status);
+  if (U_FAILURE(status)) {
+    TC3_LOG(ERROR) << "failed to get " << field;
+    return false;
+  }
+  return true;
+}
+
+// Generic version of icu::Calendar::set with error checking.
+bool CalendarSet(UCalendarDateFields field, int value,
+                 icu::Calendar* calendar) {
+  calendar->set(field, value);
+  return true;
+}
+
+}  // namespace
+
+bool Calendar::Initialize(const std::string& time_zone,
+                          const std::string& locale, int64 time_ms_utc) {
+  UErrorCode status = U_ZERO_ERROR;
+  calendar_.reset(icu::Calendar::createInstance(
+      icu::Locale::createFromName(locale.c_str()), status));
+  if (U_FAILURE(status)) {
+    TC3_LOG(ERROR) << "error getting calendar instance";
+    return false;
+  }
+  calendar_->adoptTimeZone(
+      icu::TimeZone::createTimeZone(icu::UnicodeString::fromUTF8(time_zone)));
+  calendar_->setTime(time_ms_utc, status);
+  if (U_FAILURE(status)) {
+    TC3_LOG(ERROR) << "failed to set time";
+    return false;
+  }
+  return true;
+}
+
+bool Calendar::GetFirstDayOfWeek(int* value) const {
+  *value = calendar_->getFirstDayOfWeek();
+  return true;
+}
+
+bool Calendar::GetTimeInMillis(int64* value) const {
+  UErrorCode status = U_ZERO_ERROR;
+  *value = calendar_->getTime(status);
+  if (U_FAILURE(status)) {
+    TC3_LOG(ERROR) << "error getting time from instance";
+    return false;
+  }
+  return true;
+}
+
+// Below is the boilerplate code for implementing the specialisations of
+// get/set/add for the various field types.
+#define TC3_DEFINE_FIELD_ACCESSOR(NAME, CONST, KIND, TYPE)          \
+  bool Calendar::KIND##NAME(TYPE value) const {                     \
+    return Calendar##KIND(UCalendarDateFields::UCAL_##CONST, value, \
+                          calendar_.get());                         \
+  }
+#define TC3_DEFINE_ADD(NAME, CONST) \
+  TC3_DEFINE_FIELD_ACCESSOR(NAME, CONST, Add, int)
+#define TC3_DEFINE_GET(NAME, CONST) \
+  TC3_DEFINE_FIELD_ACCESSOR(NAME, CONST, Get, int*)
+#define TC3_DEFINE_SET(NAME, CONST) \
+  TC3_DEFINE_FIELD_ACCESSOR(NAME, CONST, Set, int)
+
+TC3_DEFINE_ADD(DayOfMonth, DAY_OF_MONTH)
+TC3_DEFINE_ADD(Year, YEAR)
+TC3_DEFINE_ADD(Month, MONTH)
+TC3_DEFINE_GET(DayOfWeek, DAY_OF_WEEK)
+TC3_DEFINE_SET(ZoneOffset, ZONE_OFFSET)
+TC3_DEFINE_SET(DstOffset, DST_OFFSET)
+TC3_DEFINE_SET(Year, YEAR)
+TC3_DEFINE_SET(Month, MONTH)
+TC3_DEFINE_SET(DayOfYear, DAY_OF_YEAR)
+TC3_DEFINE_SET(DayOfMonth, DAY_OF_MONTH)
+TC3_DEFINE_SET(DayOfWeek, DAY_OF_WEEK)
+TC3_DEFINE_SET(HourOfDay, HOUR_OF_DAY)
+TC3_DEFINE_SET(Minute, MINUTE)
+TC3_DEFINE_SET(Second, SECOND)
+TC3_DEFINE_SET(Millisecond, MILLISECOND)
+
+#undef TC3_DEFINE_FIELD_ACCESSOR
+#undef TC3_DEFINE_ADD
+#undef TC3_DEFINE_GET
+#undef TC3_DEFINE_SET
+
+}  // namespace libtextclassifier3
diff --git a/utils/calendar/calendar-icu.h b/utils/calendar/calendar-icu.h
new file mode 100644
index 0000000..3fa4a4e
--- /dev/null
+++ b/utils/calendar/calendar-icu.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_ICU_H_
+#define LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_ICU_H_
+
+#include <memory>
+#include <string>
+
+#include "annotator/types.h"
+#include "utils/base/integral_types.h"
+#include "utils/base/logging.h"
+#include "utils/calendar/calendar-common.h"
+#include "unicode/gregocal.h"
+#include "unicode/timezone.h"
+#include "unicode/ucal.h"
+
+namespace libtextclassifier3 {
+
+class Calendar {
+ public:
+  bool Initialize(const std::string& time_zone, const std::string& locale,
+                  int64 time_ms_utc);
+  bool AddDayOfMonth(int value) const;
+  bool AddYear(int value) const;
+  bool AddMonth(int value) const;
+  bool GetDayOfWeek(int* value) const;
+  bool GetFirstDayOfWeek(int* value) const;
+  bool GetTimeInMillis(int64* value) const;
+  bool SetZoneOffset(int value) const;
+  bool SetDstOffset(int value) const;
+  bool SetYear(int value) const;
+  bool SetMonth(int value) const;
+  bool SetDayOfYear(int value) const;
+  bool SetDayOfMonth(int value) const;
+  bool SetDayOfWeek(int value) const;
+  bool SetHourOfDay(int value) const;
+  bool SetMinute(int value) const;
+  bool SetSecond(int value) const;
+  bool SetMillisecond(int value) const;
+
+ private:
+  // We don't use a unique_ptr here because icu::Calendar has an implicit
+  // destructor - meaning that we couldn't use a forward declaration and would
+  // have to put the ICU includes in the header.
+  std::unique_ptr<icu::Calendar> calendar_;
+};
+
+class CalendarLib {
+ public:
+  // Interprets parse_data as milliseconds since_epoch. Relative times are
+  // resolved against the current time (reference_time_ms_utc). Returns true if
+  // the interpretation was successful, false otherwise.
+  bool InterpretParseData(const DateParseData& parse_data,
+                          int64 reference_time_ms_utc,
+                          const std::string& reference_timezone,
+                          const std::string& reference_locale,
+                          DatetimeGranularity granularity,
+                          int64* interpreted_time_ms_utc) const {
+    Calendar calendar;
+    calendar::CalendarLibTempl<Calendar> impl_;
+    if (!impl_.InterpretParseData(parse_data, reference_time_ms_utc,
+                                  reference_timezone, reference_locale,
+                                  granularity, &calendar)) {
+      return false;
+    }
+    return calendar.GetTimeInMillis(interpreted_time_ms_utc);
+  }
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_ICU_H_
diff --git a/util/calendar/calendar.h b/utils/calendar/calendar.h
similarity index 65%
copy from util/calendar/calendar.h
copy to utils/calendar/calendar.h
index b0cf2e6..98c3490 100644
--- a/util/calendar/calendar.h
+++ b/utils/calendar/calendar.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
-#define LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_H_
+#define LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_H_
 
-#include "util/calendar/calendar-icu.h"
+#include "utils/calendar/calendar-icu.h"
+#define INIT_CALENDARLIB_FOR_TESTING(VAR) VAR()
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_CALENDAR_CALENDAR_H_
diff --git a/utils/calendar/calendar_test.cc b/utils/calendar/calendar_test.cc
new file mode 100644
index 0000000..a8c3af8
--- /dev/null
+++ b/utils/calendar/calendar_test.cc
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This test serves the purpose of making sure all the different implementations
+// of the unspoken CalendarLib interface support the same methods.
+
+#include "utils/calendar/calendar.h"
+#include "utils/base/logging.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+class CalendarTest : public ::testing::Test {
+ protected:
+  CalendarTest() : INIT_CALENDARLIB_FOR_TESTING(calendarlib_) {}
+  CalendarLib calendarlib_;
+};
+
+TEST_F(CalendarTest, Interface) {
+  int64 time;
+  std::string timezone;
+  bool result = calendarlib_.InterpretParseData(
+      DateParseData{/*field_set_mask=*/0, /*year=*/0, /*month=*/0,
+                    /*day_of_month=*/0, /*hour=*/0, /*minute=*/0, /*second=*/0,
+                    /*ampm=*/0, /*zone_offset=*/0, /*dst_offset=*/0,
+                    static_cast<DateParseData::Relation>(0),
+                    static_cast<DateParseData::RelationType>(0),
+                    /*relation_distance=*/0},
+      0L, "Zurich", "en-CH", GRANULARITY_UNKNOWN, &time);
+  TC3_LOG(INFO) << result;
+}
+
+#ifdef TC3_CALENDAR_ICU
+TEST_F(CalendarTest, RoundingToGranularity) {
+  int64 time;
+  DateParseData data;
+  data.year = 2018;
+  data.month = 4;
+  data.day_of_month = 25;
+  data.hour = 9;
+  data.minute = 33;
+  data.second = 59;
+  data.field_set_mask = DateParseData::YEAR_FIELD | DateParseData::MONTH_FIELD |
+                        DateParseData::DAY_FIELD | DateParseData::HOUR_FIELD |
+                        DateParseData::MINUTE_FIELD |
+                        DateParseData::SECOND_FIELD;
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_YEAR, &time));
+  EXPECT_EQ(time, 1514761200000L /* Jan 01 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_MONTH, &time));
+  EXPECT_EQ(time, 1522533600000L /* Apr 01 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_WEEK, &time));
+  EXPECT_EQ(time, 1524434400000L /* Mon Apr 23 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"*-CH",
+      /*granularity=*/GRANULARITY_WEEK, &time));
+  EXPECT_EQ(time, 1524434400000L /* Mon Apr 23 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-US",
+      /*granularity=*/GRANULARITY_WEEK, &time));
+  EXPECT_EQ(time, 1524348000000L /* Sun Apr 22 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"*-US",
+      /*granularity=*/GRANULARITY_WEEK, &time));
+  EXPECT_EQ(time, 1524348000000L /* Sun Apr 22 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_DAY, &time));
+  EXPECT_EQ(time, 1524607200000L /* Apr 25 2018 00:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_HOUR, &time));
+  EXPECT_EQ(time, 1524639600000L /* Apr 25 2018 09:00:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_MINUTE, &time));
+  EXPECT_EQ(time, 1524641580000 /* Apr 25 2018 09:33:00 */);
+
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      data,
+      /*reference_time_ms_utc=*/0L, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-CH",
+      /*granularity=*/GRANULARITY_SECOND, &time));
+  EXPECT_EQ(time, 1524641639000 /* Apr 25 2018 09:33:59 */);
+}
+
+TEST_F(CalendarTest, RelativeTimeWeekday) {
+  const int field_mask = DateParseData::RELATION_FIELD |
+                         DateParseData::RELATION_TYPE_FIELD |
+                         DateParseData::RELATION_DISTANCE_FIELD;
+  const int64 ref_time = 1524648839000L; /* 25 April 2018 09:33:59 */
+  int64 time;
+
+  // Two Weds from now.
+  const DateParseData future_wed_parse = {
+      field_mask,
+      /*year=*/0,
+      /*month=*/0,
+      /*day_of_month=*/0,
+      /*hour=*/0,
+      /*minute=*/0,
+      /*second=*/0,
+      /*ampm=*/0,
+      /*zone_offset=*/0,
+      /*dst_offset=*/0,
+      DateParseData::Relation::FUTURE,
+      DateParseData::RelationType::WEDNESDAY,
+      /*relation_distance=*/2};
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      future_wed_parse, ref_time, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-US",
+      /*granularity=*/GRANULARITY_DAY, &time));
+  EXPECT_EQ(time, 1525816800000L /* 9 May 2018 00:00:00 */);
+
+  // Next Wed.
+  const DateParseData next_wed_parse = {field_mask,
+                                        /*year=*/0,
+                                        /*month=*/0,
+                                        /*day_of_month=*/0,
+                                        /*hour=*/0,
+                                        /*minute=*/0,
+                                        /*second=*/0,
+                                        /*ampm=*/0,
+                                        /*zone_offset=*/0,
+                                        /*dst_offset=*/0,
+                                        DateParseData::Relation::NEXT,
+                                        DateParseData::RelationType::WEDNESDAY,
+                                        /*relation_distance=*/0};
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      next_wed_parse, ref_time, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-US",
+      /*granularity=*/GRANULARITY_DAY, &time));
+  EXPECT_EQ(time, 1525212000000L /* 1 May 2018 00:00:00 */);
+
+  // Same Wed.
+  const DateParseData same_wed_parse = {field_mask,
+                                        /*year=*/0,
+                                        /*month=*/0,
+                                        /*day_of_month=*/0,
+                                        /*hour=*/0,
+                                        /*minute=*/0,
+                                        /*second=*/0,
+                                        /*ampm=*/0,
+                                        /*zone_offset=*/0,
+                                        /*dst_offset=*/0,
+                                        DateParseData::Relation::NEXT_OR_SAME,
+                                        DateParseData::RelationType::WEDNESDAY,
+                                        /*relation_distance=*/0};
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      same_wed_parse, ref_time, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-US",
+      /*granularity=*/GRANULARITY_DAY, &time));
+  EXPECT_EQ(time, 1524607200000L /* 25 April 2018 00:00:00 */);
+
+  // Previous Wed.
+  const DateParseData last_wed_parse = {field_mask,
+                                        /*year=*/0,
+                                        /*month=*/0,
+                                        /*day_of_month=*/0,
+                                        /*hour=*/0,
+                                        /*minute=*/0,
+                                        /*second=*/0,
+                                        /*ampm=*/0,
+                                        /*zone_offset=*/0,
+                                        /*dst_offset=*/0,
+                                        DateParseData::Relation::LAST,
+                                        DateParseData::RelationType::WEDNESDAY,
+                                        /*relation_distance=*/0};
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      last_wed_parse, ref_time, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-US",
+      /*granularity=*/GRANULARITY_DAY, &time));
+  EXPECT_EQ(time, 1524002400000L /* 18 April 2018 00:00:00 */);
+
+  // Two Weds ago.
+  const DateParseData past_wed_parse = {field_mask,
+                                        /*year=*/0,
+                                        /*month=*/0,
+                                        /*day_of_month=*/0,
+                                        /*hour=*/0,
+                                        /*minute=*/0,
+                                        /*second=*/0,
+                                        /*ampm=*/0,
+                                        /*zone_offset=*/0,
+                                        /*dst_offset=*/0,
+                                        DateParseData::Relation::PAST,
+                                        DateParseData::RelationType::WEDNESDAY,
+                                        /*relation_distance=*/2};
+  ASSERT_TRUE(calendarlib_.InterpretParseData(
+      past_wed_parse, ref_time, /*reference_timezone=*/"Europe/Zurich",
+      /*reference_locale=*/"en-US",
+      /*granularity=*/GRANULARITY_DAY, &time));
+  EXPECT_EQ(time, 1523397600000L /* 11 April 2018 00:00:00 */);
+}
+#endif  // TC3_CALENDAR_ICU
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/checksum.cc b/utils/checksum.cc
new file mode 100644
index 0000000..87b2d37
--- /dev/null
+++ b/utils/checksum.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/checksum.h"
+#include "utils/strings/numbers.h"
+
+namespace libtextclassifier3 {
+
+bool VerifyLuhnChecksum(const std::string& input, bool ignore_whitespace) {
+  int sum = 0;
+  int num_digits = 0;
+  bool is_odd = true;
+
+  // http://en.wikipedia.org/wiki/Luhn_algorithm
+  static const int kPrecomputedSumsOfDoubledDigits[] = {0, 2, 4, 6, 8,
+                                                        1, 3, 5, 7, 9};
+  for (int i = input.size() - 1; i >= 0; i--) {
+    const char c = input[i];
+    if (ignore_whitespace && c == ' ') {
+      continue;
+    }
+    if (!isdigit(c)) {
+      return false;
+    }
+    ++num_digits;
+    const int digit = c - '0';
+    if (is_odd) {
+      sum += digit;
+    } else {
+      sum += kPrecomputedSumsOfDoubledDigits[digit];
+    }
+    is_odd = !is_odd;
+  }
+  return (num_digits > 1 && sum % 10 == 0);
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/checksum.h b/utils/checksum.h
new file mode 100644
index 0000000..2f94219
--- /dev/null
+++ b/utils/checksum.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Utility functions for calculating and verifying checksums.
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_CHECKSUM_H_
+#define LIBTEXTCLASSIFIER_UTILS_CHECKSUM_H_
+
+#include <string>
+
+namespace libtextclassifier3 {
+
+// Verifies that `input` (including its final check digit) passes the Luhn
+// checksum. Returns false if presented with non-digits, or on whitespace
+// characters if `ignore_whitespace` is false.
+bool VerifyLuhnChecksum(const std::string& input,
+                        bool ignore_whitespace = true);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_CHECKSUM_H_
diff --git a/utils/checksum_test.cc b/utils/checksum_test.cc
new file mode 100644
index 0000000..dd04956
--- /dev/null
+++ b/utils/checksum_test.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/checksum.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(LuhnTest, CorrectlyHandlesSimpleCases) {
+  EXPECT_TRUE(VerifyLuhnChecksum("3782 8224 6310 005"));
+  EXPECT_FALSE(VerifyLuhnChecksum("0"));
+  EXPECT_FALSE(VerifyLuhnChecksum("1"));
+  EXPECT_FALSE(VerifyLuhnChecksum("0A"));
+}
+
+TEST(LuhnTest, CorrectlyVerifiesPaymentCardNumbers) {
+  // Fake test numbers.
+  EXPECT_TRUE(VerifyLuhnChecksum("3782 8224 6310 005"));
+  EXPECT_TRUE(VerifyLuhnChecksum("371449635398431"));
+  EXPECT_TRUE(VerifyLuhnChecksum("5610591081018250"));
+  EXPECT_TRUE(VerifyLuhnChecksum("38520000023237"));
+  EXPECT_TRUE(VerifyLuhnChecksum("6011000990139424"));
+  EXPECT_TRUE(VerifyLuhnChecksum("3566002020360505"));
+  EXPECT_TRUE(VerifyLuhnChecksum("5105105105105100"));
+  EXPECT_TRUE(VerifyLuhnChecksum("4012 8888 8888 1881"));
+}
+
+TEST(LuhnTest, HandlesWhitespace) {
+  EXPECT_TRUE(
+      VerifyLuhnChecksum("3782 8224 6310 005 ", /*ignore_whitespace=*/true));
+  EXPECT_FALSE(
+      VerifyLuhnChecksum("3782 8224 6310 005 ", /*ignore_whitespace=*/false));
+}
+
+TEST(LuhnTest, HandlesEdgeCases) {
+  EXPECT_FALSE(VerifyLuhnChecksum("    ", /*ignore_whitespace=*/true));
+  EXPECT_FALSE(VerifyLuhnChecksum("    ", /*ignore_whitespace=*/false));
+  EXPECT_FALSE(VerifyLuhnChecksum("", /*ignore_whitespace=*/true));
+  EXPECT_FALSE(VerifyLuhnChecksum("", /*ignore_whitespace=*/false));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/util/flatbuffers.cc b/utils/flatbuffers.cc
similarity index 81%
rename from util/flatbuffers.cc
rename to utils/flatbuffers.cc
index 6c0108e..c1c2625 100644
--- a/util/flatbuffers.cc
+++ b/utils/flatbuffers.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include "util/flatbuffers.h"
+#include "utils/flatbuffers.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 template <>
 const char* FlatbufferFileIdentifier<Model>() {
   return ModelIdentifier();
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/flatbuffers.h b/utils/flatbuffers.h
similarity index 90%
rename from util/flatbuffers.h
rename to utils/flatbuffers.h
index 93d73b6..4031f89 100644
--- a/util/flatbuffers.h
+++ b/utils/flatbuffers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,16 +16,16 @@
 
 // Utility functions for working with FlatBuffers.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_FLATBUFFERS_H_
-#define LIBTEXTCLASSIFIER_UTIL_FLATBUFFERS_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_FLATBUFFERS_H_
+#define LIBTEXTCLASSIFIER_UTILS_FLATBUFFERS_H_
 
 #include <memory>
 #include <string>
 
-#include "model_generated.h"
+#include "annotator/model_generated.h"
 #include "flatbuffers/flatbuffers.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Loads and interprets the buffer as 'FlatbufferMessage' and verifies its
 // integrity.
@@ -93,6 +93,6 @@
                      builder.GetSize());
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_FLATBUFFERS_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_FLATBUFFERS_H_
diff --git a/util/hash/farmhash.cc b/utils/hash/farmhash.cc
similarity index 99%
rename from util/hash/farmhash.cc
rename to utils/hash/farmhash.cc
index 673f45f..f12fd3e 100644
--- a/util/hash/farmhash.cc
+++ b/utils/hash/farmhash.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "util/hash/farmhash.h"
+#include "utils/hash/farmhash.h"
 
 // FARMHASH ASSUMPTIONS: Modify as needed, or use -DFARMHASH_ASSUME_SSE42 etc.
 // Note that if you use -DFARMHASH_ASSUME_SSE42 you likely need -msse42
diff --git a/util/hash/farmhash.h b/utils/hash/farmhash.h
similarity index 96%
rename from util/hash/farmhash.h
rename to utils/hash/farmhash.h
index 477b7a8..f374c0b 100644
--- a/util/hash/farmhash.h
+++ b/utils/hash/farmhash.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
-#define LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_HASH_FARMHASH_H_
+#define LIBTEXTCLASSIFIER_UTILS_HASH_FARMHASH_H_
 
 #include <assert.h>
 #include <stdint.h>
@@ -24,7 +24,7 @@
 #include <utility>
 
 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
-#define NAMESPACE_FOR_HASH_FUNCTIONS tc2farmhash
+#define NAMESPACE_FOR_HASH_FUNCTIONS tc3farmhash
 #endif
 
 namespace NAMESPACE_FOR_HASH_FUNCTIONS {
@@ -261,4 +261,4 @@
 
 }  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_HASH_FARMHASH_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_HASH_FARMHASH_H_
diff --git a/util/i18n/locale.cc b/utils/i18n/locale.cc
similarity index 92%
rename from util/i18n/locale.cc
rename to utils/i18n/locale.cc
index c587d2d..acd0379 100644
--- a/util/i18n/locale.cc
+++ b/utils/i18n/locale.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "util/i18n/locale.h"
+#include "utils/i18n/locale.h"
 
-#include "util/strings/split.h"
+#include "utils/strings/split.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 namespace {
 
@@ -107,4 +107,4 @@
   return Locale(language.ToString(), script.ToString(), region.ToString());
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/i18n/locale.h b/utils/i18n/locale.h
similarity index 83%
rename from util/i18n/locale.h
rename to utils/i18n/locale.h
index 16f10dc..4cfcc22 100644
--- a/util/i18n/locale.h
+++ b/utils/i18n/locale.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_I18N_LOCALE_H_
-#define LIBTEXTCLASSIFIER_UTIL_I18N_LOCALE_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_I18N_LOCALE_H_
+#define LIBTEXTCLASSIFIER_UTILS_I18N_LOCALE_H_
 
 #include <string>
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 class Locale {
  public:
@@ -58,6 +58,6 @@
   bool is_valid_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_I18N_LOCALE_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_I18N_LOCALE_H_
diff --git a/util/i18n/locale_test.cc b/utils/i18n/locale_test.cc
similarity index 92%
rename from util/i18n/locale_test.cc
rename to utils/i18n/locale_test.cc
index 72ece98..3722727 100644
--- a/util/i18n/locale_test.cc
+++ b/utils/i18n/locale_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "util/i18n/locale.h"
+#include "utils/i18n/locale.h"
 
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 TEST(LocaleTest, ParseUnknown) {
@@ -67,4 +67,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/utils/intents/intent-config.fbs b/utils/intents/intent-config.fbs
new file mode 100644
index 0000000..93a6fc9
--- /dev/null
+++ b/utils/intents/intent-config.fbs
@@ -0,0 +1,192 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// The type of variable to fetch.
+namespace libtextclassifier3;
+enum AndroidSimpleIntentGeneratorVariableType : int {
+  INVALID_VARIABLE = 0,
+
+  // The raw text that was classified.
+  RAW_TEXT = 1,
+
+  // Text as a URL with explicit protocol. If no protocol was specified, http
+  // is prepended.
+  URL_TEXT = 2,
+
+  // The raw text, but URL encoded.
+  URL_ENCODED_TEXT = 3,
+
+  // For dates/times: the instant of the event in UTC millis.
+  EVENT_TIME_MS_UTC = 4,
+
+  // For dates/times: the start of the event in UTC millis.
+  EVENT_START_MS_UTC = 5,
+
+  // For dates/times: the end of the event in UTC millis.
+  EVENT_END_MS_UTC = 6,
+
+  // Name of the package that's running the classifier.
+  PACKAGE_NAME = 7,
+}
+
+// Enumerates the possible extra types for the simple intent generator.
+namespace libtextclassifier3;
+enum AndroidSimpleIntentGeneratorExtraType : int {
+  INVALID_EXTRA_TYPE = 0,
+  STRING = 1,
+  BOOL = 2,
+  VARIABLE_AS_LONG = 3,
+}
+
+// Enumerates the possible condition types for the simple intent generator.
+namespace libtextclassifier3;
+enum AndroidSimpleIntentGeneratorConditionType : int {
+  INVALID_CONDITION_TYPE = 0,
+
+  // Queries the UserManager for the given boolean restriction. The condition
+  // passes if the result is of getBoolean is false. The name of the
+  // restriction to check is in the string_ field.
+  USER_RESTRICTION_NOT_SET = 1,
+
+  // Checks that the parsed event start time is at least a give number of
+  // milliseconds in the future. (Only valid if there is a parsed event
+  // time) The offset is stored in the int64_ field.
+  EVENT_START_IN_FUTURE_MS = 2,
+}
+
+// Describes how intents for the various entity types should be generated on
+// Android. This is distributed through the model, but not used by
+// libtextclassifier yet - rather, it's passed to the calling Java code, which
+// implements the Intent generation logic.
+namespace libtextclassifier3;
+table AndroidIntentFactoryOptions {
+  entity:[libtextclassifier3.AndroidIntentFactoryEntityOptions];
+}
+
+// Describes how intents should be generated for a particular entity type.
+namespace libtextclassifier3;
+table AndroidIntentFactoryEntityOptions {
+  // The entity type as defined by one of the TextClassifier ENTITY_TYPE
+  // constants. (e.g. "address", "phone", etc.)
+  entity_type:string;
+
+  // List of generators for all the different types of intents that should
+  // be made available for the entity type.
+  generator:[libtextclassifier3.AndroidIntentGeneratorOptions];
+}
+
+// Configures a single Android Intent generator.
+namespace libtextclassifier3;
+table AndroidIntentGeneratorOptions {
+  // Strings for UI elements.
+  strings:[libtextclassifier3.AndroidIntentGeneratorStrings];
+
+  // Generator specific configuration.
+  simple:libtextclassifier3.AndroidSimpleIntentGeneratorOptions;
+}
+
+// Language dependent configuration for an Android Intent generator.
+namespace libtextclassifier3;
+table AndroidIntentGeneratorStrings {
+  // BCP 47 tag for the supported locale. Note that because of API level
+  // restrictions, this must /not/ use wildcards. To e.g. match all English
+  // locales, use only "en" and not "en_*". Reference the java.util.Locale
+  // constructor for details.
+  language_tag:string;
+
+  // Title shown for the action (see RemoteAction.getTitle).
+  title:string;
+
+  // Description shown for the action (see
+  // RemoteAction.getContentDescription).
+  description:string;
+}
+
+// An extra to set on a simple intent generator Intent.
+namespace libtextclassifier3;
+table AndroidSimpleIntentGeneratorExtra {
+  // The name of the extra to set.
+  name:string;
+
+  // The type of the extra to set.
+  type:libtextclassifier3.AndroidSimpleIntentGeneratorExtraType;
+
+  string_:string;
+
+  bool_:bool;
+  int32_:int;
+}
+
+// A condition that needs to be fulfilled for an Intent to get generated.
+namespace libtextclassifier3;
+table AndroidSimpleIntentGeneratorCondition {
+  type:libtextclassifier3.AndroidSimpleIntentGeneratorConditionType;
+
+  string_:string;
+
+  int32_:int;
+  int64_:long;
+}
+
+// Configures an intent generator where the logic is simple to be expressed with
+// basic rules - which covers the vast majority of use cases and is analogous
+// to Android Actions.
+// Most strings (action, data, type, ...) may contain variable references. To
+// use them, the generator must first declare all the variables it wishes to use
+// in the variables field. The values then become available as numbered
+// arguments (using the normal java.util.Formatter syntax) in the order they
+// were specified.
+namespace libtextclassifier3;
+table AndroidSimpleIntentGeneratorOptions {
+  // The action to set on the Intent (see Intent.setAction). Supports variables.
+  action:string;
+
+  // The data to set on the Intent (see Intent.setData). Supports variables.
+  data:string;
+
+  // The type to set on the Intent (see Intent.setType). Supports variables.
+  type:string;
+
+  // The list of all the extras to add to the Intent.
+  extra:[libtextclassifier3.AndroidSimpleIntentGeneratorExtra];
+
+  // The list of all the variables that become available for substitution in
+  // the action, data, type and extra strings. To e.g. set a field to the value
+  // of the first variable, use "%0$s".
+  variable:[libtextclassifier3.AndroidSimpleIntentGeneratorVariableType];
+
+  // The list of all conditions that need to be fulfilled for Intent generation.
+  condition:[libtextclassifier3.AndroidSimpleIntentGeneratorCondition];
+}
+
+// Describes how intents should be generated for a particular entity type.
+namespace libtextclassifier3.IntentFactoryModel_;
+table IntentGenerator {
+  // The entity type as defined by on the TextClassifier ENTITY_TYPE constants
+  // e.g. "address", "phone", etc.
+  entity_type:string;
+
+  // The template generator lua code, either as text source or precompiled
+  // bytecode.
+  lua_template_generator:[ubyte];
+}
+
+// Describes how intents for the various entity types should be generated.
+namespace libtextclassifier3;
+table IntentFactoryModel {
+  entities:[libtextclassifier3.IntentFactoryModel_.IntentGenerator];
+}
+
diff --git a/utils/java/jni-base.cc b/utils/java/jni-base.cc
new file mode 100644
index 0000000..330732c
--- /dev/null
+++ b/utils/java/jni-base.cc
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/java/jni-base.h"
+
+#include <jni.h>
+#include <type_traits>
+#include <vector>
+
+#include "utils/base/integral_types.h"
+#include "utils/java/scoped_local_ref.h"
+#include "utils/java/string_utils.h"
+#include "utils/memory/mmap.h"
+
+using libtextclassifier3::JStringToUtf8String;
+using libtextclassifier3::ScopedLocalRef;
+
+namespace libtextclassifier3 {
+
+std::string ToStlString(JNIEnv* env, const jstring& str) {
+  std::string result;
+  JStringToUtf8String(env, str, &result);
+  return result;
+}
+
+jint GetFdFromFileDescriptor(JNIEnv* env, jobject fd) {
+  ScopedLocalRef<jclass> fd_class(env->FindClass("java/io/FileDescriptor"),
+                                  env);
+  if (fd_class == nullptr) {
+    TC3_LOG(ERROR) << "Couldn't find FileDescriptor.";
+    return reinterpret_cast<jlong>(nullptr);
+  }
+  jfieldID fd_class_descriptor =
+      env->GetFieldID(fd_class.get(), "descriptor", "I");
+  if (fd_class_descriptor == nullptr) {
+    TC3_LOG(ERROR) << "Couldn't find descriptor.";
+    return reinterpret_cast<jlong>(nullptr);
+  }
+  return env->GetIntField(fd, fd_class_descriptor);
+}
+
+jint GetFdFromAssetFileDescriptor(JNIEnv* env, jobject afd) {
+  ScopedLocalRef<jclass> afd_class(
+      env->FindClass("android/content/res/AssetFileDescriptor"), env);
+  if (afd_class == nullptr) {
+    TC3_LOG(ERROR) << "Couldn't find AssetFileDescriptor.";
+    return reinterpret_cast<jlong>(nullptr);
+  }
+  jmethodID afd_class_getFileDescriptor = env->GetMethodID(
+      afd_class.get(), "getFileDescriptor", "()Ljava/io/FileDescriptor;");
+  if (afd_class_getFileDescriptor == nullptr) {
+    TC3_LOG(ERROR) << "Couldn't find getFileDescriptor.";
+    return reinterpret_cast<jlong>(nullptr);
+  }
+  jobject bundle_jfd = env->CallObjectMethod(afd, afd_class_getFileDescriptor);
+  return GetFdFromFileDescriptor(env, bundle_jfd);
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/java/jni-base.h b/utils/java/jni-base.h
new file mode 100644
index 0000000..23658a3
--- /dev/null
+++ b/utils/java/jni-base.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_JAVA_JNI_BASE_H_
+#define LIBTEXTCLASSIFIER_UTILS_JAVA_JNI_BASE_H_
+
+#include <jni.h>
+#include <string>
+
+// When we use a macro as an argument for a macro, an additional level of
+// indirection is needed, if the macro argument is used with # or ##.
+#define TC3_ADD_QUOTES_HELPER(TOKEN) #TOKEN
+#define TC3_ADD_QUOTES(TOKEN) TC3_ADD_QUOTES_HELPER(TOKEN)
+
+#ifndef TC3_PACKAGE_NAME
+#define TC3_PACKAGE_NAME com_google_android_textclassifier
+#endif
+
+#ifndef TC3_PACKAGE_PATH
+#define TC3_PACKAGE_PATH \
+  "com/google/android/textclassifier/"
+#endif
+
+#define TC3_JNI_METHOD_NAME_INTERNAL(package_name, class_name, method_name) \
+  Java_##package_name##_##class_name##_##method_name
+
+#define TC3_JNI_METHOD_PRIMITIVE(return_type, package_name, class_name, \
+                                 method_name)                           \
+  JNIEXPORT return_type JNICALL TC3_JNI_METHOD_NAME_INTERNAL(           \
+      package_name, class_name, method_name)
+
+// The indirection is needed to correctly expand the TC3_PACKAGE_NAME macro.
+// See the explanation near TC3_ADD_QUOTES macro.
+#define TC3_JNI_METHOD2(return_type, package_name, class_name, method_name) \
+  TC3_JNI_METHOD_PRIMITIVE(return_type, package_name, class_name, method_name)
+
+#define TC3_JNI_METHOD(return_type, class_name, method_name) \
+  TC3_JNI_METHOD2(return_type, TC3_PACKAGE_NAME, class_name, method_name)
+
+#define TC3_JNI_METHOD_NAME2(package_name, class_name, method_name) \
+  TC3_JNI_METHOD_NAME_INTERNAL(package_name, class_name, method_name)
+
+#define TC3_JNI_METHOD_NAME(class_name, method_name) \
+  TC3_JNI_METHOD_NAME2(TC3_PACKAGE_NAME, class_name, method_name)
+
+namespace libtextclassifier3 {
+
+template <typename T, typename F>
+std::pair<bool, T> CallJniMethod0(JNIEnv* env, jobject object,
+                                  jclass class_object, F function,
+                                  const std::string& method_name,
+                                  const std::string& return_java_type) {
+  const jmethodID method = env->GetMethodID(class_object, method_name.c_str(),
+                                            ("()" + return_java_type).c_str());
+  if (!method) {
+    return std::make_pair(false, T());
+  }
+  return std::make_pair(true, (env->*function)(object, method));
+}
+
+std::string ToStlString(JNIEnv* env, const jstring& str);
+
+// Get system-level file descriptor from AssetFileDescriptor.
+jint GetFdFromAssetFileDescriptor(JNIEnv* env, jobject afd);
+
+// Get system-level file descriptor from FileDescriptor.
+jint GetFdFromFileDescriptor(JNIEnv* env, jobject fd);
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_JAVA_JNI_BASE_H_
diff --git a/utils/java/jni-cache.cc b/utils/java/jni-cache.cc
new file mode 100644
index 0000000..4bb9523
--- /dev/null
+++ b/utils/java/jni-cache.cc
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/java/jni-cache.h"
+
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+JniCache::JniCache(JavaVM* jvm)
+    : jvm(jvm),
+      string_class(nullptr, jvm),
+      string_utf8(nullptr, jvm),
+      pattern_class(nullptr, jvm),
+      matcher_class(nullptr, jvm),
+      locale_class(nullptr, jvm),
+      locale_us(nullptr, jvm),
+      breakiterator_class(nullptr, jvm),
+      integer_class(nullptr, jvm),
+      calendar_class(nullptr, jvm),
+      timezone_class(nullptr, jvm),
+      urlencoder_class(nullptr, jvm)
+{
+}
+
+// The macros below are intended to reduce the boilerplate in Create and avoid
+// easily introduced copy/paste errors.
+#define TC3_CHECK_JNI_PTR(PTR) \
+  TC3_DCHECK(PTR);             \
+  if (!(PTR)) return nullptr;
+
+#define TC3_GET_CLASS(FIELD, NAME)                                       \
+  result->FIELD##_class = MakeGlobalRef(env->FindClass(NAME), env, jvm); \
+  TC3_CHECK_JNI_PTR(result->FIELD##_class)
+
+#define TC3_GET_OPTIONAL_CLASS(FIELD, NAME)                              \
+  result->FIELD##_class = MakeGlobalRef(env->FindClass(NAME), env, jvm); \
+  env->ExceptionClear();
+
+#define TC3_GET_METHOD(CLASS, FIELD, NAME, SIGNATURE)                 \
+  result->CLASS##_##FIELD =                                           \
+      env->GetMethodID(result->CLASS##_class.get(), NAME, SIGNATURE); \
+  TC3_CHECK_JNI_PTR(result->CLASS##_##FIELD)
+
+#define TC3_GET_OPTIONAL_METHOD(CLASS, FIELD, NAME, SIGNATURE)          \
+  if (result->CLASS##_class != nullptr) {                               \
+    result->CLASS##_##FIELD =                                           \
+        env->GetMethodID(result->CLASS##_class.get(), NAME, SIGNATURE); \
+    env->ExceptionClear();                                              \
+  }
+
+#define TC3_GET_OPTIONAL_STATIC_METHOD(CLASS, FIELD, NAME, SIGNATURE)         \
+  if (result->CLASS##_class != nullptr) {                                     \
+    result->CLASS##_##FIELD =                                                 \
+        env->GetStaticMethodID(result->CLASS##_class.get(), NAME, SIGNATURE); \
+    env->ExceptionClear();                                                    \
+  }
+
+#define TC3_GET_STATIC_METHOD(CLASS, FIELD, NAME, SIGNATURE)                \
+  result->CLASS##_##FIELD =                                                 \
+      env->GetStaticMethodID(result->CLASS##_class.get(), NAME, SIGNATURE); \
+  TC3_CHECK_JNI_PTR(result->CLASS##_##FIELD)
+
+#define TC3_GET_STATIC_OBJECT_FIELD(CLASS, FIELD, NAME, SIGNATURE)         \
+  const jfieldID CLASS##_##FIELD##_field =                                 \
+      env->GetStaticFieldID(result->CLASS##_class.get(), NAME, SIGNATURE); \
+  TC3_CHECK_JNI_PTR(CLASS##_##FIELD##_field)                               \
+  result->CLASS##_##FIELD =                                                \
+      MakeGlobalRef(env->GetStaticObjectField(result->CLASS##_class.get(), \
+                                              CLASS##_##FIELD##_field),    \
+                    env, jvm);                                             \
+  TC3_CHECK_JNI_PTR(result->CLASS##_##FIELD)
+
+#define TC3_GET_STATIC_INT_FIELD(CLASS, FIELD, NAME)                 \
+  const jfieldID CLASS##_##FIELD##_field =                           \
+      env->GetStaticFieldID(result->CLASS##_class.get(), NAME, "I"); \
+  TC3_CHECK_JNI_PTR(CLASS##_##FIELD##_field)                         \
+  result->CLASS##_##FIELD = env->GetStaticIntField(                  \
+      result->CLASS##_class.get(), CLASS##_##FIELD##_field);         \
+  TC3_CHECK_JNI_PTR(result->CLASS##_##FIELD)
+
+std::unique_ptr<JniCache> JniCache::Create(JNIEnv* env) {
+  if (env == nullptr) {
+    return nullptr;
+  }
+  JavaVM* jvm = nullptr;
+  if (JNI_OK != env->GetJavaVM(&jvm) || jvm == nullptr) {
+    return nullptr;
+  }
+  std::unique_ptr<JniCache> result(new JniCache(jvm));
+
+  // String
+  TC3_GET_CLASS(string, "java/lang/String");
+  TC3_GET_METHOD(string, init_bytes_charset, "<init>",
+                 "([BLjava/lang/String;)V");
+  TC3_GET_METHOD(string, code_point_count, "codePointCount", "(II)I");
+  TC3_GET_METHOD(string, length, "length", "()I");
+  result->string_utf8 = MakeGlobalRef(env->NewStringUTF("UTF-8"), env, jvm);
+  TC3_CHECK_JNI_PTR(result->string_utf8)
+
+  // Pattern
+  TC3_GET_CLASS(pattern, "java/util/regex/Pattern");
+  TC3_GET_STATIC_METHOD(pattern, compile, "compile",
+                        "(Ljava/lang/String;)Ljava/util/regex/Pattern;");
+  TC3_GET_METHOD(pattern, matcher, "matcher",
+                 "(Ljava/lang/CharSequence;)Ljava/util/regex/Matcher;");
+
+  // Matcher
+  TC3_GET_CLASS(matcher, "java/util/regex/Matcher");
+  TC3_GET_METHOD(matcher, matches, "matches", "()Z");
+  TC3_GET_METHOD(matcher, find, "find", "()Z");
+  TC3_GET_METHOD(matcher, reset, "reset", "()Ljava/util/regex/Matcher;");
+  TC3_GET_METHOD(matcher, start_idx, "start", "(I)I");
+  TC3_GET_METHOD(matcher, end_idx, "end", "(I)I");
+  TC3_GET_METHOD(matcher, group, "group", "()Ljava/lang/String;");
+  TC3_GET_METHOD(matcher, group_idx, "group", "(I)Ljava/lang/String;");
+
+  // Locale
+  TC3_GET_CLASS(locale, "java/util/Locale");
+  TC3_GET_STATIC_OBJECT_FIELD(locale, us, "US", "Ljava/util/Locale;");
+  TC3_GET_METHOD(locale, init_string, "<init>", "(Ljava/lang/String;)V");
+  TC3_GET_OPTIONAL_STATIC_METHOD(locale, for_language_tag, "forLanguageTag",
+                                 "(Ljava/lang/String;)Ljava/util/Locale;");
+
+  // BreakIterator
+  TC3_GET_CLASS(breakiterator, "java/text/BreakIterator");
+  TC3_GET_STATIC_METHOD(breakiterator, getwordinstance, "getWordInstance",
+                        "(Ljava/util/Locale;)Ljava/text/BreakIterator;");
+  TC3_GET_METHOD(breakiterator, settext, "setText", "(Ljava/lang/String;)V");
+  TC3_GET_METHOD(breakiterator, next, "next", "()I");
+
+  // Integer
+  TC3_GET_CLASS(integer, "java/lang/Integer");
+  TC3_GET_STATIC_METHOD(integer, parse_int, "parseInt",
+                        "(Ljava/lang/String;)I");
+
+  // Calendar.
+  TC3_GET_CLASS(calendar, "java/util/Calendar");
+  TC3_GET_STATIC_METHOD(
+      calendar, get_instance, "getInstance",
+      "(Ljava/util/TimeZone;Ljava/util/Locale;)Ljava/util/Calendar;");
+  TC3_GET_METHOD(calendar, get_first_day_of_week, "getFirstDayOfWeek", "()I");
+  TC3_GET_METHOD(calendar, get_time_in_millis, "getTimeInMillis", "()J");
+  TC3_GET_METHOD(calendar, set_time_in_millis, "setTimeInMillis", "(J)V");
+  TC3_GET_METHOD(calendar, add, "add", "(II)V");
+  TC3_GET_METHOD(calendar, get, "get", "(I)I");
+  TC3_GET_METHOD(calendar, set, "set", "(II)V");
+  TC3_GET_STATIC_INT_FIELD(calendar, zone_offset, "ZONE_OFFSET");
+  TC3_GET_STATIC_INT_FIELD(calendar, dst_offset, "DST_OFFSET");
+  TC3_GET_STATIC_INT_FIELD(calendar, year, "YEAR");
+  TC3_GET_STATIC_INT_FIELD(calendar, month, "MONTH");
+  TC3_GET_STATIC_INT_FIELD(calendar, day_of_year, "DAY_OF_YEAR");
+  TC3_GET_STATIC_INT_FIELD(calendar, day_of_month, "DAY_OF_MONTH");
+  TC3_GET_STATIC_INT_FIELD(calendar, day_of_week, "DAY_OF_WEEK");
+  TC3_GET_STATIC_INT_FIELD(calendar, hour_of_day, "HOUR_OF_DAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, minute, "MINUTE");
+  TC3_GET_STATIC_INT_FIELD(calendar, second, "SECOND");
+  TC3_GET_STATIC_INT_FIELD(calendar, millisecond, "MILLISECOND");
+  TC3_GET_STATIC_INT_FIELD(calendar, sunday, "SUNDAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, monday, "MONDAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, tuesday, "TUESDAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, wednesday, "WEDNESDAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, thursday, "THURSDAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, friday, "FRIDAY");
+  TC3_GET_STATIC_INT_FIELD(calendar, saturday, "SATURDAY");
+
+  // TimeZone.
+  TC3_GET_CLASS(timezone, "java/util/TimeZone");
+  TC3_GET_STATIC_METHOD(timezone, get_timezone, "getTimeZone",
+                        "(Ljava/lang/String;)Ljava/util/TimeZone;");
+
+  // URLEncoder.
+  TC3_GET_CLASS(urlencoder, "java/net/URLEncoder");
+  TC3_GET_STATIC_METHOD(
+      urlencoder, encode, "encode",
+      "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;");
+
+  // Context.
+  TC3_GET_CLASS(context, "android/content/Context");
+  TC3_GET_METHOD(context, get_package_name, "getPackageName",
+                 "()Ljava/lang/String;");
+  TC3_GET_METHOD(context, get_system_service, "getSystemService",
+                 "(Ljava/lang/String;)Ljava/lang/Object;");
+
+  // Uri.
+  TC3_GET_CLASS(uri, "android/net/Uri");
+  TC3_GET_STATIC_METHOD(uri, parse, "parse",
+                        "(Ljava/lang/String;)Landroid/net/Uri;");
+  TC3_GET_METHOD(uri, get_scheme, "getScheme", "()Ljava/lang/String;");
+
+  // UserManager.
+  TC3_GET_OPTIONAL_CLASS(usermanager, "android/os/UserManager");
+  TC3_GET_OPTIONAL_METHOD(usermanager, get_user_restrictions,
+                          "getUserRestrictions", "()Landroid/os/Bundle;");
+
+  // Bundle.
+  TC3_GET_CLASS(bundle, "android/os/Bundle");
+  TC3_GET_METHOD(bundle, get_boolean, "getBoolean", "(Ljava/lang/String;)Z");
+
+  return result;
+}
+
+#undef TC3_GET_STATIC_INT_FIELD
+#undef TC3_GET_STATIC_OBJECT_FIELD
+#undef TC3_GET_STATIC_METHOD
+#undef TC3_GET_METHOD
+#undef TC3_GET_CLASS
+#undef TC3_CHECK_JNI_PTR
+
+JNIEnv* JniCache::GetEnv() const {
+  void* env;
+  if (JNI_OK == jvm->GetEnv(&env, JNI_VERSION_1_4)) {
+    return reinterpret_cast<JNIEnv*>(env);
+  } else {
+    TC3_LOG(ERROR) << "JavaICU UniLib used on unattached thread";
+    return nullptr;
+  }
+}
+
+bool JniCache::ExceptionCheckAndClear() const {
+  JNIEnv* env = GetEnv();
+  TC3_CHECK(env != nullptr);
+  const bool result = env->ExceptionCheck();
+  if (result) {
+    env->ExceptionDescribe();
+    env->ExceptionClear();
+  }
+  return result;
+}
+
+ScopedLocalRef<jstring> JniCache::ConvertToJavaString(
+    const UnicodeText& text) const {
+  // Create java byte array.
+  JNIEnv* jenv = GetEnv();
+  const ScopedLocalRef<jbyteArray> text_java_utf8(
+      jenv->NewByteArray(text.size_bytes()), jenv);
+  if (!text_java_utf8) {
+    return nullptr;
+  }
+
+  jenv->SetByteArrayRegion(text_java_utf8.get(), 0, text.size_bytes(),
+                           reinterpret_cast<const jbyte*>(text.data()));
+
+  // Create the string with a UTF-8 charset.
+  return ScopedLocalRef<jstring>(
+      reinterpret_cast<jstring>(
+          jenv->NewObject(string_class.get(), string_init_bytes_charset,
+                          text_java_utf8.get(), string_utf8.get())),
+      jenv);
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/java/jni-cache.h b/utils/java/jni-cache.h
new file mode 100644
index 0000000..8163817
--- /dev/null
+++ b/utils/java/jni-cache.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_JAVA_JNI_CACHE_H_
+#define LIBTEXTCLASSIFIER_UTILS_JAVA_JNI_CACHE_H_
+
+#include <jni.h>
+#include "utils/java/scoped_global_ref.h"
+#include "utils/java/scoped_local_ref.h"
+#include "utils/utf8/unicodetext.h"
+
+namespace libtextclassifier3 {
+
+// A helper class to cache class and method pointers for calls from JNI to Java.
+// (for implementations such as Java ICU that need to make calls from C++ to
+// Java)
+struct JniCache {
+  static std::unique_ptr<JniCache> Create(JNIEnv* env);
+
+  JNIEnv* GetEnv() const;
+  bool ExceptionCheckAndClear() const;
+
+  JavaVM* jvm = nullptr;
+
+  // java.lang.String
+  ScopedGlobalRef<jclass> string_class;
+  jmethodID string_init_bytes_charset = nullptr;
+  jmethodID string_code_point_count = nullptr;
+  jmethodID string_length = nullptr;
+  ScopedGlobalRef<jstring> string_utf8;
+
+  // java.util.regex.Pattern
+  ScopedGlobalRef<jclass> pattern_class;
+  jmethodID pattern_compile = nullptr;
+  jmethodID pattern_matcher = nullptr;
+
+  // java.util.regex.Matcher
+  ScopedGlobalRef<jclass> matcher_class;
+  jmethodID matcher_matches = nullptr;
+  jmethodID matcher_find = nullptr;
+  jmethodID matcher_reset = nullptr;
+  jmethodID matcher_start_idx = nullptr;
+  jmethodID matcher_end_idx = nullptr;
+  jmethodID matcher_group = nullptr;
+  jmethodID matcher_group_idx = nullptr;
+
+  // java.util.Locale
+  ScopedGlobalRef<jclass> locale_class;
+  ScopedGlobalRef<jobject> locale_us;
+  jmethodID locale_init_string = nullptr;
+  jmethodID locale_for_language_tag = nullptr;
+
+  // java.text.BreakIterator
+  ScopedGlobalRef<jclass> breakiterator_class;
+  jmethodID breakiterator_getwordinstance = nullptr;
+  jmethodID breakiterator_settext = nullptr;
+  jmethodID breakiterator_next = nullptr;
+
+  // java.lang.Integer
+  ScopedGlobalRef<jclass> integer_class;
+  jmethodID integer_parse_int = nullptr;
+
+  // java.util.Calendar
+  ScopedGlobalRef<jclass> calendar_class;
+  jmethodID calendar_get_instance = nullptr;
+  jmethodID calendar_get_first_day_of_week = nullptr;
+  jmethodID calendar_get_time_in_millis = nullptr;
+  jmethodID calendar_set_time_in_millis = nullptr;
+  jmethodID calendar_add = nullptr;
+  jmethodID calendar_get = nullptr;
+  jmethodID calendar_set = nullptr;
+  jint calendar_zone_offset;
+  jint calendar_dst_offset;
+  jint calendar_year;
+  jint calendar_month;
+  jint calendar_day_of_year;
+  jint calendar_day_of_month;
+  jint calendar_day_of_week;
+  jint calendar_hour_of_day;
+  jint calendar_minute;
+  jint calendar_second;
+  jint calendar_millisecond;
+  jint calendar_sunday;
+  jint calendar_monday;
+  jint calendar_tuesday;
+  jint calendar_wednesday;
+  jint calendar_thursday;
+  jint calendar_friday;
+  jint calendar_saturday;
+
+  // java.util.TimeZone
+  ScopedGlobalRef<jclass> timezone_class;
+  jmethodID timezone_get_timezone = nullptr;
+
+  // java.net.URLEncoder
+  ScopedGlobalRef<jclass> urlencoder_class;
+  jmethodID urlencoder_encode = nullptr;
+
+  // android.content.Context
+  ScopedGlobalRef<jclass> context_class;
+  jmethodID context_get_package_name = nullptr;
+  jmethodID context_get_system_service = nullptr;
+
+  // android.net.Uri
+  ScopedGlobalRef<jclass> uri_class;
+  jmethodID uri_parse = nullptr;
+  jmethodID uri_get_scheme = nullptr;
+
+  // android.os.UserManager
+  ScopedGlobalRef<jclass> usermanager_class;
+  jmethodID usermanager_get_user_restrictions = nullptr;
+
+  // android.os.Bundle
+  ScopedGlobalRef<jclass> bundle_class;
+  jmethodID bundle_get_boolean = nullptr;
+
+  // Helper to convert lib3 UnicodeText to Java strings.
+  ScopedLocalRef<jstring> ConvertToJavaString(const UnicodeText& text) const;
+
+ private:
+  explicit JniCache(JavaVM* jvm);
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_JAVA_JNI_CACHE_H_
diff --git a/util/java/scoped_global_ref.h b/utils/java/scoped_global_ref.h
similarity index 75%
rename from util/java/scoped_global_ref.h
rename to utils/java/scoped_global_ref.h
index 3f8754d..de0608e 100644
--- a/util/java/scoped_global_ref.h
+++ b/utils/java/scoped_global_ref.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,16 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_GLOBAL_REF_H_
-#define LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_GLOBAL_REF_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_JAVA_SCOPED_GLOBAL_REF_H_
+#define LIBTEXTCLASSIFIER_UTILS_JAVA_SCOPED_GLOBAL_REF_H_
 
 #include <jni.h>
 #include <memory>
 #include <type_traits>
 
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // A deleter to be used with std::unique_ptr to delete JNI global references.
 class GlobalRefDeleter {
@@ -38,7 +38,7 @@
 
   // Copy assignment to allow move semantics in ScopedGlobalRef.
   GlobalRefDeleter& operator=(const GlobalRefDeleter& rhs) {
-    TC_CHECK_EQ(jvm_, rhs.jvm_);
+    TC3_CHECK_EQ(jvm_, rhs.jvm_);
     return *this;
   }
 
@@ -64,13 +64,15 @@
 using ScopedGlobalRef =
     std::unique_ptr<typename std::remove_pointer<T>::type, GlobalRefDeleter>;
 
-// A helper to create global references.
+// A helper to create global references. Assumes the object has a local
+// reference, which it deletes.
 template <typename T>
 ScopedGlobalRef<T> MakeGlobalRef(T object, JNIEnv* env, JavaVM* jvm) {
-  const jobject globalObject = env->NewGlobalRef(object);
-  return ScopedGlobalRef<T>(reinterpret_cast<T>(globalObject), jvm);
+  const jobject global_object = env->NewGlobalRef(object);
+  env->DeleteLocalRef(object);
+  return ScopedGlobalRef<T>(reinterpret_cast<T>(global_object), jvm);
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_GLOBAL_REF_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_JAVA_SCOPED_GLOBAL_REF_H_
diff --git a/util/java/scoped_local_ref.h b/utils/java/scoped_local_ref.h
similarity index 84%
rename from util/java/scoped_local_ref.h
rename to utils/java/scoped_local_ref.h
index 8476767..f439c45 100644
--- a/util/java/scoped_local_ref.h
+++ b/utils/java/scoped_local_ref.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,16 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
-#define LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_JAVA_SCOPED_LOCAL_REF_H_
+#define LIBTEXTCLASSIFIER_UTILS_JAVA_SCOPED_LOCAL_REF_H_
 
 #include <jni.h>
 #include <memory>
 #include <type_traits>
 
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // A deleter to be used with std::unique_ptr to delete JNI local references.
 class LocalRefDeleter {
@@ -40,7 +40,7 @@
   LocalRefDeleter& operator=(const LocalRefDeleter& rhs) {
     // As the deleter and its state are thread-local, ensure the envs
     // are consistent but do nothing.
-    TC_CHECK_EQ(env_, rhs.env_);
+    TC3_CHECK_EQ(env_, rhs.env_);
     return *this;
   }
 
@@ -66,6 +66,6 @@
 using ScopedLocalRef =
     std::unique_ptr<typename std::remove_pointer<T>::type, LocalRefDeleter>;
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_JAVA_SCOPED_LOCAL_REF_H_
diff --git a/util/java/string_utils.cc b/utils/java/string_utils.cc
similarity index 62%
rename from util/java/string_utils.cc
rename to utils/java/string_utils.cc
index ffd5b11..457a667 100644
--- a/util/java/string_utils.cc
+++ b/utils/java/string_utils.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,26 @@
  * limitations under the License.
  */
 
-#include "util/java/string_utils.h"
+#include "utils/java/string_utils.h"
 
-#include "util/base/logging.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
+
+bool JByteArrayToString(JNIEnv* env, const jbyteArray& array,
+                        std::string* result) {
+  jbyte* const array_bytes = env->GetByteArrayElements(array, JNI_FALSE);
+  if (array_bytes == nullptr) {
+    return false;
+  }
+
+  const int array_length = env->GetArrayLength(array);
+  *result = std::string(reinterpret_cast<char*>(array_bytes), array_length);
+
+  env->ReleaseByteArrayElements(array, array_bytes, JNI_ABORT);
+
+  return true;
+}
 
 bool JStringToUtf8String(JNIEnv* env, const jstring& jstr,
                          std::string* result) {
@@ -29,7 +44,7 @@
 
   jclass string_class = env->FindClass("java/lang/String");
   if (!string_class) {
-    TC_LOG(ERROR) << "Can't find String class";
+    TC3_LOG(ERROR) << "Can't find String class";
     return false;
   }
 
@@ -37,16 +52,13 @@
       env->GetMethodID(string_class, "getBytes", "(Ljava/lang/String;)[B");
 
   jstring encoding = env->NewStringUTF("UTF-8");
+
   jbyteArray array = reinterpret_cast<jbyteArray>(
       env->CallObjectMethod(jstr, get_bytes_id, encoding));
 
-  jbyte* const array_bytes = env->GetByteArrayElements(array, JNI_FALSE);
-  int length = env->GetArrayLength(array);
-
-  *result = std::string(reinterpret_cast<char*>(array_bytes), length);
+  JByteArrayToString(env, array, result);
 
   // Release the array.
-  env->ReleaseByteArrayElements(array, array_bytes, JNI_ABORT);
   env->DeleteLocalRef(array);
   env->DeleteLocalRef(string_class);
   env->DeleteLocalRef(encoding);
@@ -54,4 +66,10 @@
   return true;
 }
 
-}  // namespace libtextclassifier2
+ScopedStringChars GetScopedStringChars(JNIEnv* env, jstring string,
+                                       jboolean* is_copy) {
+  return ScopedStringChars(env->GetStringUTFChars(string, is_copy),
+                           StringCharsReleaser(env, string));
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/java/string_utils.h b/utils/java/string_utils.h
new file mode 100644
index 0000000..172a938
--- /dev/null
+++ b/utils/java/string_utils.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_JAVA_STRING_UTILS_H_
+#define LIBTEXTCLASSIFIER_UTILS_JAVA_STRING_UTILS_H_
+
+#include <jni.h>
+#include <memory>
+#include <string>
+
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+bool JByteArrayToString(JNIEnv* env, const jbyteArray& array,
+                        std::string* result);
+bool JStringToUtf8String(JNIEnv* env, const jstring& jstr, std::string* result);
+
+// A deleter to be used with std::unique_ptr to release Java string chars.
+class StringCharsReleaser {
+ public:
+  StringCharsReleaser() : env_(nullptr) {}
+
+  StringCharsReleaser(JNIEnv* env, jstring jstr) : env_(env), jstr_(jstr) {}
+
+  StringCharsReleaser(const StringCharsReleaser& orig) = default;
+
+  // Copy assignment to allow move semantics in StringCharsReleaser.
+  StringCharsReleaser& operator=(const StringCharsReleaser& rhs) {
+    // As the releaser and its state are thread-local, it's enough to only
+    // ensure the envs are consistent but do nothing.
+    TC3_CHECK_EQ(env_, rhs.env_);
+    return *this;
+  }
+
+  // The delete operator.
+  void operator()(const char* chars) const {
+    if (env_ != nullptr) {
+      env_->ReleaseStringUTFChars(jstr_, chars);
+    }
+  }
+
+ private:
+  // The env_ stashed to use for deletion. Thread-local, don't share!
+  JNIEnv* const env_;
+
+  // The referenced jstring.
+  jstring jstr_;
+};
+
+// A smart pointer that releases string chars when it goes out of scope.
+// of scope.
+// Note that this class is not thread-safe since it caches JNIEnv in
+// the deleter. Do not use the same jobject across different threads.
+using ScopedStringChars = std::unique_ptr<const char, StringCharsReleaser>;
+
+// Returns a scoped pointer to the array of Unicode characters of a string.
+ScopedStringChars GetScopedStringChars(JNIEnv* env, jstring string,
+                                       jboolean* is_copy = nullptr);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_JAVA_STRING_UTILS_H_
diff --git a/util/math/fastexp.cc b/utils/math/fastexp.cc
similarity index 92%
rename from util/math/fastexp.cc
rename to utils/math/fastexp.cc
index 4bf8592..b319eae 100644
--- a/util/math/fastexp.cc
+++ b/utils/math/fastexp.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "util/math/fastexp.h"
+#include "utils/math/fastexp.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 const int FastMathClass::kBits;
 const int FastMathClass::kMask1;
@@ -45,4 +45,4 @@
      7940441, 8029106, 8118253, 8207884, 8298001}
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/math/fastexp.h b/utils/math/fastexp.h
similarity index 78%
rename from util/math/fastexp.h
rename to utils/math/fastexp.h
index af7a08c..63e5d5d 100644
--- a/util/math/fastexp.h
+++ b/utils/math/fastexp.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,18 +16,18 @@
 
 // Fast approximation for exp.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_MATH_FASTEXP_H_
-#define LIBTEXTCLASSIFIER_UTIL_MATH_FASTEXP_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_MATH_FASTEXP_H_
+#define LIBTEXTCLASSIFIER_UTILS_MATH_FASTEXP_H_
 
 #include <cassert>
 #include <cmath>
 #include <limits>
 
-#include "util/base/casts.h"
-#include "util/base/integral_types.h"
-#include "util/base/logging.h"
+#include "utils/base/casts.h"
+#include "utils/base/integral_types.h"
+#include "utils/base/logging.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 class FastMathClass {
  private:
@@ -42,7 +42,7 @@
 
  public:
   float VeryFastExp2(float f) const {
-    TC_DCHECK_LE(fabs(f), 126);
+    TC3_DCHECK_LE(fabs(f), 126);
     const float g = f + (127 + (1 << (23 - kBits)));
     const int32 x = bit_cast<int32>(g);
     int32 ret = ((x & kMask2) << (23 - kBits))
@@ -63,6 +63,6 @@
 inline float VeryFastExp2(float f) { return FastMathInstance.VeryFastExp2(f); }
 inline float VeryFastExp(float f) { return FastMathInstance.VeryFastExp(f); }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_MATH_FASTEXP_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_MATH_FASTEXP_H_
diff --git a/util/math/softmax.cc b/utils/math/softmax.cc
similarity index 90%
rename from util/math/softmax.cc
rename to utils/math/softmax.cc
index 986787f..c278625 100644
--- a/util/math/softmax.cc
+++ b/utils/math/softmax.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,19 +14,19 @@
  * limitations under the License.
  */
 
-#include "util/math/softmax.h"
+#include "utils/math/softmax.h"
 
 #include <limits>
 
-#include "util/base/logging.h"
-#include "util/math/fastexp.h"
+#include "utils/base/logging.h"
+#include "utils/math/fastexp.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 float ComputeSoftmaxProbability(const std::vector<float> &scores, int label) {
   if ((label < 0) || (label >= scores.size())) {
-    TC_LOG(ERROR) << "label " << label << " outside range "
-                  << "[0, " << scores.size() << ")";
+    TC3_LOG(ERROR) << "label " << label << " outside range "
+                   << "[0, " << scores.size() << ")";
     return 0.0f;
   }
 
@@ -101,4 +101,4 @@
   return softmax;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/math/softmax.h b/utils/math/softmax.h
similarity index 81%
rename from util/math/softmax.h
rename to utils/math/softmax.h
index f70a9ab..8ac198b 100644
--- a/util/math/softmax.h
+++ b/utils/math/softmax.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_MATH_SOFTMAX_H_
-#define LIBTEXTCLASSIFIER_UTIL_MATH_SOFTMAX_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_MATH_SOFTMAX_H_
+#define LIBTEXTCLASSIFIER_UTILS_MATH_SOFTMAX_H_
 
 #include <vector>
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Computes probability of a softmax label.  Parameter "scores" is the vector of
 // softmax logits.  Returns 0.0f if "label" is outside the range [0,
@@ -33,6 +33,6 @@
 // Same as above but operates on an array of floats.
 std::vector<float> ComputeSoftmax(const float *scores, int scores_size);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_MATH_SOFTMAX_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_MATH_SOFTMAX_H_
diff --git a/util/memory/mmap.cc b/utils/memory/mmap.cc
similarity index 85%
rename from util/memory/mmap.cc
rename to utils/memory/mmap.cc
index 6b0bdf2..a251024 100644
--- a/util/memory/mmap.cc
+++ b/utils/memory/mmap.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "util/memory/mmap.h"
+#include "utils/memory/mmap.h"
 
 #include <errno.h>
 #include <fcntl.h>
@@ -24,10 +24,10 @@
 #include <sys/stat.h>
 #include <unistd.h>
 
-#include "util/base/logging.h"
-#include "util/base/macros.h"
+#include "utils/base/logging.h"
+#include "utils/base/macros.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 namespace {
 inline std::string GetLastSystemError() { return std::string(strerror(errno)); }
@@ -41,14 +41,14 @@
     int result = close(fd_);
     if (result != 0) {
       const std::string last_error = GetLastSystemError();
-      TC_LOG(ERROR) << "Error closing file descriptor: " << last_error;
+      TC3_LOG(ERROR) << "Error closing file descriptor: " << last_error;
     }
   }
 
  private:
   const int fd_;
 
-  TC_DISALLOW_COPY_AND_ASSIGN(FileCloser);
+  TC3_DISALLOW_COPY_AND_ASSIGN(FileCloser);
 };
 
 }  // namespace
@@ -58,7 +58,7 @@
 
   if (fd < 0) {
     const std::string last_error = GetLastSystemError();
-    TC_LOG(ERROR) << "Error opening " << filename << ": " << last_error;
+    TC3_LOG(ERROR) << "Error opening " << filename << ": " << last_error;
     return GetErrorMmapHandle();
   }
 
@@ -75,7 +75,7 @@
   struct stat sb;
   if (fstat(fd, &sb) != 0) {
     const std::string last_error = GetLastSystemError();
-    TC_LOG(ERROR) << "Unable to stat fd: " << last_error;
+    TC3_LOG(ERROR) << "Unable to stat fd: " << last_error;
     return GetErrorMmapHandle();
   }
 
@@ -111,7 +111,7 @@
       aligned_offset);
   if (mmap_addr == MAP_FAILED) {
     const std::string last_error = GetLastSystemError();
-    TC_LOG(ERROR) << "Error while mmapping: " << last_error;
+    TC3_LOG(ERROR) << "Error while mmapping: " << last_error;
     return GetErrorMmapHandle();
   }
 
@@ -126,10 +126,10 @@
   }
   if (munmap(mmap_handle.unmap_addr(), mmap_handle.num_bytes()) != 0) {
     const std::string last_error = GetLastSystemError();
-    TC_LOG(ERROR) << "Error during Unmap / munmap: " << last_error;
+    TC3_LOG(ERROR) << "Error during Unmap / munmap: " << last_error;
     return false;
   }
   return true;
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/memory/mmap.h b/utils/memory/mmap.h
similarity index 91%
rename from util/memory/mmap.h
rename to utils/memory/mmap.h
index 7d28b64..acce7db 100644
--- a/util/memory/mmap.h
+++ b/utils/memory/mmap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,17 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_MEMORY_MMAP_H_
-#define LIBTEXTCLASSIFIER_UTIL_MEMORY_MMAP_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_MEMORY_MMAP_H_
+#define LIBTEXTCLASSIFIER_UTILS_MEMORY_MMAP_H_
 
 #include <stddef.h>
 
 #include <string>
 
-#include "util/base/integral_types.h"
-#include "util/strings/stringpiece.h"
+#include "utils/base/integral_types.h"
+#include "utils/strings/stringpiece.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Handle for a memory area where a file has been mmapped.
 //
@@ -86,7 +86,7 @@
 // Sample usage:
 //
 // MmapHandle mmap_handle = MmapFile(filename);
-// TC_DCHECK(mmap_handle.ok()) << "Unable to mmap " << filename;
+// TC3_DCHECK(mmap_handle.ok()) << "Unable to mmap " << filename;
 //
 // ... use data from addresses
 // ... [mmap_handle.start, mmap_handle.start + mmap_handle.num_bytes)
@@ -136,6 +136,6 @@
   MmapHandle handle_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_MEMORY_MMAP_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_MEMORY_MMAP_H_
diff --git a/utils/optional.h b/utils/optional.h
new file mode 100644
index 0000000..15d2619
--- /dev/null
+++ b/utils/optional.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_OPTIONAL_H_
+#define LIBTEXTCLASSIFIER_UTILS_OPTIONAL_H_
+
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+// Holds an optional value.
+template <class T>
+class Optional {
+ public:
+  Optional() : init_(false) {}
+
+  Optional(const Optional& other) {
+    init_ = other.init_;
+    if (other.init_) {
+      value_ = other.value_;
+    }
+  }
+
+  explicit Optional(T value) : init_(true), value_(value) {}
+
+  Optional& operator=(Optional&& other) {
+    init_ = other.init_;
+    if (other.init_) {
+      value_ = std::move(other.value_);
+    }
+    return *this;
+  }
+
+  Optional& operator=(T&& other) {
+    init_ = true;
+    value_ = std::move(other);
+    return *this;
+  }
+
+  constexpr bool has_value() const { return init_; }
+
+  T const* operator->() const {
+    TC3_CHECK(init_) << "Bad optional access.";
+    return &value_;
+  }
+
+  T const& value() const& {
+    TC3_CHECK(init_) << "Bad optional access.";
+    return value_;
+  }
+
+  T const& value_or(T&& default_value) {
+    return (init_ ? value_ : default_value);
+  }
+
+  void set(const T& value) {
+    init_ = true;
+    value_ = value;
+  }
+
+ private:
+  bool init_;
+  T value_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_OPTIONAL_H_
diff --git a/utils/sentencepiece/double_array_trie.cc b/utils/sentencepiece/double_array_trie.cc
new file mode 100644
index 0000000..4a6fb3c
--- /dev/null
+++ b/utils/sentencepiece/double_array_trie.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/double_array_trie.h"
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+void DoubleArrayTrie::GatherPrefixMatches(
+    StringPiece input, const std::function<void(TrieMatch)>& update_fn) const {
+  int pos = 0;
+  TC3_CHECK(pos >= 0 && pos < nodes_length_);
+  pos = offset(0);
+  for (int i = 0; i < input.size(); i++) {
+    pos ^= input[i];
+    TC3_CHECK(pos >= 0 && pos < nodes_length_);
+    if (label(pos) != input[i]) {
+      break;
+    }
+    const bool node_has_leaf = has_leaf(pos);
+    pos ^= offset(pos);
+    TC3_CHECK(pos >= 0 && pos < nodes_length_);
+    if (node_has_leaf) {
+      update_fn(TrieMatch(/*id=*/value(pos), /*match_length=*/i + 1));
+    }
+  }
+}
+
+std::vector<TrieMatch> DoubleArrayTrie::FindAllPrefixMatches(
+    StringPiece input) const {
+  std::vector<TrieMatch> result;
+  GatherPrefixMatches(
+      input, [&result](const TrieMatch match) { result.push_back(match); });
+  return result;
+}
+
+TrieMatch DoubleArrayTrie::LongestPrefixMatch(StringPiece input) const {
+  TrieMatch longest_match;
+  GatherPrefixMatches(input, [&longest_match](const TrieMatch match) {
+    longest_match = match;
+  });
+  return longest_match;
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/double_array_trie.h b/utils/sentencepiece/double_array_trie.h
new file mode 100644
index 0000000..050c466
--- /dev/null
+++ b/utils/sentencepiece/double_array_trie.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_DOUBLE_ARRAY_TRIE_H_
+#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_DOUBLE_ARRAY_TRIE_H_
+
+#include <functional>
+#include <vector>
+
+#include "utils/sentencepiece/matcher.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// A trie node specifies a node in the tree, either an intermediate node or
+// a leaf node.
+// A leaf node contains the id as an int of the string match. This id is encoded
+// in the lower 30 bits, thus the number of distinct ids is 2^30.
+// An intermediate node has an associated label and an offset to its children.
+// The label is encoded in the least significant byte and must match the input
+// character during matching.
+typedef unsigned int TrieNode;
+
+// A memory mappable trie, compatible with Darts::DoubleArray.
+class DoubleArrayTrie : public SentencePieceMatcher {
+ public:
+  // nodes and nodes_length specify the array of the nodes of the trie.
+  DoubleArrayTrie(const TrieNode* nodes, const int nodes_length)
+      : nodes_(nodes), nodes_length_(nodes_length) {}
+
+  // Find matches that are prefixes of a string.
+  std::vector<TrieMatch> FindAllPrefixMatches(StringPiece input) const override;
+
+  // Find the longest prefix match of a string.
+  TrieMatch LongestPrefixMatch(StringPiece input) const override;
+
+ private:
+  // Returns whether a node has a leaf as a child.
+  bool has_leaf(int i) const { return nodes_[i] & 0x100; }
+
+  // Available when a node is a leaf.
+  int value(int i) const { return static_cast<int>(nodes_[i] & 0x7fffffff); }
+
+  // Label associated with a node.
+  // A leaf node will have the MSB set and thus return an invalid label.
+  unsigned int label(int i) const { return nodes_[i] & 0x800000ff; }
+
+  // Returns offset to children.
+  unsigned int offset(int i) const {
+    return (nodes_[i] >> 10) << ((nodes_[i] & 0x200) >> 6);
+  }
+
+  void GatherPrefixMatches(
+      StringPiece input, const std::function<void(TrieMatch)>& update_fn) const;
+
+  const TrieNode* nodes_;
+  const int nodes_length_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_DOUBLE_ARRAY_TRIE_H_
diff --git a/utils/sentencepiece/double_array_trie_test.cc b/utils/sentencepiece/double_array_trie_test.cc
new file mode 100644
index 0000000..99fc6d0
--- /dev/null
+++ b/utils/sentencepiece/double_array_trie_test.cc
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/sentencepiece/double_array_trie.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+std::string GetTestConfigPath() {
+  return "";
+}
+
+TEST(DoubleArrayTest, Lookup) {
+  // Test trie that contains pieces "hell", "hello", "o", "there".
+  std::ifstream test_config_stream(GetTestConfigPath());
+  std::string config((std::istreambuf_iterator<char>(test_config_stream)),
+                     (std::istreambuf_iterator<char>()));
+  DoubleArrayTrie trie(reinterpret_cast<const TrieNode*>(config.data()),
+                       config.size() / sizeof(TrieNode));
+
+  auto matches = trie.FindAllPrefixMatches("hello there");
+  EXPECT_EQ(matches.size(), 2);
+  EXPECT_EQ(matches[0].id, 0 /*hell*/);
+  EXPECT_EQ(matches[0].match_length, 4 /*hell*/);
+  EXPECT_EQ(matches[1].id, 1 /*hello*/);
+  EXPECT_EQ(matches[1].match_length, 5 /*hello*/);
+
+  matches = trie.FindAllPrefixMatches("he");
+  EXPECT_EQ(matches.size(), 0);
+
+  matches = trie.FindAllPrefixMatches("abcd");
+  EXPECT_EQ(matches.size(), 0);
+
+  matches = trie.FindAllPrefixMatches("");
+  EXPECT_EQ(matches.size(), 0);
+
+  EXPECT_THAT(trie.FindAllPrefixMatches("hi there"), testing::IsEmpty());
+
+  EXPECT_EQ(trie.LongestPrefixMatch("hella there").id, 0 /*hell*/);
+  EXPECT_EQ(trie.LongestPrefixMatch("hello there").id, 1 /*hello*/);
+  EXPECT_EQ(trie.LongestPrefixMatch("abcd").id, -1);
+  EXPECT_EQ(trie.LongestPrefixMatch("").id, -1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/encoder.cc b/utils/sentencepiece/encoder.cc
new file mode 100644
index 0000000..8f218ec
--- /dev/null
+++ b/utils/sentencepiece/encoder.cc
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/encoder.h"
+
+namespace libtextclassifier3 {
+
+std::vector<int> Encoder::Encode(StringPiece normalized_text) const {
+  const int len = normalized_text.size();
+  if (len <= 0) {
+    return {start_code_, end_code_};
+  }
+  // We use `previous_pos` to indicate whether a dynamic programming state was
+  // reachable.
+  std::vector<SegmentationEntry> segmentation(
+      len + 1, {/*score=*/0, /*previous_pos=*/-1, /*piece_id=*/-1,
+                /*num_pieces=*/0});
+  for (int i = 0; i < len; i++) {
+    // State couldn't be reached.
+    if (i > 0 && segmentation[i].previous_pos < 0) {
+      // Advance position.
+      normalized_text.RemovePrefix(1);
+      continue;
+    }
+    // Check whether we can use the unknown token.
+    if (unknown_code_ >= 0) {
+      const int pos = i + 1;
+      const float unknown_penalty = segmentation[i].score + unknown_score_;
+      if (segmentation[pos].previous_pos < 0 ||
+          segmentation[pos].score < unknown_penalty) {
+        // Merge multiple unknown tokens into one.
+        if (segmentation[i].piece_id == unknown_code_) {
+          segmentation[pos] = {/*score=*/unknown_penalty,
+                               /*previous_pos=*/segmentation[i].previous_pos,
+                               /*piece_id=*/unknown_code_,
+                               /*num_pieces=*/segmentation[i].num_pieces};
+        } else {
+          segmentation[pos] = {/*score=*/unknown_penalty,
+                               /*previous_pos=*/i,
+                               /*piece_id=*/unknown_code_,
+                               /*num_pieces=*/segmentation[i].num_pieces + 1};
+        }
+      }
+    }
+    for (const auto& match : matcher_->FindAllPrefixMatches(normalized_text)) {
+      TC3_CHECK(match.id >= 0 && match.id < num_pieces_);
+      const int pos = i + match.match_length;
+      const float candidate_score = segmentation[i].score + scores_[match.id];
+      if (segmentation[pos].previous_pos < 0 ||
+          segmentation[pos].score < candidate_score) {
+        segmentation[pos] = {/*score=*/candidate_score, /*previous_pos=*/i,
+                             /*piece_id=*/match.id + encoding_offset_,
+                             /*num_pieces=*/segmentation[i].num_pieces + 1};
+      }
+    }
+    // Advance position.
+    normalized_text.RemovePrefix(1);
+  }
+  if (segmentation[len].num_pieces <= 0) {
+    return {start_code_, end_code_};
+  }
+  const int num_pieces = segmentation[len].num_pieces;
+  std::vector<int> result(num_pieces + 2);
+  result[num_pieces + 1] = end_code_;
+  int pos = len;
+  for (int i = num_pieces; i > 0; i--) {
+    result[i] = segmentation[pos].piece_id;
+    pos = segmentation[pos].previous_pos;
+  }
+  result[0] = start_code_;
+  return result;
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/encoder.h b/utils/sentencepiece/encoder.h
new file mode 100644
index 0000000..0f1bfd3
--- /dev/null
+++ b/utils/sentencepiece/encoder.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_ENCODER_H_
+#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_ENCODER_H_
+
+#include <vector>
+
+#include "utils/base/logging.h"
+#include "utils/sentencepiece/matcher.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// Encoder to segment/tokenize strings into pieces such that the sum of the
+// scores of the pieces used is maximized.
+class Encoder {
+ public:
+  // matcher: the list of valid sentence pieces represented as a matcher, e.g.
+  //     a trie.
+  // num_pieces: the number of pieces in the trie.
+  // pieces_scores: the scores of the individual pieces.
+  // start_code: code that is used as encoding of the start of input.
+  // end_code: code that is used as encoding of the end of input.
+  // encoding_offset: value added to the sentence piece ids to make them
+  //     not intersecting with start_code and end_code.
+  // unknown_code: code that is used for out-of-dictionary characters.
+  // unknown_score: the penalty score associated with the unknown code.
+  Encoder(const SentencePieceMatcher* matcher, const int num_pieces,
+          const float* pieces_scores, int start_code = 0, int end_code = 1,
+          int encoding_offset = 2, int unknown_code = -1,
+          float unknown_score = 0.f)
+      : num_pieces_(num_pieces),
+        scores_(pieces_scores),
+        matcher_(matcher),
+        start_code_(start_code),
+        end_code_(end_code),
+        encoding_offset_(encoding_offset),
+        unknown_code_(unknown_code),
+        unknown_score_(unknown_score) {}
+
+  // Segment the input so that the total score of the pieces used is maximized.
+  // This is a simplified implementation of the general Viterbi algorithm,
+  // assuming independence between individual pieces.
+  std::vector<int> Encode(StringPiece normalized_text) const;
+
+ private:
+  // State in the dynamic programming algorithm.
+  struct SegmentationEntry {
+    // Accumulated score.
+    float score;
+
+    // Position before last piece.
+    int previous_pos;
+
+    // Last piece used.
+    int piece_id;
+
+    // Total number of pieces used.
+    int num_pieces;
+  };
+
+  const int num_pieces_;
+  const float* scores_;
+  const SentencePieceMatcher* matcher_;
+  const int start_code_;
+  const int end_code_;
+  const int encoding_offset_;
+  const int unknown_code_;
+  const float unknown_score_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_ENCODER_H_
diff --git a/utils/sentencepiece/encoder_test.cc b/utils/sentencepiece/encoder_test.cc
new file mode 100644
index 0000000..6bc9aeb
--- /dev/null
+++ b/utils/sentencepiece/encoder_test.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/sentencepiece/encoder.h"
+#include "utils/sentencepiece/sorted_strings_table.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+using testing::IsEmpty;
+
+TEST(EncoderTest, SimpleTokenization) {
+  const char pieces[] = "hell\0hello\0o\0there\0";
+  const int offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  std::unique_ptr<SentencePieceMatcher> matcher(new SortedStringsTable(
+      /*num_pieces=*/4, offsets, StringPiece(pieces, 18)));
+  const Encoder encoder(matcher.get(),
+                        /*num_pieces=*/4, scores);
+
+  EXPECT_THAT(encoder.Encode("hellothere"), ElementsAre(0, 3, 5, 1));
+
+  // Make probability of hello very low:
+  // hello gets now tokenized as hell + o.
+  scores[1] = -100.0;
+  EXPECT_THAT(encoder.Encode("hellothere"), ElementsAre(0, 2, 4, 5, 1));
+}
+
+TEST(EncoderTest, HandlesEdgeCases) {
+  const char pieces[] = "hell\0hello\0o\0there\0";
+  const int offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  std::unique_ptr<SentencePieceMatcher> matcher(new SortedStringsTable(
+      /*num_pieces=*/4, offsets, StringPiece(pieces, 18)));
+  const Encoder encoder(matcher.get(),
+                        /*num_pieces=*/4, scores);
+  EXPECT_THAT(encoder.Encode("hellhello"), ElementsAre(0, 2, 3, 1));
+  EXPECT_THAT(encoder.Encode("hellohell"), ElementsAre(0, 3, 2, 1));
+  EXPECT_THAT(encoder.Encode(""), ElementsAre(0, 1));
+  EXPECT_THAT(encoder.Encode("hellathere"), ElementsAre(0, 1));
+}
+
+TEST(EncoderTest, HandlesOutOfDictionary) {
+  const char pieces[] = "hell\0hello\0o\0there\0";
+  const int offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  std::unique_ptr<SentencePieceMatcher> matcher(new SortedStringsTable(
+      /*num_pieces=*/4, offsets, StringPiece(pieces, 18)));
+  const Encoder encoder(matcher.get(),
+                        /*num_pieces=*/4, scores,
+                        /*start_code=*/0, /*end_code=*/1,
+                        /*encoding_offset=*/3, /*unknown_code=*/2,
+                        /*unknown_score=*/-100.0);
+  EXPECT_THAT(encoder.Encode("hellhello"), ElementsAre(0, 3, 4, 1));
+  EXPECT_THAT(encoder.Encode("hellohell"), ElementsAre(0, 4, 3, 1));
+  EXPECT_THAT(encoder.Encode(""), ElementsAre(0, 1));
+  EXPECT_THAT(encoder.Encode("hellathere"),
+              ElementsAre(0, /*hell*/ 3, /*unknown*/ 2, /*there*/ 6, 1));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/matcher.h b/utils/sentencepiece/matcher.h
new file mode 100644
index 0000000..b538d69
--- /dev/null
+++ b/utils/sentencepiece/matcher.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_MATCHER_H_
+#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_MATCHER_H_
+
+#include <vector>
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+struct TrieMatch {
+  TrieMatch() {}
+  TrieMatch(int id, int match_length) : id(id), match_length(match_length) {}
+  int id = -1;
+  int match_length = -1;
+};
+
+class SentencePieceMatcher {
+ public:
+  virtual ~SentencePieceMatcher() {}
+
+  // Find matches that are prefixes of a string.
+  virtual std::vector<TrieMatch> FindAllPrefixMatches(
+      StringPiece input) const = 0;
+
+  // Find the longest prefix match of a string.
+  virtual TrieMatch LongestPrefixMatch(StringPiece input) const = 0;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_MATCHER_H_
diff --git a/utils/sentencepiece/normalizer.cc b/utils/sentencepiece/normalizer.cc
new file mode 100644
index 0000000..1dd20da
--- /dev/null
+++ b/utils/sentencepiece/normalizer.cc
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/normalizer.h"
+
+#include "utils/base/logging.h"
+#include "utils/strings/utf8.h"
+
+namespace libtextclassifier3 {
+
+std::string SentencePieceNormalizer::Normalize(StringPiece input) const {
+  std::string normalized;
+
+  // Ignores heading space.
+  if (remove_extra_whitespaces_) {
+    while (!input.empty()) {
+      const auto suffix_and_length = NormalizePrefix(input);
+      if (suffix_and_length.second <= 0) {
+        TC3_LOG(ERROR) << "Consumed string is empty.";
+        return normalized;
+      }
+      if (suffix_and_length.first.size() != 1 ||
+          suffix_and_length.first[0] != ' ') {
+        break;
+      }
+      input.RemovePrefix(suffix_and_length.second);
+    }
+  }
+
+  if (input.empty()) {
+    return normalized;
+  }
+
+  // Reserves the output buffer to avoid re-allocations.
+  const int kReservedSize = input.size() * 3;
+  normalized.reserve(kReservedSize);
+
+  // Replaces white space with U+2581 (LOWER ONE EIGHT BLOCK)
+  // if escape_whitespaces() is set (default = true).
+  const StringPiece kSpaceSymbol = "\xe2\x96\x81";
+
+  // Adds a space symbol as a prefix (default is true)
+  // With this prefix, "world" and "hello world" are converted into
+  // "_world" and "_hello_world", which help the trainer to extract
+  // "_world" as one symbol.
+  if (add_dummy_prefix_) {
+    if (escape_whitespaces_) {
+      normalized.append(kSpaceSymbol.data(), kSpaceSymbol.size());
+    } else {
+      normalized.append(" ");
+    }
+  }
+
+  bool is_prev_space = remove_extra_whitespaces_;
+  while (!input.empty()) {
+    auto p = NormalizePrefix(input);
+    if (p.second <= 0) {
+      TC3_LOG(ERROR) << "Consumed string is empty.";
+      return normalized;
+    }
+
+    StringPiece sp = p.first;
+
+    // Removes heading spaces in sentence piece,
+    // if the previous sentence piece ends with whitespace.
+    while (is_prev_space && ConsumePrefix(&sp, " ")) {
+    }
+
+    if (!sp.empty()) {
+      const char *data = sp.data();
+      for (int n = 0; n < sp.size(); ++n) {
+        if (escape_whitespaces_ && data[n] == ' ') {
+          normalized.append(kSpaceSymbol.data(), kSpaceSymbol.size());
+        } else {
+          normalized += data[n];
+        }
+      }
+      // Checks whether the last character of sp is whitespace.
+      is_prev_space = EndsWith(sp, " ");
+    }
+    input.RemovePrefix(p.second);
+    is_prev_space = is_prev_space && remove_extra_whitespaces_;
+  }
+
+  // Ignores trailing space.
+  if (remove_extra_whitespaces_) {
+    const StringPiece space = escape_whitespaces_ ? kSpaceSymbol : " ";
+    while (EndsWith(normalized, space)) {
+      const int length = normalized.size() - space.size();
+      normalized.resize(length);
+    }
+  }
+  return normalized;
+}
+
+std::pair<StringPiece, int> SentencePieceNormalizer::NormalizePrefix(
+    StringPiece input) const {
+  std::pair<StringPiece, int> result;
+  if (input.empty()) return result;
+  const TrieMatch match = charsmap_trie_.LongestPrefixMatch(input);
+  const bool no_match = match.match_length <= 0;
+  if (no_match) {
+    const int char_length = ValidUTF8CharLength(input.data(), input.size());
+    if (char_length <= 0) {
+      // Found a malformed utf8.
+      // The rune is set to be 0xFFFD (REPLACEMENT CHARACTER),
+      // which is a valid Unicode of three bytes in utf8,
+      // but here we only consume one byte.
+      static const char kReplacementChar[] = "\xEF\xBF\xBD";
+      result.first = StringPiece(kReplacementChar, 3);
+      result.second = 1;  // Consumes 1 byte, but emits 0xFFFD.
+    } else {
+      result.first = StringPiece(input.data(), char_length);
+      result.second = char_length;
+    }
+  } else {
+    TC3_CHECK(match.id >= 0 && match.id < charsmap_normalized_.size());
+    result.first = StringPiece(&charsmap_normalized_.data()[match.id]);
+    result.second = match.match_length;
+  }
+  return result;
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/normalizer.h b/utils/sentencepiece/normalizer.h
new file mode 100644
index 0000000..227e09b
--- /dev/null
+++ b/utils/sentencepiece/normalizer.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_NORMALIZER_H_
+#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_NORMALIZER_H_
+
+#include <memory>
+#include <string>
+
+#include "utils/sentencepiece/double_array_trie.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// Normalizer implements a simple text normalizer with user-defined
+// string-to-string rules and leftmost longest matching.
+class SentencePieceNormalizer {
+ public:
+  // charsmap_trie and charsmap_normalized specify the normalization/replacement
+  // string-to-string rules in the following way:
+  // A match in the trie for a string will return the offset in
+  // charsmap_normalized that contains the replacement string.
+  //
+  // add_dummy_prefix: Whether to add dummy whitespace at the beginning of the
+  //   text in order to treat "world" in "world" and "hello world" uniformly.
+  //
+  // remove_extra_whitespaces: Whether to remove leading, trailing and duplicate
+  //   internal whitespace.
+  //
+  // escape_whitespaces: Whether to replace whitespace with a meta symbol.
+  //
+  // Note: `charsmap_normalized` is only viewed, not copied; the backing data
+  // must outlive this normalizer.
+  SentencePieceNormalizer(const DoubleArrayTrie &charsmap_trie,
+                          StringPiece charsmap_normalized,
+                          bool add_dummy_prefix = true,
+                          bool remove_extra_whitespaces = true,
+                          bool escape_whitespaces = true)
+      : charsmap_trie_(charsmap_trie),
+        charsmap_normalized_(charsmap_normalized),
+        add_dummy_prefix_(add_dummy_prefix),
+        remove_extra_whitespaces_(remove_extra_whitespaces),
+        escape_whitespaces_(escape_whitespaces) {}
+
+  // Normalizes a plain utf8 string into an internal representation for
+  // Sentencepiece model.
+  std::string Normalize(StringPiece input) const;
+
+ private:
+  // Normalizes the prefix of `input` and returns the pair of
+  // normalized prefix and the length of the prefix of `input` processed in the
+  // normalization.
+  std::pair<StringPiece, int> NormalizePrefix(StringPiece input) const;
+
+  // Internal trie for efficient longest prefix string matching.
+  DoubleArrayTrie charsmap_trie_;
+
+  // "\0"-delimited concatenation of the normalized replacement strings.
+  // The values stored in `charsmap_trie_` are offsets into this string.
+  StringPiece charsmap_normalized_;
+
+  // See the constructor comment for the meaning of these options.
+  const bool add_dummy_prefix_;
+  const bool remove_extra_whitespaces_;
+  const bool escape_whitespaces_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_NORMALIZER_H_
diff --git a/utils/sentencepiece/normalizer_test.cc b/utils/sentencepiece/normalizer_test.cc
new file mode 100644
index 0000000..f6018ab
--- /dev/null
+++ b/utils/sentencepiece/normalizer_test.cc
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/sentencepiece/double_array_trie.h"
+#include "utils/sentencepiece/normalizer.h"
+#include "utils/sentencepiece/test_utils.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Path to the serialized normalization spec consumed by the tests below.
+// NOTE(review): returns an empty path, so the ifstream reads yield an empty
+// config; presumably the real test-data path was stripped for this drop --
+// confirm before trusting these tests to exercise real data.
+std::string GetTestConfigPath() {
+  return "";
+}
+
+// All options on (dummy prefix, whitespace removal, whitespace escaping):
+// output should match the reference SentencePiece normalizer.
+// NOTE(review): several non-ASCII literals below appear encoding-mangled in
+// this view; verify them against the original UTF-8 test data.
+TEST(NormalizerTest, NormalizesAsReferenceNormalizer) {
+  std::ifstream test_config_stream(GetTestConfigPath());
+  std::string config((std::istreambuf_iterator<char>(test_config_stream)),
+                     (std::istreambuf_iterator<char>()));
+  SentencePieceNormalizer normalizer =
+      NormalizerFromSpec(config, /*add_dummy_prefix=*/true,
+                         /*remove_extra_whitespaces=*/true,
+                         /*escape_whitespaces=*/true);
+
+  EXPECT_EQ(normalizer.Normalize("hello there"), "▁hello▁there");
+
+  // Redundant whitespace.
+  EXPECT_EQ(normalizer.Normalize("when is  the  world cup?"),
+            "▁when▁is▁the▁world▁cup?");
+
+  // Different whitespace.
+  EXPECT_EQ(normalizer.Normalize("general\tkenobi"), "▁general▁kenobi");
+
+  // NFKC char to multi-char normalization.
+  EXPECT_EQ(normalizer.Normalize("㍿"), "▁ζ ͺ式会瀾");
+
+  // Half width katakana, character composition happens.
+  EXPECT_EQ(normalizer.Normalize(" ο½ΈοΎžο½°ο½ΈοΎžοΎ™ "), "▁グーグル");
+
+  // NFKC char to char normalization.
+  EXPECT_EQ(normalizer.Normalize("β‘ β‘‘β‘’"), "▁123");
+}
+
+// Same inputs as above, but with add_dummy_prefix=false: no leading meta
+// symbol should be emitted.
+// NOTE(review): non-ASCII literals appear encoding-mangled in this view;
+// verify against the original UTF-8 test data.
+TEST(NormalizerTest, NoDummyPrefix) {
+  std::ifstream test_config_stream(GetTestConfigPath());
+  std::string config((std::istreambuf_iterator<char>(test_config_stream)),
+                     (std::istreambuf_iterator<char>()));
+  SentencePieceNormalizer normalizer =
+      NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
+                         /*remove_extra_whitespaces=*/true,
+                         /*escape_whitespaces=*/true);
+
+  EXPECT_EQ(normalizer.Normalize("hello there"), "hello▁there");
+
+  // Redundant whitespace.
+  EXPECT_EQ(normalizer.Normalize("when is  the  world cup?"),
+            "when▁is▁the▁world▁cup?");
+
+  // Different whitespace.
+  EXPECT_EQ(normalizer.Normalize("general\tkenobi"), "general▁kenobi");
+
+  // NFKC char to multi-char normalization.
+  EXPECT_EQ(normalizer.Normalize("㍿"), "ζ ͺ式会瀾");
+
+  // Half width katakana, character composition happens.
+  EXPECT_EQ(normalizer.Normalize(" ο½ΈοΎžο½°ο½ΈοΎžοΎ™ "), "グーグル");
+
+  // NFKC char to char normalization.
+  EXPECT_EQ(normalizer.Normalize("β‘ β‘‘β‘’"), "123");
+}
+
+// With remove_extra_whitespaces=false, duplicate internal whitespace must be
+// preserved (each space still escaped to the meta symbol).
+TEST(NormalizerTest, NoRemoveExtraWhitespace) {
+  std::ifstream test_config_stream(GetTestConfigPath());
+  std::string config((std::istreambuf_iterator<char>(test_config_stream)),
+                     (std::istreambuf_iterator<char>()));
+  SentencePieceNormalizer normalizer =
+      NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
+                         /*remove_extra_whitespaces=*/false,
+                         /*escape_whitespaces=*/true);
+
+  EXPECT_EQ(normalizer.Normalize("hello there"), "hello▁there");
+
+  // Redundant whitespace.
+  EXPECT_EQ(normalizer.Normalize("when is  the  world cup?"),
+            "when▁is▁▁the▁▁world▁cup?");
+
+  // Different whitespace.
+  EXPECT_EQ(normalizer.Normalize("general\tkenobi"), "general▁kenobi");
+}
+
+// With escape_whitespaces=false, whitespace stays as a plain space (tabs are
+// still normalized to a space by the charsmap rules).
+TEST(NormalizerTest, NoEscapeWhitespaces) {
+  std::ifstream test_config_stream(GetTestConfigPath());
+  std::string config((std::istreambuf_iterator<char>(test_config_stream)),
+                     (std::istreambuf_iterator<char>()));
+  SentencePieceNormalizer normalizer =
+      NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
+                         /*remove_extra_whitespaces=*/false,
+                         /*escape_whitespaces=*/false);
+
+  EXPECT_EQ(normalizer.Normalize("hello there"), "hello there");
+
+  // Redundant whitespace.
+  EXPECT_EQ(normalizer.Normalize("when is  the  world cup?"),
+            "when is  the  world cup?");
+
+  // Different whitespace.
+  EXPECT_EQ(normalizer.Normalize("general\tkenobi"), "general kenobi");
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/sorted_strings_table.cc b/utils/sentencepiece/sorted_strings_table.cc
new file mode 100644
index 0000000..332ce46
--- /dev/null
+++ b/utils/sentencepiece/sorted_strings_table.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/sorted_strings_table.h"
+
+#include <algorithm>
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+// Invokes `update_fn` for every piece that is a prefix of `input`.
+// Narrows the candidate range [left, right) one character at a time with two
+// binary searches, then finishes with a linear scan once the range is small.
+void SortedStringsTable::GatherPrefixMatches(
+    StringPiece input, const std::function<void(TrieMatch)>& update_fn) const {
+  int left = 0;
+  int right = num_pieces_;
+  int span_size = right - left;
+  int match_length = 0;
+
+  // Loop invariant:
+  // at the ith iteration, all strings from `left` ... `right` match the input
+  // on the first `match_length` characters.
+  while (span_size > use_linear_scan_threshold_) {
+    if (match_length >= input.length()) {
+      return;
+    }
+
+    // We find the possible range of pieces in `left` ... `right` matching the
+    // `match_length` + 1 character with two binary searches:
+    //     `lower_bound` to find the start of the range of matching pieces.
+    //     `upper_bound` to find the non-inclusive end of the range.
+    left = (std::lower_bound(
+                offsets_ + left, offsets_ + right, input[match_length],
+                [this, match_length](int piece_offset, int c) -> bool {
+                  return pieces_[piece_offset + match_length] < c;
+                }) -
+            offsets_);
+    right = (std::upper_bound(
+                 offsets_ + left, offsets_ + right, input[match_length],
+                 [this, match_length](int c, int piece_offset) -> bool {
+                   return c < pieces_[piece_offset + match_length];
+                 }) -
+             offsets_);
+    span_size = right - left;
+    if (span_size <= 0) {
+      return;
+    }
+    ++match_length;
+
+    // Due to the loop invariant and the fact that the strings are sorted, there
+    // can only be one piece matching completely now, namely at left.
+    if (pieces_[offsets_[left] + match_length] == 0) {
+      update_fn(TrieMatch(/*id=*/left,
+                          /*match_length=*/match_length));
+      left++;
+    }
+  }
+
+  // Use linear scan for small problem instances.
+  // By the loop invariant characters 0...`match_length` of all pieces in
+  // in `left`...`right` match the input on 0...`match_length`.
+  for (int i = left; i < right; i++) {
+    bool matches = true;
+    int piece_match_length = match_length;
+    for (int k = offsets_[i] + piece_match_length; pieces_[k] != 0; k++) {
+      // Bounds check must use `piece_match_length` (the advancing per-piece
+      // cursor), not the loop-invariant `match_length`: the latter stays
+      // fixed here, so using it would read `input` past its end whenever a
+      // piece extends beyond the input.
+      if (piece_match_length >= input.size() ||
+          input[piece_match_length] != pieces_[k]) {
+        matches = false;
+        break;
+      }
+      piece_match_length++;
+    }
+    if (matches) {
+      update_fn(TrieMatch(/*id=*/i,
+                          /*match_length=*/piece_match_length));
+    }
+  }
+}
+
+// Returns every piece that is a prefix of `input`, in the order reported by
+// GatherPrefixMatches (shortest match first).
+std::vector<TrieMatch> SortedStringsTable::FindAllPrefixMatches(
+    StringPiece input) const {
+  std::vector<TrieMatch> result;
+  GatherPrefixMatches(
+      input, [&result](const TrieMatch match) { result.push_back(match); });
+  return result;
+}
+
+// Returns the longest piece that is a prefix of `input`.
+// GatherPrefixMatches reports matches in increasing match-length order (the
+// pieces are sorted), so keeping only the last reported match yields the
+// longest one; with no match the default-constructed TrieMatch is returned
+// (the tests expect its id to be -1).
+TrieMatch SortedStringsTable::LongestPrefixMatch(StringPiece input) const {
+  TrieMatch longest_match;
+  GatherPrefixMatches(input, [&longest_match](const TrieMatch match) {
+    longest_match = match;
+  });
+  return longest_match;
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/sorted_strings_table.h b/utils/sentencepiece/sorted_strings_table.h
new file mode 100644
index 0000000..82cda5c
--- /dev/null
+++ b/utils/sentencepiece/sorted_strings_table.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_SORTED_STRINGS_TABLE_H_
+#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_SORTED_STRINGS_TABLE_H_
+
+#include <functional>
+#include <vector>
+
+#include "utils/sentencepiece/matcher.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// A matcher to find string pieces matching prefixes of an input string.
+// The list of reference strings are kept in sorted order in a zero separated
+// string.
+// Binary search is used to find all prefix matches.
+// num_pieces: Number of sentence pieces.
+// offsets: Offsets into `pieces` where a string starts.
+// pieces: String pieces, concatenated in sorted order and zero byte separated.
+// use_linear_scan_threshold: Minimum size of binary search range before
+//     switching to a linear sweep for prefix match testing.
+class SortedStringsTable : public SentencePieceMatcher {
+ public:
+  // Does not copy `offsets` or the bytes behind `pieces`; both must outlive
+  // this table.
+  SortedStringsTable(const int num_pieces, const int* offsets,
+                     StringPiece pieces,
+                     const int use_linear_scan_threshold = 10)
+      : num_pieces_(num_pieces),
+        offsets_(offsets),
+        pieces_(pieces),
+        use_linear_scan_threshold_(use_linear_scan_threshold) {}
+
+  // Find matches that are prefixes of a string.
+  std::vector<TrieMatch> FindAllPrefixMatches(StringPiece input) const override;
+
+  // Find the longest prefix match of a string.
+  TrieMatch LongestPrefixMatch(StringPiece input) const override;
+
+ private:
+  // Calls `update_fn` for every piece that is a prefix of `input`.
+  void GatherPrefixMatches(
+      StringPiece input, const std::function<void(TrieMatch)>& update_fn) const;
+
+  // Number of pieces in the table.
+  const int num_pieces_;
+  // Start offset of each piece within `pieces_`.  Not owned.
+  const int* offsets_;
+  // Zero-separated concatenation of the sorted pieces.
+  const StringPiece pieces_;
+  // Range size at or below which a linear scan replaces binary search.
+  const int use_linear_scan_threshold_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_SORTED_STRINGS_TABLE_H_
diff --git a/utils/sentencepiece/sorted_strings_table_test.cc b/utils/sentencepiece/sorted_strings_table_test.cc
new file mode 100644
index 0000000..61a0ef4
--- /dev/null
+++ b/utils/sentencepiece/sorted_strings_table_test.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/sentencepiece/sorted_strings_table.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(SortedStringsTest, Lookup) {
+  // Pieces, in sorted order: "hell", "hello", "o", "there".
+  const char pieces[] = "hell\0hello\0o\0there\0";
+  const int offsets[] = {0, 5, 11, 13};
+
+  // NOTE(review): the concatenated data is 19 bytes (the NUL terminating
+  // "there" sits at index 18) but only 18 are passed here.  The table scans
+  // for NUL via raw indexing so this still works; 19 would describe the blob
+  // exactly -- confirm intent.
+  SortedStringsTable table(/*num_pieces=*/4, offsets, StringPiece(pieces, 18),
+                           /*use_linear_scan_threshold=*/1);
+
+  auto matches = table.FindAllPrefixMatches("hello there");
+  EXPECT_EQ(matches.size(), 2);
+  EXPECT_EQ(matches[0].id, 0 /*hell*/);
+  EXPECT_EQ(matches[0].match_length, 4 /*hell*/);
+  EXPECT_EQ(matches[1].id, 1 /*hello*/);
+  EXPECT_EQ(matches[1].match_length, 5 /*hello*/);
+
+  matches = table.FindAllPrefixMatches("he");
+  EXPECT_EQ(matches.size(), 0);
+
+  matches = table.FindAllPrefixMatches("abcd");
+  EXPECT_EQ(matches.size(), 0);
+
+  matches = table.FindAllPrefixMatches("");
+  EXPECT_EQ(matches.size(), 0);
+
+  EXPECT_THAT(table.FindAllPrefixMatches("hi there"), testing::IsEmpty());
+
+  EXPECT_EQ(table.LongestPrefixMatch("hella there").id, 0 /*hell*/);
+  EXPECT_EQ(table.LongestPrefixMatch("hello there").id, 1 /*hello*/);
+  EXPECT_EQ(table.LongestPrefixMatch("abcd").id, -1);
+  EXPECT_EQ(table.LongestPrefixMatch("").id, -1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/test_utils.cc b/utils/sentencepiece/test_utils.cc
new file mode 100644
index 0000000..1ed2bf3
--- /dev/null
+++ b/utils/sentencepiece/test_utils.cc
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/test_utils.h"
+
+#include <memory>
+
+#include "utils/base/integral_types.h"
+#include "utils/sentencepiece/double_array_trie.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// Builds a SentencePieceNormalizer from a serialized test spec with layout:
+//   [uint32 trie_blob_size][trie_blob_size bytes of TrieNode][charsmap blob].
+// NOTE(review): the reinterpret_casts assume `spec.data()` is suitably
+// aligned for uint32/TrieNode and uses host byte order -- fine for in-memory
+// test specs, verify if specs are ever memory-mapped from disk.
+SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
+                                           bool add_dummy_prefix,
+                                           bool remove_extra_whitespaces,
+                                           bool escape_whitespaces) {
+  const uint32 trie_blob_size = reinterpret_cast<const uint32*>(spec.data())[0];
+  spec.RemovePrefix(sizeof(trie_blob_size));
+  const TrieNode* trie_blob = reinterpret_cast<const TrieNode*>(spec.data());
+  spec.RemovePrefix(trie_blob_size);
+  const int num_nodes = trie_blob_size / sizeof(TrieNode);
+  // Whatever remains after the trie blob is the charsmap of normalized
+  // strings.
+  return SentencePieceNormalizer(
+      DoubleArrayTrie(trie_blob, num_nodes),
+      /*charsmap_normalized=*/StringPiece(spec.data(), spec.size()),
+      add_dummy_prefix, remove_extra_whitespaces, escape_whitespaces);
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/sentencepiece/test_utils.h b/utils/sentencepiece/test_utils.h
new file mode 100644
index 0000000..0c833da
--- /dev/null
+++ b/utils/sentencepiece/test_utils.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
+#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
+
+#include <string>
+#include <vector>
+
+#include "utils/sentencepiece/normalizer.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// Constructs a SentencePieceNormalizer from a serialized test spec blob
+// (uint32 trie size, then the trie nodes, then the normalized-strings
+// charsmap).  See test_utils.cc for the exact layout.
+SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
+                                           bool add_dummy_prefix,
+                                           bool remove_extra_whitespaces,
+                                           bool escape_whitespaces);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
diff --git a/util/strings/numbers.cc b/utils/strings/numbers.cc
similarity index 92%
rename from util/strings/numbers.cc
rename to utils/strings/numbers.cc
index a89c0ef..3028c69 100644
--- a/util/strings/numbers.cc
+++ b/utils/strings/numbers.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "util/strings/numbers.h"
+#include "utils/strings/numbers.h"
 
 #ifdef COMPILER_MSVC
 #include <sstream>
@@ -22,7 +22,7 @@
 
 #include <stdlib.h>
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 bool ParseInt32(const char *c_str, int32 *value) {
   char *temp;
@@ -72,4 +72,4 @@
 }
 #endif  // COMPILER_MSVC
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/strings/numbers.h b/utils/strings/numbers.h
similarity index 83%
rename from util/strings/numbers.h
rename to utils/strings/numbers.h
index a2c8c6e..ae48068 100644
--- a/util/strings/numbers.h
+++ b/utils/strings/numbers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,14 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_STRINGS_NUMBERS_H_
-#define LIBTEXTCLASSIFIER_UTIL_STRINGS_NUMBERS_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_NUMBERS_H_
+#define LIBTEXTCLASSIFIER_UTILS_STRINGS_NUMBERS_H_
 
 #include <string>
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Parses an int32 from a C-style string.
 //
@@ -47,6 +47,6 @@
 // int types.
 std::string IntToString(int64 input);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_STRINGS_NUMBERS_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_STRINGS_NUMBERS_H_
diff --git a/util/strings/numbers_test.cc b/utils/strings/numbers_test.cc
similarity index 93%
rename from util/strings/numbers_test.cc
rename to utils/strings/numbers_test.cc
index 1fdd78a..57e812f 100644
--- a/util/strings/numbers_test.cc
+++ b/utils/strings/numbers_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include "util/strings/numbers.h"
+#include "utils/strings/numbers.h"
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 void TestParseInt32(const char *c_str, bool expected_parsing_success,
@@ -100,4 +100,4 @@
   TestParseDouble("23.5a", false);
 }
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/strings/split.cc b/utils/strings/split.cc
similarity index 86%
rename from util/strings/split.cc
rename to utils/strings/split.cc
index 2c610ba..584760a 100644
--- a/util/strings/split.cc
+++ b/utils/strings/split.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "util/strings/split.h"
+#include "utils/strings/split.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace strings {
 
 std::vector<StringPiece> Split(const StringPiece &text, char delim) {
@@ -35,4 +35,4 @@
 }
 
 }  // namespace strings
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/strings/split.h b/utils/strings/split.h
similarity index 70%
rename from util/strings/split.h
rename to utils/strings/split.h
index 96f73fe..b565258 100644
--- a/util/strings/split.h
+++ b/utils/strings/split.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,20 +14,20 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_STRINGS_SPLIT_H_
-#define LIBTEXTCLASSIFIER_UTIL_STRINGS_SPLIT_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_SPLIT_H_
+#define LIBTEXTCLASSIFIER_UTILS_STRINGS_SPLIT_H_
 
 #include <string>
 #include <vector>
 
-#include "util/strings/stringpiece.h"
+#include "utils/strings/stringpiece.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace strings {
 
 std::vector<StringPiece> Split(const StringPiece &text, char delim);
 
 }  // namespace strings
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_STRINGS_SPLIT_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_STRINGS_SPLIT_H_
diff --git a/utils/strings/stringpiece.h b/utils/strings/stringpiece.h
new file mode 100644
index 0000000..3ec414f
--- /dev/null
+++ b/utils/strings/stringpiece.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
+#define LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
+
+#include <stddef.h>
+#include <string.h>
+
+#include <string>
+
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+// Read-only "view" of a piece of data.  Does not own the underlying data.
+class StringPiece {
+ public:
+  StringPiece() : StringPiece(nullptr, 0) {}
+
+  StringPiece(const char *str)  // NOLINT(runtime/explicit)
+      : start_(str), size_(strlen(str)) {}
+
+  StringPiece(const char *start, size_t size) : start_(start), size_(size) {}
+
+  // Intentionally no "explicit" keyword: in function calls, we want strings to
+  // be converted to StringPiece implicitly.
+  StringPiece(const std::string &s)  // NOLINT(runtime/explicit)
+      : StringPiece(s.data(), s.size()) {}
+
+  StringPiece(const std::string &s, int offset, int len)
+      : StringPiece(s.data() + offset, len) {}
+
+  char operator[](size_t i) const { return start_[i]; }
+
+  // Returns start address of underlying data.
+  const char *data() const { return start_; }
+
+  // Returns number of bytes of underlying data.
+  size_t size() const { return size_; }
+  size_t length() const { return size_; }
+
+  bool empty() const { return size_ == 0; }
+
+  // Returns a std::string containing a copy of the underlying data.
+  std::string ToString() const { return std::string(data(), size()); }
+
+  // Returns whether string ends with a given suffix.
+  bool EndsWith(StringPiece suffix) const {
+    return suffix.empty() || (size_ >= suffix.size() &&
+                              memcmp(start_ + (size_ - suffix.size()),
+                                     suffix.data(), suffix.size()) == 0);
+  }
+
+  // Returns whether the string begins with a given prefix.
+  bool StartsWith(StringPiece prefix) const {
+    return prefix.empty() ||
+           (size_ >= prefix.size() &&
+            memcmp(start_, prefix.data(), prefix.size()) == 0);
+  }
+
+  // Removes the first `n` characters from the string piece. Note that the
+  // underlying string is not changed, only the view.
+  void RemovePrefix(int n) {
+    TC3_CHECK_LE(n, size_);
+    start_ += n;
+    size_ -= n;
+  }
+
+ private:
+  const char *start_;  // Not owned.
+  size_t size_;
+};
+
+// Free-function convenience wrapper for StringPiece::EndsWith.
+inline bool EndsWith(StringPiece text, StringPiece suffix) {
+  return text.EndsWith(suffix);
+}
+
+// Free-function convenience wrapper for StringPiece::StartsWith.
+inline bool StartsWith(StringPiece text, StringPiece prefix) {
+  return text.StartsWith(prefix);
+}
+
+// If `text` starts with `prefix`, advances `text` past it and returns true;
+// otherwise leaves `text` untouched and returns false.
+inline bool ConsumePrefix(StringPiece *text, StringPiece prefix) {
+  const bool has_prefix = text->StartsWith(prefix);
+  if (has_prefix) {
+    text->RemovePrefix(prefix.size());
+  }
+  return has_prefix;
+}
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_STRINGS_STRINGPIECE_H_
diff --git a/utils/strings/stringpiece_test.cc b/utils/strings/stringpiece_test.cc
new file mode 100644
index 0000000..713a7f9
--- /dev/null
+++ b/utils/strings/stringpiece_test.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StringPieceTest, EndsWith) {
+  EXPECT_TRUE(EndsWith("hello there!", "there!"));
+  EXPECT_TRUE(EndsWith("hello there!", "!"));
+  EXPECT_FALSE(EndsWith("hello there!", "there"));
+  EXPECT_FALSE(EndsWith("hello there!", " hello there!"));
+  EXPECT_TRUE(EndsWith("hello there!", ""));
+  EXPECT_FALSE(EndsWith("", "hello there!"));
+}
+
+TEST(StringPieceTest, StartsWith) {
+  EXPECT_TRUE(StartsWith("hello there!", "hello"));
+  EXPECT_TRUE(StartsWith("hello there!", "hello "));
+  EXPECT_FALSE(StartsWith("hello there!", "there!"));
+  EXPECT_FALSE(StartsWith("hello there!", " hello there! "));
+  EXPECT_TRUE(StartsWith("hello there!", ""));
+  EXPECT_FALSE(StartsWith("", "hello there!"));
+}
+
+TEST(StringPieceTest, ConsumePrefix) {
+  StringPiece str("hello there!");
+  EXPECT_TRUE(ConsumePrefix(&str, "hello "));
+  EXPECT_EQ(str.ToString(), "there!");
+  EXPECT_TRUE(ConsumePrefix(&str, "there"));
+  EXPECT_EQ(str.ToString(), "!");
+  EXPECT_FALSE(ConsumePrefix(&str, "!!"));
+  EXPECT_TRUE(ConsumePrefix(&str, ""));
+  EXPECT_TRUE(ConsumePrefix(&str, "!"));
+  EXPECT_EQ(str.ToString(), "");
+  EXPECT_TRUE(ConsumePrefix(&str, ""));
+  EXPECT_FALSE(ConsumePrefix(&str, "!"));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/strings/utf8.cc b/utils/strings/utf8.cc
new file mode 100644
index 0000000..faaf854
--- /dev/null
+++ b/utils/strings/utf8.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/utf8.h"
+
+namespace libtextclassifier3 {
+bool IsValidUTF8(const char *src, int size) {
+  for (int i = 0; i < size;) {
+    const int char_length = ValidUTF8CharLength(src + i, size - i);
+    if (char_length <= 0) {
+      return false;
+    }
+    i += char_length;
+  }
+  return true;
+}
+
+int ValidUTF8CharLength(const char *src, int size) {
+  // Unexpected trail byte.
+  if (IsTrailByte(src[0])) {
+    return -1;
+  }
+
+  const int num_codepoint_bytes = GetNumBytesForUTF8Char(&src[0]);
+  if (num_codepoint_bytes <= 0 || num_codepoint_bytes > size) {
+    return -1;
+  }
+
+  // Check that remaining bytes in the codepoint are trailing bytes.
+  for (int k = 1; k < num_codepoint_bytes; k++) {
+    if (!IsTrailByte(src[k])) {
+      return -1;
+    }
+  }
+
+  return num_codepoint_bytes;
+}
+
+}  // namespace libtextclassifier3
diff --git a/util/strings/utf8.h b/utils/strings/utf8.h
similarity index 79%
rename from util/strings/utf8.h
rename to utils/strings/utf8.h
index 1e75da2..6c4c8a0 100644
--- a/util/strings/utf8.h
+++ b/utils/strings/utf8.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_STRINGS_UTF8_H_
-#define LIBTEXTCLASSIFIER_UTIL_STRINGS_UTF8_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_STRINGS_UTF8_H_
+#define LIBTEXTCLASSIFIER_UTILS_STRINGS_UTF8_H_
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // Returns the length (number of bytes) of the Unicode code point starting at
 // src, based on inspecting just that one byte.  Preconditions: src != NULL,
@@ -47,6 +47,10 @@
 // Returns true iff src points to a well-formed UTF-8 string.
 bool IsValidUTF8(const char *src, int size);
 
-}  // namespace libtextclassifier2
+// Returns byte length of the first valid codepoint in the string, otherwise -1
+// if pointing to an ill-formed UTF-8 character.
+int ValidUTF8CharLength(const char *src, int size);
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_STRINGS_UTF8_H_
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_STRINGS_UTF8_H_
diff --git a/utils/strings/utf8_test.cc b/utils/strings/utf8_test.cc
new file mode 100644
index 0000000..a71d4f2
--- /dev/null
+++ b/utils/strings/utf8_test.cc
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/strings/utf8.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(Utf8Test, GetNumBytesForUTF8Char) {
+  EXPECT_EQ(GetNumBytesForUTF8Char("\x00"), 0);
+  EXPECT_EQ(GetNumBytesForUTF8Char("h"), 1);
+  EXPECT_EQ(GetNumBytesForUTF8Char("πŸ˜‹"), 4);
+  EXPECT_EQ(GetNumBytesForUTF8Char("㍿"), 3);
+}
+
+TEST(Utf8Test, IsValidUTF8) {
+  EXPECT_TRUE(IsValidUTF8("1234πŸ˜‹hello", 13));
+  EXPECT_TRUE(IsValidUTF8("\u304A\u00B0\u106B", 8));
+  EXPECT_TRUE(IsValidUTF8("this is a testπŸ˜‹πŸ˜‹πŸ˜‹", 26));
+  EXPECT_TRUE(IsValidUTF8("\xf0\x9f\x98\x8b", 4));
+  // Too short (string is too short).
+  EXPECT_FALSE(IsValidUTF8("\xf0\x9f", 2));
+  // Too long (too many trailing bytes).
+  EXPECT_FALSE(IsValidUTF8("\xf0\x9f\x98\x8b\x8b", 5));
+  // Too short (too few trailing bytes).
+  EXPECT_FALSE(IsValidUTF8("\xf0\x9f\x98\x61\x61", 5));
+}
+
+TEST(Utf8Test, ValidUTF8CharLength) {
+  EXPECT_EQ(ValidUTF8CharLength("1234πŸ˜‹hello", 13), 1);
+  EXPECT_EQ(ValidUTF8CharLength("\u304A\u00B0\u106B", 8), 3);
+  EXPECT_EQ(ValidUTF8CharLength("this is a testπŸ˜‹πŸ˜‹πŸ˜‹", 26), 1);
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f\x98\x8b", 4), 4);
+  // Too short (string is too short).
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f", 2), -1);
+  // Too long (too many trailing bytes). First character is valid.
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f\x98\x8b\x8b", 5), 4);
+  // Too short (too few trailing bytes).
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f\x98\x61\x61", 5), -1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/tensor-view.cc b/utils/tensor-view.cc
similarity index 83%
rename from tensor-view.cc
rename to utils/tensor-view.cc
index 4acadc5..0ca0b7f 100644
--- a/tensor-view.cc
+++ b/utils/tensor-view.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "tensor-view.h"
+#include "utils/tensor-view.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 namespace internal {
 int NumberOfElements(const std::vector<int>& shape) {
@@ -28,4 +28,4 @@
 }
 }  // namespace internal
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/tensor-view.h b/utils/tensor-view.h
similarity index 87%
rename from tensor-view.h
rename to utils/tensor-view.h
index 00ab08c..a46ebd1 100644
--- a/tensor-view.h
+++ b/utils/tensor-view.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_TENSOR_VIEW_H_
-#define LIBTEXTCLASSIFIER_TENSOR_VIEW_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_TENSOR_VIEW_H_
+#define LIBTEXTCLASSIFIER_UTILS_TENSOR_VIEW_H_
 
 #include <algorithm>
 #include <vector>
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace internal {
 // Computes the number of elements in a tensor of given shape.
 int NumberOfElements(const std::vector<int>& shape);
@@ -67,6 +67,6 @@
   const int size_;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_TENSOR_VIEW_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_TENSOR_VIEW_H_
diff --git a/tensor-view_test.cc b/utils/tensor-view_test.cc
similarity index 91%
rename from tensor-view_test.cc
rename to utils/tensor-view_test.cc
index d50fac7..9467264 100644
--- a/tensor-view_test.cc
+++ b/utils/tensor-view_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include "tensor-view.h"
+#include "utils/tensor-view.h"
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 TEST(TensorViewTest, TestSize) {
@@ -49,4 +49,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/utils/testing/logging_event_listener.h b/utils/testing/logging_event_listener.h
new file mode 100644
index 0000000..2663a9c
--- /dev/null
+++ b/utils/testing/logging_event_listener.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_TESTING_LOGGING_EVENT_LISTENER_H_
+#define LIBTEXTCLASSIFIER_UTILS_TESTING_LOGGING_EVENT_LISTENER_H_
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+
+// TestEventListener that writes test results to the log so that they will be
+// visible in the logcat output in Sponge.
+// The formatting of the output is patterend after the output produced by the
+// standard PrettyUnitTestResultPrinter.
+class LoggingEventListener : public ::testing::TestEventListener {
+ public:
+  void OnTestProgramStart(const testing::UnitTest& unit_test) override;
+
+  void OnTestIterationStart(const testing::UnitTest& unit_test,
+                            int iteration) override;
+
+  void OnEnvironmentsSetUpStart(const testing::UnitTest& unit_test) override;
+
+  void OnEnvironmentsSetUpEnd(const testing::UnitTest& unit_test) override;
+
+  void OnTestCaseStart(const testing::TestCase& test_case) override;
+
+  void OnTestStart(const testing::TestInfo& test_info) override;
+
+  void OnTestPartResult(
+      const testing::TestPartResult& test_part_result) override;
+
+  void OnTestEnd(const testing::TestInfo& test_info) override;
+
+  void OnTestCaseEnd(const testing::TestCase& test_case) override;
+
+  void OnEnvironmentsTearDownStart(const testing::UnitTest& unit_test) override;
+
+  void OnEnvironmentsTearDownEnd(const testing::UnitTest& unit_test) override;
+
+  void OnTestIterationEnd(const testing::UnitTest& unit_test,
+                          int iteration) override;
+
+  void OnTestProgramEnd(const testing::UnitTest& unit_test) override;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_TESTING_LOGGING_EVENT_LISTENER_H_
diff --git a/utils/tflite-model-executor.cc b/utils/tflite-model-executor.cc
new file mode 100644
index 0000000..05762d0
--- /dev/null
+++ b/utils/tflite-model-executor.cc
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tflite-model-executor.h"
+
+#include "utils/base/logging.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+
+// Forward declaration of custom TensorFlow Lite ops for registration.
+namespace tflite {
+namespace ops {
+namespace builtin {
+TfLiteRegistration* Register_DIV();
+TfLiteRegistration* Register_FULLY_CONNECTED();
+TfLiteRegistration* Register_SOFTMAX();  // TODO(smillius): remove.
+}  // namespace builtin
+}  // namespace ops
+}  // namespace tflite
+
+#ifdef TC3_WITH_ACTIONS_OPS
+#include "utils/tflite/dist_diversification.h"
+#include "utils/tflite/text_encoder.h"
+// This function is defined in the file generated by :smart_reply_ops target.
+void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
+#else
+void RegisterSelectedOps(::tflite::MutableOpResolver* resolver) {
+  resolver->AddBuiltin(::tflite::BuiltinOperator_FULLY_CONNECTED,
+                       ::tflite::ops::builtin::Register_FULLY_CONNECTED());
+}
+#endif  // TC3_WITH_ACTIONS_OPS
+
+namespace libtextclassifier3 {
+
+inline std::unique_ptr<tflite::OpResolver> BuildOpResolver() {
+#ifdef TC3_USE_SELECTIVE_REGISTRATION
+  std::unique_ptr<tflite::MutableOpResolver> resolver(
+      new tflite::MutableOpResolver);
+  resolver->AddBuiltin(tflite::BuiltinOperator_DIV,
+                       tflite::ops::builtin::Register_DIV());
+  resolver->AddBuiltin(tflite::BuiltinOperator_FULLY_CONNECTED,
+                       tflite::ops::builtin::Register_FULLY_CONNECTED());
+  resolver->AddBuiltin(tflite::BuiltinOperator_SOFTMAX,
+                       tflite::ops::builtin::Register_SOFTMAX());
+  RegisterSelectedOps(resolver.get());
+#else
+  std::unique_ptr<tflite::ops::builtin::BuiltinOpResolver> resolver(
+      new tflite::ops::builtin::BuiltinOpResolver);
+#ifdef TC3_WITH_ACTIONS_OPS
+  resolver->AddCustom("DistanceDiversification",
+                      tflite::ops::custom::Register_DISTANCE_DIVERSIFICATION());
+  resolver->AddCustom("TextEncoder",
+                      tflite::ops::custom::Register_TEXT_ENCODER());
+#endif  // TC3_WITH_ACTIONS_OPS
+#endif
+  return std::unique_ptr<tflite::OpResolver>(std::move(resolver));
+}
+
+std::unique_ptr<const tflite::FlatBufferModel> TfLiteModelFromModelSpec(
+    const tflite::Model* model_spec) {
+  std::unique_ptr<const tflite::FlatBufferModel> model(
+      tflite::FlatBufferModel::BuildFromModel(model_spec));
+  if (!model || !model->initialized()) {
+    TC3_LOG(ERROR) << "Could not build TFLite model from a model spec.";
+    return nullptr;
+  }
+  return model;
+}
+
+std::unique_ptr<const tflite::FlatBufferModel> TfLiteModelFromBuffer(
+    const flatbuffers::Vector<uint8_t>* model_spec_buffer) {
+  const tflite::Model* model =
+      flatbuffers::GetRoot<tflite::Model>(model_spec_buffer->data());
+  flatbuffers::Verifier verifier(model_spec_buffer->data(),
+                                 model_spec_buffer->Length());
+  if (!model->Verify(verifier)) {
+    return nullptr;
+  }
+  return TfLiteModelFromModelSpec(model);
+}
+
+TfLiteModelExecutor::TfLiteModelExecutor(
+    std::unique_ptr<const tflite::FlatBufferModel> model)
+    : model_(std::move(model)), resolver_(BuildOpResolver()) {}
+
+std::unique_ptr<tflite::Interpreter> TfLiteModelExecutor::CreateInterpreter()
+    const {
+  std::unique_ptr<tflite::Interpreter> interpreter;
+  tflite::InterpreterBuilder(*model_, *resolver_)(&interpreter);
+  return interpreter;
+}
+
+template <>
+void TfLiteModelExecutor::SetInput(const int input_index,
+                                   const std::vector<std::string>& input_data,
+                                   tflite::Interpreter* interpreter) const {
+  tflite::DynamicBuffer buf;
+  for (const std::string& s : input_data) {
+    buf.AddString(s.data(), s.length());
+  }
+  // TODO(b/120230709): Use WriteToTensorAsVector() instead, once available in
+  // AOSP.
+  buf.WriteToTensor(interpreter->tensor(interpreter->inputs()[input_index]));
+}
+
+template <>
+std::vector<tflite::StringRef> TfLiteModelExecutor::Output(
+    const int output_index, tflite::Interpreter* interpreter) const {
+  const TfLiteTensor* output_tensor =
+      interpreter->tensor(interpreter->outputs()[output_index]);
+  const int num_strings = tflite::GetStringCount(output_tensor);
+  std::vector<tflite::StringRef> output(num_strings);
+  for (int i = 0; i < num_strings; i++) {
+    output[i] = tflite::GetString(output_tensor, i);
+  }
+  return output;
+}
+
+template <>
+std::vector<std::string> TfLiteModelExecutor::Output(
+    const int output_index, tflite::Interpreter* interpreter) const {
+  std::vector<std::string> output;
+  for (const tflite::StringRef& s :
+       Output<tflite::StringRef>(output_index, interpreter)) {
+    output.push_back(std::string(s.str, s.len));
+  }
+  return output;
+}
+
+}  // namespace libtextclassifier3
diff --git a/utils/tflite-model-executor.h b/utils/tflite-model-executor.h
new file mode 100644
index 0000000..fd00924
--- /dev/null
+++ b/utils/tflite-model-executor.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Contains classes that can execute different models/parts of a model.
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_TFLITE_MODEL_EXECUTOR_H_
+#define LIBTEXTCLASSIFIER_UTILS_TFLITE_MODEL_EXECUTOR_H_
+
+#include <memory>
+
+#include "utils/base/logging.h"
+#include "utils/tensor-view.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/op_resolver.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+namespace libtextclassifier3 {
+
+std::unique_ptr<tflite::OpResolver> BuildOpResolver();
+std::unique_ptr<const tflite::FlatBufferModel> TfLiteModelFromModelSpec(
+    const tflite::Model*);
+std::unique_ptr<const tflite::FlatBufferModel> TfLiteModelFromBuffer(
+    const flatbuffers::Vector<uint8_t>*);
+
+// Executor for the text selection prediction and classification models.
+class TfLiteModelExecutor {
+ public:
+  static std::unique_ptr<TfLiteModelExecutor> FromModelSpec(
+      const tflite::Model* model_spec) {
+    auto model = TfLiteModelFromModelSpec(model_spec);
+    if (!model) {
+      return nullptr;
+    }
+    return std::unique_ptr<TfLiteModelExecutor>(
+        new TfLiteModelExecutor(std::move(model)));
+  }
+
+  static std::unique_ptr<TfLiteModelExecutor> FromBuffer(
+      const flatbuffers::Vector<uint8_t>* model_spec_buffer) {
+    auto model = TfLiteModelFromBuffer(model_spec_buffer);
+    if (!model) {
+      return nullptr;
+    }
+    return std::unique_ptr<TfLiteModelExecutor>(
+        new TfLiteModelExecutor(std::move(model)));
+  }
+
+  // Creates an Interpreter for the model that serves as a scratch-pad for the
+  // inference. The Interpreter is NOT thread-safe.
+  std::unique_ptr<tflite::Interpreter> CreateInterpreter() const;
+
+  template <typename T>
+  void SetInput(const int input_index, const TensorView<T>& input_data,
+                tflite::Interpreter* interpreter) const {
+    input_data.copy_to(interpreter->typed_input_tensor<T>(input_index),
+                       input_data.size());
+  }
+
+  template <typename T>
+  void SetInput(const int input_index, const std::vector<T>& input_data,
+                tflite::Interpreter* interpreter) const {
+    std::copy(input_data.begin(), input_data.end(),
+              interpreter->typed_input_tensor<T>(input_index));
+  }
+
+  template <typename T>
+  TensorView<T> OutputView(const int output_index,
+                           tflite::Interpreter* interpreter) const {
+    TfLiteTensor* output_tensor =
+        interpreter->tensor(interpreter->outputs()[output_index]);
+    return TensorView<T>(interpreter->typed_output_tensor<T>(output_index),
+                         std::vector<int>(output_tensor->dims->data,
+                                          output_tensor->dims->data +
+                                              output_tensor->dims->size));
+  }
+
+  template <typename T>
+  std::vector<T> Output(const int output_index,
+                        tflite::Interpreter* interpreter) const {
+    TensorView<T> output_view = OutputView<T>(output_index, interpreter);
+    return std::vector<T>(output_view.data(),
+                          output_view.data() + output_view.size());
+  }
+
+ protected:
+  explicit TfLiteModelExecutor(
+      std::unique_ptr<const tflite::FlatBufferModel> model);
+
+  std::unique_ptr<const tflite::FlatBufferModel> model_;
+  std::unique_ptr<tflite::OpResolver> resolver_;
+};
+
+template <>
+void TfLiteModelExecutor::SetInput(const int input_index,
+                                   const std::vector<std::string>& input_data,
+                                   tflite::Interpreter* interpreter) const;
+
+template <>
+std::vector<tflite::StringRef> TfLiteModelExecutor::Output(
+    const int output_index, tflite::Interpreter* interpreter) const;
+
+template <>
+std::vector<std::string> TfLiteModelExecutor::Output(
+    const int output_index, tflite::Interpreter* interpreter) const;
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_TFLITE_MODEL_EXECUTOR_H_
diff --git a/utils/tflite/dist_diversification.cc b/utils/tflite/dist_diversification.cc
new file mode 100644
index 0000000..faf9be0
--- /dev/null
+++ b/utils/tflite/dist_diversification.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tflite/dist_diversification.h"
+
+#include <algorithm>
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Returns a vector of row indices in a distance matrix.
+// Indices are increasing and the distance of every selected index to others
+// is larger than `min_distance`.
+template <typename DistanceMatrixType>
+std::vector<int> DiversifyByDistance(const DistanceMatrixType& distance_matrix,
+                                     const int matrix_size,
+                                     const float min_distance,
+                                     const int max_num_results) {
+  std::vector<int> result{0};
+  result.reserve(max_num_results);
+  int index = 1;
+  while (result.size() < max_num_results && index < matrix_size) {
+    for (; index < matrix_size; ++index) {
+      bool too_close = false;
+      for (const int selected_index : result) {
+        if (distance_matrix(index, selected_index) < min_distance) {
+          too_close = true;
+          break;
+        }
+      }
+      if (!too_close) {
+        result.push_back(index);
+        ++index;
+        break;
+      }
+    }
+  }
+  return result;
+}
+
+// Input parameters for the op.
+enum DistDiversificationInputs {
+  DIST_DIVERSIFICATION_INPUT_DISTANCE_MATRIX = 0,
+  DIST_DIVERSIFICATION_INPUT_MIN_DISTANCE = 1,
+  DIST_DIVERSIFICATION_INPUT_NUM_RESULTS = 2
+};
+
+// Output parameters for the op.
+enum DistDiversificationOutputs {
+  DIST_DIVERSIFICATION_OUTPUT_INDICES = 0,
+  DIST_DIVERSIFICATION_OUTPUT_LENGTH = 1,
+};
+
+TfLiteIntArray* CreateSizeArray(const std::initializer_list<int>& sizes) {
+  TfLiteIntArray* array_size = TfLiteIntArrayCreate(sizes.size());
+  int index = 0;
+  for (const int size : sizes) {
+    array_size->data[index++] = size;
+  }
+  return array_size;
+}
+
+TfLiteStatus AllocateOutputIndexes(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor& num_results =
+      context
+          ->tensors[node->inputs->data[DIST_DIVERSIFICATION_INPUT_NUM_RESULTS]];
+  TfLiteTensor& output_indices =
+      context
+          ->tensors[node->outputs->data[DIST_DIVERSIFICATION_OUTPUT_INDICES]];
+  return context->ResizeTensor(context, &output_indices,
+                               CreateSizeArray({num_results.data.i32[0]}));
+}
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  const TfLiteTensor& num_results =
+      context
+          ->tensors[node->inputs->data[DIST_DIVERSIFICATION_INPUT_NUM_RESULTS]];
+  if (tflite::IsConstantTensor(&num_results)) {
+    TF_LITE_ENSURE_OK(context, AllocateOutputIndexes(context, node));
+  } else {
+    TfLiteTensor& output_indices =
+        context
+            ->tensors[node->outputs->data[DIST_DIVERSIFICATION_OUTPUT_INDICES]];
+    tflite::SetTensorToDynamic(&output_indices);
+  }
+  TfLiteTensor& output_length =
+      context->tensors[node->outputs->data[DIST_DIVERSIFICATION_OUTPUT_LENGTH]];
+  TF_LITE_ENSURE_OK(context, context->ResizeTensor(context, &output_length,
+                                                   CreateSizeArray({1})));
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  TfLiteTensor& output_indices =
+      context
+          ->tensors[node->outputs->data[DIST_DIVERSIFICATION_OUTPUT_INDICES]];
+  if (tflite::IsDynamicTensor(&output_indices)) {
+    TF_LITE_ENSURE_OK(context, AllocateOutputIndexes(context, node));
+  }
+  const TfLiteTensor& distance_matrix =
+      context->tensors[node->inputs
+                           ->data[DIST_DIVERSIFICATION_INPUT_DISTANCE_MATRIX]];
+  const int distance_matrix_dim = distance_matrix.dims->data[0];
+  const float min_distance =
+      context
+          ->tensors[node->inputs->data[DIST_DIVERSIFICATION_INPUT_MIN_DISTANCE]]
+          .data.f[0];
+  const int num_results =
+      context
+          ->tensors[node->inputs->data[DIST_DIVERSIFICATION_INPUT_NUM_RESULTS]]
+          .data.i32[0];
+  const auto indices = DiversifyByDistance(
+      [&](int row, int col) {
+        return distance_matrix.data.f[row * distance_matrix_dim + col];
+      },
+      distance_matrix_dim, min_distance, num_results);
+  std::copy(indices.begin(), indices.end(), output_indices.data.i32);
+  std::fill_n(output_indices.data.i32 + indices.size(),
+              num_results - indices.size(), -1);
+  TfLiteTensor& output_length =
+      context->tensors[node->outputs->data[DIST_DIVERSIFICATION_OUTPUT_LENGTH]];
+  *output_length.data.i32 = indices.size();
+  return kTfLiteOk;
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
+
+namespace tflite {
+namespace ops {
+namespace custom {
+TfLiteRegistration* Register_DISTANCE_DIVERSIFICATION() {
+  static TfLiteRegistration r = {nullptr, nullptr, libtextclassifier3::Prepare,
+                                 libtextclassifier3::Eval};
+  return &r;
+}
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
diff --git a/utils/tflite/dist_diversification.h b/utils/tflite/dist_diversification.h
new file mode 100644
index 0000000..924186d
--- /dev/null
+++ b/utils/tflite/dist_diversification.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_TFLITE_DIST_DIVERSIFICATION_H_
+#define LIBTEXTCLASSIFIER_UTILS_TFLITE_DIST_DIVERSIFICATION_H_
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+namespace ops {
+namespace custom {
+
+TfLiteRegistration* Register_DISTANCE_DIVERSIFICATION();
+
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_TFLITE_DIST_DIVERSIFICATION_H_
diff --git a/utils/tflite/dist_diversification_test.cc b/utils/tflite/dist_diversification_test.cc
new file mode 100644
index 0000000..6ed578c
--- /dev/null
+++ b/utils/tflite/dist_diversification_test.cc
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tflite/dist_diversification.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+class DistanceDiversificationOpModel : public tflite::SingleOpModel {
+ public:
+  explicit DistanceDiversificationOpModel(int matrix_rows);
+  void SetDistanceMatrix(const std::initializer_list<float>& values) {
+    PopulateTensor(distance_matrix_, values);
+  }
+  void SetNumOutput(int length) { PopulateTensor(num_results_, {length}); }
+  void SetMinDistance(float min_distance) {
+    PopulateTensor(min_distance_, {min_distance});
+  }
+  int GetOutputLen() { return ExtractVector<int>(output_len_).front(); }
+  std::vector<int> GetOutputIndexes(int output_length) {
+    auto res = ExtractVector<int>(output_indexes_);
+    res.resize(output_length);
+    return res;
+  }
+
+ private:
+  int distance_matrix_;
+  int num_results_;
+  int min_distance_;
+
+  int output_len_;
+  int output_indexes_;
+};
+
+DistanceDiversificationOpModel::DistanceDiversificationOpModel(
+    int matrix_rows) {
+  distance_matrix_ = AddInput(tflite::TensorType_FLOAT32);
+  min_distance_ = AddInput(tflite::TensorType_FLOAT32);
+  num_results_ = AddInput(tflite::TensorType_INT32);
+
+  output_indexes_ = AddOutput(tflite::TensorType_INT32);
+  output_len_ = AddOutput(tflite::TensorType_INT32);
+  SetCustomOp("DistanceDiversification", {},
+              tflite::ops::custom::Register_DISTANCE_DIVERSIFICATION);
+  BuildInterpreter({{matrix_rows, matrix_rows}, {1}, {1}});
+}
+
+// Tests
+TEST(DistanceDiversificationOp, Simple) {
+  DistanceDiversificationOpModel m(5);
+  m.SetDistanceMatrix({0.0, 0.1, 0.2, 0.3, 0.4, 0.1, 0.0, 0.1, 0.2,
+                       0.3, 0.2, 0.1, 0.0, 0.1, 0.2, 0.3, 0.2, 0.1,
+                       0.0, 0.1, 0.4, 0.3, 0.2, 0.1, 0.0});
+  m.SetMinDistance(0.21);
+  m.SetNumOutput(3);
+  m.Invoke();
+  const int output_length = m.GetOutputLen();
+  EXPECT_EQ(output_length, 2);
+  EXPECT_THAT(m.GetOutputIndexes(output_length), testing::ElementsAre(0, 3));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/tflite/text_encoder.cc b/utils/tflite/text_encoder.cc
new file mode 100644
index 0000000..abc472e
--- /dev/null
+++ b/utils/tflite/text_encoder.cc
@@ -0,0 +1,377 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include "utils/base/logging.h"
+#include "utils/sentencepiece/double_array_trie.h"
+#include "utils/sentencepiece/encoder.h"
+#include "utils/sentencepiece/normalizer.h"
+#include "utils/sentencepiece/sorted_strings_table.h"
+#include "utils/strings/stringpiece.h"
+#include "utils/tflite/text_encoder.h"
+#include "utils/tflite/text_encoder_config_generated.h"
+#include "flatbuffers/flatbuffers.h"
+#include "flatbuffers/flexbuffers.h"
+#include "tensorflow/contrib/lite/kernels/kernel_util.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+struct TextEncoderOp {
+  std::unique_ptr<SentencePieceNormalizer> normalizer;
+  std::unique_ptr<Encoder> encoder;
+  std::unique_ptr<SentencePieceMatcher> matcher;
+};
+
+// Input parameters for the op.
+enum TextEncoderInputs {
+  TEXT_ENCODER_INPUT_TEXTS = 0,
+  TEXT_ENCODER_INPUT_NUM_TEXTS = 1,
+  TEXT_ENCODER_INPUT_MAX_LENGTH = 2,
+  TEXT_ENCODER_INPUT_ATTR = 3
+};
+
+// Output parameters for the op.
+enum SmartReplyModelOutputs {
+  TEXT_ENCODER_OUTPUT_ENCODED = 0,
+  TEXT_ENCODER_OUTPUT_POSITION = 1,
+  TEXT_ENCODER_OUTPUT_LENGTHS = 2,
+  TEXT_ENCODER_OUTPUT_ATTR = 3,
+};
+
+const char kTextEncoderConfigAttr[] = "text_encoder_config";
+
+// Input rank is 2 since there is a dummy batch dimension of 1.
+const int kInputRank = 2;
+const int kBatchSize = 1;
+
+// Initializes text encoder object from serialized options:
+//   The options are a flexbuffers attribute map that contain the op config
+//   with the key `text_encoder_config` as `TextEncoderConfig`.
+void* Initialize(TfLiteContext* context, const char* buffer, size_t length) {
+  const flexbuffers::Map& attr_map =
+      flexbuffers::GetRoot(reinterpret_cast<const uint8_t*>(buffer), length)
+          .AsMap();
+  const flexbuffers::Blob serialized_config =
+      attr_map[kTextEncoderConfigAttr].AsBlob();
+  const TextEncoderConfig* config =
+      flatbuffers::GetRoot<TextEncoderConfig>(serialized_config.data());
+
+  std::unique_ptr<TextEncoderOp> encoder_op(new TextEncoderOp());
+
+  // Create normalizer from options.
+  const TrieNode* charsmap_trie_nodes = reinterpret_cast<const TrieNode*>(
+      config->normalization_charsmap()->Data());
+  const int charsmap_trie_nodes_length =
+      config->normalization_charsmap()->Length() / sizeof(TrieNode);
+  encoder_op->normalizer.reset(new SentencePieceNormalizer(
+      DoubleArrayTrie(charsmap_trie_nodes, charsmap_trie_nodes_length),
+      StringPiece(config->normalization_charsmap_values()->data(),
+                  config->normalization_charsmap_values()->size()),
+      config->add_dummy_prefix(), config->remove_extra_whitespaces(),
+      config->escape_whitespaces()));
+
+  const int num_pieces = config->pieces_scores()->Length();
+
+  switch (config->matcher_type()) {
+    case SentencePieceMatcherType_MAPPED_TRIE: {
+      const TrieNode* pieces_trie_nodes =
+          reinterpret_cast<const TrieNode*>(config->pieces()->Data());
+      const int pieces_trie_nodes_length =
+          config->pieces()->Length() / sizeof(TrieNode);
+      encoder_op->matcher.reset(
+          new DoubleArrayTrie(pieces_trie_nodes, pieces_trie_nodes_length));
+      break;
+    }
+    case SentencePieceMatcherType_SORTED_STRING_TABLE: {
+      encoder_op->matcher.reset(new SortedStringsTable(
+          num_pieces, config->pieces_offsets()->data(),
+          StringPiece(config->pieces()->data(), config->pieces()->Length())));
+      break;
+    }
+    default: {
+      TC3_LOG(ERROR) << "Unknown sentence piece matcher type.";
+      return nullptr;
+    }
+  }
+  encoder_op->encoder.reset(new Encoder(
+      encoder_op->matcher.get(), num_pieces, config->pieces_scores()->data(),
+      config->start_code(), config->end_code(), config->encoding_offset(),
+      config->unknown_code(), config->unknown_score()));
+  return encoder_op.release();
+}
+
+void Free(TfLiteContext* context, void* buffer) {
+  delete reinterpret_cast<TextEncoderOp*>(buffer);
+}
+
+namespace {
+TfLiteIntArray* CreateSizeArray(const std::initializer_list<int>& sizes) {
+  TfLiteIntArray* array_size = TfLiteIntArrayCreate(sizes.size());
+  int index = 0;
+  for (const int size : sizes) {
+    array_size->data[index++] = size;
+  }
+  return array_size;
+}
+
+// Copies attributes values according to the encoding_offsets of every string.
+TfLiteStatus CopyAttribute(const TfLiteTensor& in,
+                           const std::vector<int>& encoding_end_offsets,
+                           int start_offset, TfLiteContext* context,
+                           TfLiteTensor* out) {
+  TF_LITE_ENSURE_EQ(context, in.dims->size, kInputRank);
+  TF_LITE_ENSURE_EQ(context, in.dims->data[0], kBatchSize);
+  const int output_size = out->dims->data[1];
+  int output_offset = 0;
+  for (int value_index = 0;
+       value_index < encoding_end_offsets.size() && output_offset < output_size;
+       ++value_index) {
+    // Calculate how many elements need to be set with this value.
+    // The low bound depends on the offset from the beggining. If this is 0, it
+    // means that this value it truncated.
+    // The upper bound depends on how many elements are in the output tensor.
+    const int from_this_element =
+        std::min(std::max(0, encoding_end_offsets[value_index] - start_offset -
+                                 output_offset),
+                 output_size - output_offset);
+    if (from_this_element == 0) {
+      continue;
+    }
+
+    switch (in.type) {
+      case kTfLiteInt32: {
+        std::fill(out->data.i32 + output_offset,
+                  out->data.i32 + output_offset + from_this_element,
+                  in.data.i32[value_index]);
+      } break;
+      case kTfLiteFloat32: {
+        std::fill(out->data.f + output_offset,
+                  out->data.f + output_offset + from_this_element,
+                  in.data.f[value_index]);
+      } break;
+      default:
+        context->ReportError(
+            (context), __FILE__ " Not supported attribute type %d", in.type);
+        return kTfLiteError;
+    }
+    output_offset += from_this_element;
+  }
+  // Do final padding.
+  switch (in.type) {
+    case kTfLiteInt32: {
+      const int32_t value =
+          (output_offset > 0) ? out->data.i32[output_offset - 1] : 0;
+      std::fill(out->data.i32 + output_offset, out->data.i32 + output_size,
+                value);
+    } break;
+    case kTfLiteFloat32: {
+      const float value =
+          (output_offset > 0) ? out->data.f[output_offset - 1] : 0;
+      std::fill(out->data.f + output_offset, out->data.f + output_size, value);
+    } break;
+    default:
+      break;
+  }
+  return kTfLiteOk;
+}
+
+TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node,
+                                 int max_output_length) {
+  TfLiteTensor& output_encoded =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ENCODED]];
+
+  TF_LITE_ENSURE_OK(
+      context,
+      context->ResizeTensor(context, &output_encoded,
+                            CreateSizeArray({kBatchSize, max_output_length})));
+
+  TfLiteTensor& output_positions =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_POSITION]];
+
+  TF_LITE_ENSURE_OK(
+      context,
+      context->ResizeTensor(context, &output_positions,
+                            CreateSizeArray({kBatchSize, max_output_length})));
+
+  const int num_output_attrs = node->outputs->size - TEXT_ENCODER_OUTPUT_ATTR;
+  for (int i = 0; i < num_output_attrs; ++i) {
+    TfLiteTensor& output =
+        context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ATTR + i]];
+    TF_LITE_ENSURE_OK(context,
+                      context->ResizeTensor(
+                          context, &output,
+                          CreateSizeArray({kBatchSize, max_output_length})));
+  }
+  return kTfLiteOk;
+}
+
+}  // namespace
+
+TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
+  // Check that the batch dimension is kBatchSize.
+  const TfLiteTensor& input_text =
+      context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_TEXTS]];
+  TF_LITE_ENSURE_EQ(context, input_text.dims->size, kInputRank);
+  TF_LITE_ENSURE_EQ(context, input_text.dims->data[0], kBatchSize);
+
+  TfLiteTensor& output_lengths =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_LENGTHS]];
+  TfLiteTensor& output_encoded =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ENCODED]];
+  TfLiteTensor& output_positions =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_POSITION]];
+
+  TF_LITE_ENSURE_OK(context,
+                    context->ResizeTensor(context, &output_lengths,
+                                          CreateSizeArray({kBatchSize})));
+
+  // Check that there are enough outputs for attributes.
+  const int num_output_attrs = node->outputs->size - TEXT_ENCODER_OUTPUT_ATTR;
+  TF_LITE_ENSURE_EQ(context, node->inputs->size - TEXT_ENCODER_INPUT_ATTR,
+                    num_output_attrs);
+
+  // Copy attribute types from input to output tensors.
+  for (int i = 0; i < num_output_attrs; ++i) {
+    TfLiteTensor& input =
+        context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_ATTR + i]];
+    TfLiteTensor& output =
+        context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ATTR + i]];
+    output.type = input.type;
+  }
+
+  const TfLiteTensor& output_length =
+      context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_MAX_LENGTH]];
+
+  if (tflite::IsConstantTensor(&output_length)) {
+    return ResizeOutputTensors(context, node, output_length.data.i64[0]);
+  } else {
+    tflite::SetTensorToDynamic(&output_encoded);
+    tflite::SetTensorToDynamic(&output_positions);
+    for (int i = 0; i < num_output_attrs; ++i) {
+      TfLiteTensor& output_attr =
+          context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ATTR + i]];
+      tflite::SetTensorToDynamic(&output_attr);
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+  if (node->user_data == nullptr) {
+    return kTfLiteError;
+  }
+  const TextEncoderOp* encoder_op =
+      reinterpret_cast<TextEncoderOp*>(node->user_data);
+  const TfLiteTensor& input_text =
+      context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_TEXTS]];
+  const int num_strings = tflite::GetStringCount(&input_text);
+  // Check that the number of strings matches the length parameter.
+  const int num_strings_param =
+      context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_NUM_TEXTS]]
+          .data.i32[0];
+  TF_LITE_ENSURE_EQ(context, num_strings, num_strings_param);
+
+  TfLiteTensor& output_encoded =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ENCODED]];
+  if (tflite::IsDynamicTensor(&output_encoded)) {
+    const TfLiteTensor& output_length =
+        context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_MAX_LENGTH]];
+    TF_LITE_ENSURE_OK(
+        context, ResizeOutputTensors(context, node, output_length.data.i64[0]));
+  }
+  TfLiteTensor& output_positions =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_POSITION]];
+
+  std::vector<int> encoded_total;
+  std::vector<int> encoded_offsets;
+  std::vector<int> encoded_positions;
+  encoded_offsets.reserve(num_strings);
+  const int max_output_length = output_encoded.dims->data[1];
+  const int max_encoded_position = max_output_length;
+
+  for (int i = 0; i < num_strings; ++i) {
+    const auto& strref = tflite::GetString(&input_text, i);
+    const std::vector<int> encoded = encoder_op->encoder->Encode(
+        encoder_op->normalizer->Normalize(StringPiece(strref.str, strref.len)));
+    encoded_total.insert(encoded_total.end(), encoded.begin(), encoded.end());
+    encoded_offsets.push_back(encoded_total.size());
+    for (int i = 0; i < encoded.size(); i++) {
+      encoded_positions.push_back(std::min(i, max_encoded_position - 1));
+    }
+  }
+
+  // Copy encoding to output tensor.
+  const int start_offset =
+      std::max(0, static_cast<int>(encoded_total.size()) - max_output_length);
+  int output_offset = 0;
+  int32_t* output_buffer = output_encoded.data.i32;
+  int32_t* output_positions_buffer = output_positions.data.i32;
+  for (int i = start_offset; i < encoded_total.size(); ++i, ++output_offset) {
+    output_buffer[output_offset] = encoded_total[i];
+    output_positions_buffer[output_offset] = encoded_positions[i];
+  }
+
+  // Save output encoded length.
+  TfLiteTensor& output_lengths =
+      context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_LENGTHS]];
+  output_lengths.data.i32[0] = output_offset;
+
+  // Do padding.
+  for (; output_offset < max_output_length; ++output_offset) {
+    output_buffer[output_offset] = encoded_total.back();
+    output_positions_buffer[output_offset] = max_encoded_position;
+  }
+
+  // Process attributes, all checks of sizes and types are done in Prepare.
+  const int num_output_attrs = node->outputs->size - TEXT_ENCODER_OUTPUT_ATTR;
+  TF_LITE_ENSURE_EQ(context, node->inputs->size - TEXT_ENCODER_INPUT_ATTR,
+                    num_output_attrs);
+  for (int i = 0; i < num_output_attrs; ++i) {
+    TfLiteStatus attr_status = CopyAttribute(
+        context->tensors[node->inputs->data[TEXT_ENCODER_INPUT_ATTR + i]],
+        encoded_offsets, start_offset, context,
+        &context->tensors[node->outputs->data[TEXT_ENCODER_OUTPUT_ATTR + i]]);
+    if (attr_status != kTfLiteOk) {
+      return attr_status;
+    }
+  }
+
+  return kTfLiteOk;
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
+
+namespace tflite {
+namespace ops {
+namespace custom {
+
+TfLiteRegistration* Register_TEXT_ENCODER() {
+  static TfLiteRegistration registration = {
+      libtextclassifier3::Initialize, libtextclassifier3::Free,
+      libtextclassifier3::Prepare, libtextclassifier3::Eval};
+  return &registration;
+}
+
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
diff --git a/utils/tflite/text_encoder.h b/utils/tflite/text_encoder.h
new file mode 100644
index 0000000..1143031
--- /dev/null
+++ b/utils/tflite/text_encoder.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_TFLITE_TEXT_ENCODER_H_
+#define LIBTEXTCLASSIFIER_UTILS_TFLITE_TEXT_ENCODER_H_
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+namespace ops {
+namespace custom {
+
+TfLiteRegistration* Register_TEXT_ENCODER();
+
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_TFLITE_TEXT_ENCODER_H_
diff --git a/utils/tflite/text_encoder_config.fbs b/utils/tflite/text_encoder_config.fbs
new file mode 100644
index 0000000..8ae8fc5
--- /dev/null
+++ b/utils/tflite/text_encoder_config.fbs
@@ -0,0 +1,65 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// Configuration for the text encoder op.
+
+namespace libtextclassifier3;
+
+enum SentencePieceMatcherType : byte {
+  MAPPED_TRIE = 0,
+  SORTED_STRING_TABLE = 1,
+}
+
+table TextEncoderConfig {
+  // Code that is used as encoding of the start code.
+  start_code:int32 = 0;
+
+  // Code that is used as encoding of the end code.
+  end_code:int32 = 1;
+
+  // This value is added to all codes to make them not intersect with
+  // `start_code` and `end_code`.
+  encoding_offset:int32 = 2;
+
+  // Code that is used for out-of-dictionary characters.
+  unknown_code:int32 = -1;
+
+  // Penalty associated with the unknown code.
+  unknown_score:float;
+
+  // Normalization options.
+  // Serialized normalization charsmap.
+  normalization_charsmap:string;
+  normalization_charsmap_values:string;
+
+  // Whether to add dummy whitespace at the beginning of the text in order to
+  // treat "world" in "world" and "hello world" uniformly.
+  add_dummy_prefix:bool = true;
+
+  // Whether to remove leading, trailing and duplicate internal whitespace.
+  remove_extra_whitespaces:bool = true;
+
+  // Whether to replace whitespace with a meta symbol.
+  escape_whitespaces:bool = true;
+
+  // Sentence pieces scores.
+  pieces_scores:[float];
+
+  // Serialized sentence pieces.
+  pieces:string;
+  pieces_offsets:[int32];
+  matcher_type: SentencePieceMatcherType = MAPPED_TRIE;
+}
diff --git a/utils/tflite/text_encoder_test.cc b/utils/tflite/text_encoder_test.cc
new file mode 100644
index 0000000..0cd67ce
--- /dev/null
+++ b/utils/tflite/text_encoder_test.cc
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "utils/tflite/text_encoder.h"
+#include "gtest/gtest.h"
+#include "third_party/absl/flags/flag.h"
+#include "flatbuffers/flexbuffers.h"
+#include "tensorflow/contrib/lite/interpreter.h"
+#include "tensorflow/contrib/lite/kernels/register.h"
+#include "tensorflow/contrib/lite/kernels/test_util.h"
+#include "tensorflow/contrib/lite/model.h"
+#include "tensorflow/contrib/lite/string_util.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+std::string GetTestConfigPath() {
+  return "";
+}
+
+class TextEncoderOpModel : public tflite::SingleOpModel {
+ public:
+  TextEncoderOpModel(std::initializer_list<int> input_strings_shape,
+                     std::initializer_list<int> attribute_shape);
+  void SetInputText(const std::initializer_list<string>& strings) {
+    PopulateStringTensor(input_string_, strings);
+    PopulateTensor(input_length_, {static_cast<int32_t>(strings.size())});
+  }
+  void SetMaxOutputLength(int length) {
+    PopulateTensor(input_output_maxlength_, {length});
+  }
+  void SetInt32Attribute(const std::initializer_list<int>& attribute) {
+    PopulateTensor(input_attributes_int32_, attribute);
+  }
+  void SetFloatAttribute(const std::initializer_list<float>& attribute) {
+    PopulateTensor(input_attributes_float_, attribute);
+  }
+
+  std::vector<int> GetOutputEncoding() {
+    return ExtractVector<int>(output_encoding_);
+  }
+  std::vector<int> GetOutputPositions() {
+    return ExtractVector<int>(output_positions_);
+  }
+  std::vector<int> GetOutputAttributeInt32() {
+    return ExtractVector<int>(output_attributes_int32_);
+  }
+  std::vector<float> GetOutputAttributeFloat() {
+    return ExtractVector<float>(output_attributes_float_);
+  }
+  int GetEncodedLength() { return ExtractVector<int>(output_length_)[0]; }
+
+ private:
+  int input_string_;
+  int input_length_;
+  int input_output_maxlength_;
+  int input_attributes_int32_;
+  int input_attributes_float_;
+
+  int output_encoding_;
+  int output_positions_;
+  int output_length_;
+  int output_attributes_int32_;
+  int output_attributes_float_;
+};
+
+TextEncoderOpModel::TextEncoderOpModel(
+    std::initializer_list<int> input_strings_shape,
+    std::initializer_list<int> attribute_shape) {
+  input_string_ = AddInput(tflite::TensorType_STRING);
+  input_length_ = AddInput(tflite::TensorType_INT32);
+  input_output_maxlength_ = AddInput(tflite::TensorType_INT32);
+  input_attributes_int32_ = AddInput(tflite::TensorType_INT32);
+  input_attributes_float_ = AddInput(tflite::TensorType_FLOAT32);
+
+  output_encoding_ = AddOutput(tflite::TensorType_INT32);
+  output_positions_ = AddOutput(tflite::TensorType_INT32);
+  output_length_ = AddOutput(tflite::TensorType_INT32);
+  output_attributes_int32_ = AddOutput(tflite::TensorType_INT32);
+  output_attributes_float_ = AddOutput(tflite::TensorType_FLOAT32);
+
+  std::ifstream test_config_stream(GetTestConfigPath());
+  std::string config((std::istreambuf_iterator<char>(test_config_stream)),
+                     (std::istreambuf_iterator<char>()));
+  flexbuffers::Builder builder;
+  builder.Map([&]() { builder.String("text_encoder_config", config); });
+  builder.Finish();
+  SetCustomOp("TextEncoder", builder.GetBuffer(),
+              tflite::ops::custom::Register_TEXT_ENCODER);
+  BuildInterpreter(
+      {input_strings_shape, {1}, {1}, attribute_shape, attribute_shape});
+}
+
+// Tests
+TEST(TextEncoderTest, SimpleEncoder) {
+  TextEncoderOpModel m({1, 1}, {1, 1});
+  m.SetInputText({"Hello"});
+  m.SetMaxOutputLength(10);
+  m.SetInt32Attribute({7});
+  m.SetFloatAttribute({3.f});
+  m.Invoke();
+  EXPECT_EQ(m.GetEncodedLength(), 5);
+  EXPECT_THAT(m.GetOutputEncoding(),
+              testing::ElementsAre(1, 90, 547, 58, 2, 2, 2, 2, 2, 2));
+  EXPECT_THAT(m.GetOutputPositions(),
+              testing::ElementsAre(0, 1, 2, 3, 4, 10, 10, 10, 10, 10));
+  EXPECT_THAT(m.GetOutputAttributeInt32(),
+              testing::ElementsAre(7, 7, 7, 7, 7, 7, 7, 7, 7, 7));
+  EXPECT_THAT(
+      m.GetOutputAttributeFloat(),
+      testing::ElementsAre(3.f, 3.f, 3.f, 3.f, 3.f, 3.f, 3.f, 3.f, 3.f, 3.f));
+}
+
+TEST(TextEncoderTest, ManyStrings) {
+  TextEncoderOpModel m({1, 3}, {1, 3});
+  m.SetInt32Attribute({1, 2, 3});
+  m.SetFloatAttribute({5.f, 4.f, 3.f});
+  m.SetInputText({"Hello", "Hi", "Bye"});
+  m.SetMaxOutputLength(10);
+  m.Invoke();
+  EXPECT_EQ(m.GetEncodedLength(), 10);
+  EXPECT_THAT(m.GetOutputEncoding(),
+              testing::ElementsAre(547, 58, 2, 1, 862, 2, 1, 1919, 19, 2));
+  EXPECT_THAT(m.GetOutputPositions(),
+              testing::ElementsAre(2, 3, 4, 0, 1, 2, 0, 1, 2, 3));
+  EXPECT_THAT(m.GetOutputAttributeInt32(),
+              testing::ElementsAre(1, 1, 1, 2, 2, 2, 3, 3, 3, 3));
+  EXPECT_THAT(
+      m.GetOutputAttributeFloat(),
+      testing::ElementsAre(5.f, 5.f, 5.f, 4.f, 4.f, 4.f, 3.f, 3.f, 3.f, 3.f));
+}
+
+TEST(TextEncoderTest, LongStrings) {
+  TextEncoderOpModel m({1, 4}, {1, 4});
+  m.SetInt32Attribute({1, 2, 3, 4});
+  m.SetFloatAttribute({5.f, 4.f, 3.f, 2.f});
+  m.SetInputText({"Hello", "Hi", "Bye", "Hi"});
+  m.SetMaxOutputLength(9);
+  m.Invoke();
+  EXPECT_EQ(m.GetEncodedLength(), 9);
+  EXPECT_THAT(m.GetOutputEncoding(),
+              testing::ElementsAre(862, 2, 1, 1919, 19, 2, 1, 862, 2));
+  EXPECT_THAT(m.GetOutputPositions(),
+              testing::ElementsAre(1, 2, 0, 1, 2, 3, 0, 1, 2));
+  EXPECT_THAT(m.GetOutputAttributeInt32(),
+              testing::ElementsAre(2, 2, 3, 3, 3, 3, 4, 4, 4));
+  EXPECT_THAT(
+      m.GetOutputAttributeFloat(),
+      testing::ElementsAre(4.f, 4.f, 3.f, 3.f, 3.f, 3.f, 2.f, 2.f, 2.f));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/util/utf8/unicodetext.cc b/utils/utf8/unicodetext.cc
similarity index 96%
rename from util/utf8/unicodetext.cc
rename to utils/utf8/unicodetext.cc
index 2ef79e9..81492d8 100644
--- a/util/utf8/unicodetext.cc
+++ b/utils/utf8/unicodetext.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#include "util/utf8/unicodetext.h"
+#include "utils/utf8/unicodetext.h"
 
 #include <string.h>
 
 #include <algorithm>
 
-#include "util/strings/utf8.h"
+#include "utils/strings/utf8.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // *************** Data representation **********
 // Note: the copy constructor is undefined.
@@ -176,7 +176,7 @@
 
 }  // namespace
 
-UnicodeText& UnicodeText::AppendCodepoint(char32 ch) {
+UnicodeText& UnicodeText::push_back(char32 ch) {
   char str[4];
   int char_len = runetochar(ch, str);
   repr_.append(str, char_len);
@@ -296,4 +296,4 @@
   return UTF8ToUnicodeText(str, /*do_copy=*/true);
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/utf8/unicodetext.h b/utils/utf8/unicodetext.h
similarity index 94%
rename from util/utf8/unicodetext.h
rename to utils/utf8/unicodetext.h
index ec08f53..eb206b8 100644
--- a/util/utf8/unicodetext.h
+++ b/utils/utf8/unicodetext.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,16 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
-#define LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNICODETEXT_H_
+#define LIBTEXTCLASSIFIER_UTILS_UTF8_UNICODETEXT_H_
 
 #include <iterator>
 #include <string>
 #include <utility>
 
-#include "util/base/integral_types.h"
+#include "utils/base/integral_types.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 // ***************************** UnicodeText **************************
 //
@@ -168,7 +168,7 @@
 
   // Calling this may invalidate pointers to underlying data.
   UnicodeText& AppendUTF8(const char* utf8, int len);
-  UnicodeText& AppendCodepoint(char32 ch);
+  UnicodeText& push_back(char32 ch);
   void clear();
 
   std::string ToUTF8String() const;
@@ -219,6 +219,6 @@
 UnicodeText UTF8ToUnicodeText(const std::string& str, bool do_copy);
 UnicodeText UTF8ToUnicodeText(const std::string& str);
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_UTF8_UNICODETEXT_H_
diff --git a/util/utf8/unicodetext_test.cc b/utils/utf8/unicodetext_test.cc
similarity index 93%
rename from util/utf8/unicodetext_test.cc
rename to utils/utf8/unicodetext_test.cc
index 9ec7621..7ebb415 100644
--- a/util/utf8/unicodetext_test.cc
+++ b/utils/utf8/unicodetext_test.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,21 +14,21 @@
  * limitations under the License.
  */
 
-#include "util/utf8/unicodetext.h"
+#include "utils/utf8/unicodetext.h"
 
 #include "gtest/gtest.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 namespace {
 
 class UnicodeTextTest : public testing::Test {
  protected:
   UnicodeTextTest() : empty_text_() {
-    text_.AppendCodepoint(0x1C0);
-    text_.AppendCodepoint(0x4E8C);
-    text_.AppendCodepoint(0xD7DB);
-    text_.AppendCodepoint(0x34);
-    text_.AppendCodepoint(0x1D11E);
+    text_.push_back(0x1C0);
+    text_.push_back(0x4E8C);
+    text_.push_back(0xD7DB);
+    text_.push_back(0x34);
+    text_.push_back(0x1D11E);
   }
 
   UnicodeText empty_text_;
@@ -186,4 +186,4 @@
 }
 
 }  // namespace
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/utf8/unilib-icu.cc b/utils/utf8/unilib-icu.cc
similarity index 97%
rename from util/utf8/unilib-icu.cc
rename to utils/utf8/unilib-icu.cc
index 9e9ce19..852dd54 100644
--- a/util/utf8/unilib-icu.cc
+++ b/utils/utf8/unilib-icu.cc
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
  * limitations under the License.
  */
 
-#include "util/utf8/unilib-icu.h"
+#include "utils/utf8/unilib-icu.h"
 
 #include <utility>
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 bool UniLib::ParseInt32(const UnicodeText& text, int* result) const {
   UErrorCode status = U_ZERO_ERROR;
@@ -290,4 +290,4 @@
       new UniLib::BreakIterator(text));
 }
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
diff --git a/util/utf8/unilib-icu.h b/utils/utf8/unilib-icu.h
similarity index 92%
rename from util/utf8/unilib-icu.h
rename to utils/utf8/unilib-icu.h
index 8983756..453c6da 100644
--- a/util/utf8/unilib-icu.h
+++ b/utils/utf8/unilib-icu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,20 +17,20 @@
 // UniLib implementation with the help of ICU. UniLib is basically a wrapper
 // around the ICU functionality.
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_UTF8_UNILIB_ICU_H_
-#define LIBTEXTCLASSIFIER_UTIL_UTF8_UNILIB_ICU_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_ICU_H_
+#define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_ICU_H_
 
 #include <memory>
 
-#include "util/base/integral_types.h"
-#include "util/utf8/unicodetext.h"
+#include "utils/base/integral_types.h"
+#include "utils/utf8/unicodetext.h"
 #include "unicode/brkiter.h"
 #include "unicode/errorcode.h"
 #include "unicode/regex.h"
 #include "unicode/uchar.h"
 #include "unicode/unum.h"
 
-namespace libtextclassifier2 {
+namespace libtextclassifier3 {
 
 class UniLib {
  public:
@@ -150,6 +150,6 @@
       const UnicodeText& text) const;
 };
 
-}  // namespace libtextclassifier2
+}  // namespace libtextclassifier3
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_UTF8_UNILIB_ICU_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_ICU_H_
diff --git a/util/calendar/calendar.h b/utils/utf8/unilib.h
similarity index 66%
copy from util/calendar/calendar.h
copy to utils/utf8/unilib.h
index b0cf2e6..9588001 100644
--- a/util/calendar/calendar.h
+++ b/utils/utf8/unilib.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2017 The Android Open Source Project
+ * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,9 +14,10 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
-#define LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#ifndef LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_H_
+#define LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_H_
 
-#include "util/calendar/calendar-icu.h"
+#include "utils/utf8/unilib-icu.h"
+#define INIT_UNILIB_FOR_TESTING(VAR) VAR()
 
-#endif  // LIBTEXTCLASSIFIER_UTIL_CALENDAR_CALENDAR_H_
+#endif  // LIBTEXTCLASSIFIER_UTILS_UTF8_UNILIB_H_
diff --git a/utils/utf8/unilib_test.cc b/utils/utf8/unilib_test.cc
new file mode 100644
index 0000000..96b2c2d
--- /dev/null
+++ b/utils/utf8/unilib_test.cc
@@ -0,0 +1,226 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8/unilib.h"
+
+#include "utils/base/logging.h"
+#include "utils/utf8/unicodetext.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using ::testing::ElementsAre;
+
+class UniLibTest : public ::testing::Test {
+ protected:
+  UniLibTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+TEST_F(UniLibTest, CharacterClassesAscii) {
+  EXPECT_TRUE(unilib_.IsOpeningBracket('('));
+  EXPECT_TRUE(unilib_.IsClosingBracket(')'));
+  EXPECT_FALSE(unilib_.IsWhitespace(')'));
+  EXPECT_TRUE(unilib_.IsWhitespace(' '));
+  EXPECT_FALSE(unilib_.IsDigit(')'));
+  EXPECT_TRUE(unilib_.IsDigit('0'));
+  EXPECT_TRUE(unilib_.IsDigit('9'));
+  EXPECT_FALSE(unilib_.IsUpper(')'));
+  EXPECT_TRUE(unilib_.IsUpper('A'));
+  EXPECT_TRUE(unilib_.IsUpper('Z'));
+  EXPECT_EQ(unilib_.ToLower('A'), 'a');
+  EXPECT_EQ(unilib_.ToLower('Z'), 'z');
+  EXPECT_EQ(unilib_.ToLower(')'), ')');
+  EXPECT_EQ(unilib_.GetPairedBracket(')'), '(');
+  EXPECT_EQ(unilib_.GetPairedBracket('}'), '{');
+}
+
+#ifndef TC3_UNILIB_DUMMY
+TEST_F(UniLibTest, CharacterClassesUnicode) {
+  EXPECT_TRUE(unilib_.IsOpeningBracket(0x0F3C));  // TIBET ANG KHANG GYON
+  EXPECT_TRUE(unilib_.IsClosingBracket(0x0F3D));  // TIBET ANG KHANG GYAS
+  EXPECT_FALSE(unilib_.IsWhitespace(0x23F0));     // ALARM CLOCK
+  EXPECT_TRUE(unilib_.IsWhitespace(0x2003));      // EM SPACE
+  EXPECT_FALSE(unilib_.IsDigit(0xA619));          // VAI SYMBOL JONG
+  EXPECT_TRUE(unilib_.IsDigit(0xA620));           // VAI DIGIT ZERO
+  EXPECT_TRUE(unilib_.IsDigit(0xA629));           // VAI DIGIT NINE
+  EXPECT_FALSE(unilib_.IsDigit(0xA62A));          // VAI SYLLABLE NDOLE MA
+  EXPECT_FALSE(unilib_.IsUpper(0x0211));          // SMALL R WITH DOUBLE GRAVE
+  EXPECT_TRUE(unilib_.IsUpper(0x0212));           // CAPITAL R WITH DOUBLE GRAVE
+  EXPECT_TRUE(unilib_.IsUpper(0x0391));           // GREEK CAPITAL ALPHA
+  EXPECT_TRUE(unilib_.IsUpper(0x03AB));        // GREEK CAPITAL UPSILON W DIAL
+  EXPECT_FALSE(unilib_.IsUpper(0x03AC));       // GREEK SMALL ALPHA WITH TONOS
+  EXPECT_EQ(unilib_.ToLower(0x0391), 0x03B1);  // GREEK ALPHA
+  EXPECT_EQ(unilib_.ToLower(0x03AB), 0x03CB);  // GREEK UPSILON WITH DIALYTIKA
+  EXPECT_EQ(unilib_.ToLower(0x03C0), 0x03C0);  // GREEK SMALL PI
+
+  EXPECT_EQ(unilib_.GetPairedBracket(0x0F3C), 0x0F3D);
+  EXPECT_EQ(unilib_.GetPairedBracket(0x0F3D), 0x0F3C);
+}
+#endif  // ndef TC3_UNILIB_DUMMY
+
+TEST_F(UniLibTest, RegexInterface) {
+  const UnicodeText regex_pattern =
+      UTF8ToUnicodeText("[0-9]+", /*do_copy=*/true);
+  std::unique_ptr<UniLib::RegexPattern> pattern =
+      unilib_.CreateRegexPattern(regex_pattern);
+  const UnicodeText input = UTF8ToUnicodeText("hello 0123", /*do_copy=*/false);
+  int status;
+  std::unique_ptr<UniLib::RegexMatcher> matcher = pattern->Matcher(input);
+  TC3_LOG(INFO) << matcher->Matches(&status);
+  TC3_LOG(INFO) << matcher->Find(&status);
+  TC3_LOG(INFO) << matcher->Start(0, &status);
+  TC3_LOG(INFO) << matcher->End(0, &status);
+  TC3_LOG(INFO) << matcher->Group(0, &status).size_codepoints();
+}
+
+#ifdef TC3_UNILIB_ICU
+TEST_F(UniLibTest, Regex) {
+  // The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
+  // test the regex functionality with it to verify we are handling the indices
+  // correctly.
+  const UnicodeText regex_pattern =
+      UTF8ToUnicodeText("[0-9]+πŸ˜‹", /*do_copy=*/false);
+  std::unique_ptr<UniLib::RegexPattern> pattern =
+      unilib_.CreateRegexPattern(regex_pattern);
+  int status;
+  std::unique_ptr<UniLib::RegexMatcher> matcher;
+
+  matcher = pattern->Matcher(UTF8ToUnicodeText("0123πŸ˜‹", /*do_copy=*/false));
+  EXPECT_TRUE(matcher->Matches(&status));
+  EXPECT_TRUE(matcher->ApproximatelyMatches(&status));
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_TRUE(matcher->Matches(&status));  // Check that the state is reset.
+  EXPECT_TRUE(matcher->ApproximatelyMatches(&status));
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+
+  matcher = pattern->Matcher(
+      UTF8ToUnicodeText("helloπŸ˜‹πŸ˜‹ 0123πŸ˜‹ world", /*do_copy=*/false));
+  EXPECT_FALSE(matcher->Matches(&status));
+  EXPECT_FALSE(matcher->ApproximatelyMatches(&status));
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+
+  matcher = pattern->Matcher(
+      UTF8ToUnicodeText("helloπŸ˜‹πŸ˜‹ 0123πŸ˜‹ world", /*do_copy=*/false));
+  EXPECT_TRUE(matcher->Find(&status));
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Start(0, &status), 8);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->End(0, &status), 13);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "0123πŸ˜‹");
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+}
+#endif  // TC3_UNILIB_ICU
+
+#ifdef TC3_UNILIB_ICU
+TEST_F(UniLibTest, RegexGroups) {
+  // The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
+  // test the regex functionality with it to verify we are handling the indices
+  // correctly.
+  const UnicodeText regex_pattern = UTF8ToUnicodeText(
+      "(?<group1>[0-9])(?<group2>[0-9]+)πŸ˜‹", /*do_copy=*/false);
+  std::unique_ptr<UniLib::RegexPattern> pattern =
+      unilib_.CreateRegexPattern(regex_pattern);
+  int status;
+  std::unique_ptr<UniLib::RegexMatcher> matcher;
+
+  matcher = pattern->Matcher(
+      UTF8ToUnicodeText("helloπŸ˜‹πŸ˜‹ 0123πŸ˜‹ world", /*do_copy=*/false));
+  EXPECT_TRUE(matcher->Find(&status));
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Start(0, &status), 8);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Start(1, &status), 8);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Start(2, &status), 9);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->End(0, &status), 13);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->End(1, &status), 9);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->End(2, &status), 12);
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "0123πŸ˜‹");
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Group(1, &status).ToUTF8String(), "0");
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+  EXPECT_EQ(matcher->Group(2, &status).ToUTF8String(), "123");
+  EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
+}
+#endif  // TC3_UNILIB_ICU
+
+#ifdef TC3_UNILIB_ICU
+
+TEST_F(UniLibTest, BreakIterator) {
+  const UnicodeText text = UTF8ToUnicodeText("some text", /*do_copy=*/false);
+  std::unique_ptr<UniLib::BreakIterator> iterator =
+      unilib_.CreateBreakIterator(text);
+  std::vector<int> break_indices;
+  int break_index = 0;
+  while ((break_index = iterator->Next()) != UniLib::BreakIterator::kDone) {
+    break_indices.push_back(break_index);
+  }
+  EXPECT_THAT(break_indices, ElementsAre(4, 5, 9));
+}
+#endif  // TC3_UNILIB_ICU
+
+#ifdef TC3_UNILIB_ICU
+TEST_F(UniLibTest, BreakIterator4ByteUTF8) {
+  const UnicodeText text = UTF8ToUnicodeText("πŸ˜€πŸ˜‚πŸ˜‹", /*do_copy=*/false);
+  std::unique_ptr<UniLib::BreakIterator> iterator =
+      unilib_.CreateBreakIterator(text);
+  std::vector<int> break_indices;
+  int break_index = 0;
+  while ((break_index = iterator->Next()) != UniLib::BreakIterator::kDone) {
+    break_indices.push_back(break_index);
+  }
+  EXPECT_THAT(break_indices, ElementsAre(1, 2, 3));
+}
+#endif  // TC3_UNILIB_ICU
+
+#ifndef TC3_UNILIB_JAVAICU
+TEST_F(UniLibTest, IntegerParse) {
+  int result;
+  EXPECT_TRUE(
+      unilib_.ParseInt32(UTF8ToUnicodeText("123", /*do_copy=*/false), &result));
+  EXPECT_EQ(result, 123);
+}
+#endif  // ndef TC3_UNILIB_JAVAICU
+
+#ifdef TC3_UNILIB_ICU
+TEST_F(UniLibTest, IntegerParseFullWidth) {
+  int result;
+  // The input string here is full width
+  EXPECT_TRUE(unilib_.ParseInt32(UTF8ToUnicodeText("οΌ‘οΌ’οΌ“", /*do_copy=*/false),
+                                 &result));
+  EXPECT_EQ(result, 123);
+}
+#endif  // TC3_UNILIB_ICU
+
+#ifdef TC3_UNILIB_ICU
+TEST_F(UniLibTest, IntegerParseFullWidthWithAlpha) {
+  int result;
+  // The input string here is full width
+  EXPECT_FALSE(unilib_.ParseInt32(UTF8ToUnicodeText("οΌ‘aοΌ“", /*do_copy=*/false),
+                                  &result));
+}
+#endif  // TC3_UNILIB_ICU
+
+}  // namespace
+}  // namespace libtextclassifier3
diff --git a/utils/variant.h b/utils/variant.h
new file mode 100644
index 0000000..ddb0d60
--- /dev/null
+++ b/utils/variant.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_VARIANT_H_
+#define LIBTEXTCLASSIFIER_UTILS_VARIANT_H_
+
+#include <string>
+
+#include "utils/base/integral_types.h"
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+
+// Represents a type-tagged union of different basic types.
+struct Variant {
+  Variant() : type(TYPE_INVALID) {}
+  explicit Variant(int value) : type(TYPE_INT_VALUE), int_value(value) {}
+  explicit Variant(int64 value) : type(TYPE_LONG_VALUE), long_value(value) {}
+  explicit Variant(float value) : type(TYPE_FLOAT_VALUE), float_value(value) {}
+  explicit Variant(double value)
+      : type(TYPE_DOUBLE_VALUE), double_value(value) {}
+  explicit Variant(StringPiece value)
+      : type(TYPE_STRING_VALUE), string_value(value.ToString()) {}
+  explicit Variant(std::string value)
+      : type(TYPE_STRING_VALUE), string_value(value) {}
+  explicit Variant(const char* value)
+      : type(TYPE_STRING_VALUE), string_value(value) {}
+  explicit Variant(bool value) : type(TYPE_BOOL_VALUE), bool_value(value) {}
+  enum Type {
+    TYPE_INVALID = 0,
+    TYPE_INT_VALUE = 1,
+    TYPE_LONG_VALUE = 2,
+    TYPE_FLOAT_VALUE = 3,
+    TYPE_DOUBLE_VALUE = 4,
+    TYPE_BOOL_VALUE = 5,
+    TYPE_STRING_VALUE = 6,
+  };
+  Type type;
+  union {
+    int int_value;
+    int64 long_value;
+    float float_value;
+    double double_value;
+    bool bool_value;
+  };
+  std::string string_value;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_VARIANT_H_