Export libtextclassifier am: 0b8b33256f am: ec6a59a0ff am: 8feb63bdaf Original change: https://googleplex-android-review.googlesource.com/c/platform/external/libtextclassifier/+/10700569 Change-Id: I857ad04e1ec28acd85fa91839ce712aca8a859e6

commit: 4fe16ac05cfbce74aa726061d6b1b578b14ef0a6 [log] [tgz]
author: Tony Mak <tonymak@google.com> Tue Aug 25 01:06:14 2020 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> Tue Aug 25 01:06:14 2020 +0000
tree: d1a8518de75e5c9244b19ebc0b8ea8af456bddcf
parent: 282003139f2535ac1fa7d68abeaeeeff0aafb774 [diff]
parent: 8feb63bdafaec145dab3f38c3418491e3bb7a3c7 [diff]
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 2b02610..3c8e10b 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING

@@ -7,6 +7,9 @@
           "exclude-annotation": "androidx.test.filters.FlakyTest"
         }
       ]
+    },
+    {
+      "name": "libtextclassifier_tests"
     }
   ]
 }
\ No newline at end of file

diff --git a/java/res/values-es-rUS/strings.xml b/java/res/values-b+es+419/strings.xml
similarity index 100%
rename from java/res/values-es-rUS/strings.xml
rename to java/res/values-b+es+419/strings.xml


diff --git a/java/res/values-in/strings.xml b/java/res/values-b+sr+Latn/strings.xml
similarity index 100%
copy from java/res/values-in/strings.xml
copy to java/res/values-b+sr+Latn/strings.xml


diff --git a/java/res/values-in/strings.xml b/java/res/values-id/strings.xml
similarity index 100%
rename from java/res/values-in/strings.xml
rename to java/res/values-id/strings.xml


diff --git a/java/res/values-nb/strings.xml b/java/res/values-no/strings.xml
similarity index 100%
rename from java/res/values-nb/strings.xml
rename to java/res/values-no/strings.xml


diff --git a/java/res/values-zh-rCN/strings.xml b/java/res/values-zh-rCN/strings.xml
deleted file mode 100755
index 56d9f67..0000000
--- a/java/res/values-zh-rCN/strings.xml
+++ /dev/null

@@ -1,3 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?>
-<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
-</resources>

diff --git a/java/res/values-in/strings.xml b/java/res/values-zh/strings.xml
similarity index 100%
copy from java/res/values-in/strings.xml
copy to java/res/values-zh/strings.xml


diff --git a/java/src/com/android/textclassifier/ActionsSuggestionsHelper.java b/java/src/com/android/textclassifier/ActionsSuggestionsHelper.java
index 730a2d8..a51c95d 100644
--- a/java/src/com/android/textclassifier/ActionsSuggestionsHelper.java
+++ b/java/src/com/android/textclassifier/ActionsSuggestionsHelper.java

@@ -31,18 +31,18 @@
 import com.android.textclassifier.common.base.TcLog;
 import com.android.textclassifier.common.intent.LabeledIntent;
 import com.android.textclassifier.common.intent.TemplateIntentFactory;
-import com.android.textclassifier.common.statsd.ResultIdUtils;
+import com.android.textclassifier.common.logging.ResultIdUtils;
 import com.google.android.textclassifier.ActionsSuggestionsModel;
 import com.google.android.textclassifier.RemoteActionTemplate;
 import com.google.common.base.Equivalence;
 import com.google.common.base.Equivalence.Wrapper;
+import com.google.common.base.Optional;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Deque;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.Optional;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
@@ -107,21 +107,16 @@
   public static String createResultId(
       Context context,
       List<ConversationActions.Message> messages,
-      ModelFile actionsModel,
-      Optional<ModelFile> annotatorModel) {
+      Optional<ModelFile> actionsModel,
+      Optional<ModelFile> annotatorModel,
+      Optional<ModelFile> langIdModel) {
     int hash =
         Objects.hash(
             messages.stream().mapToInt(ActionsSuggestionsHelper::hashMessage),
             context.getPackageName(),
             System.currentTimeMillis());
-    List<ResultIdUtils.ModelInfo> modelInfos = new ArrayList<>();
-    modelInfos.add(createModelInfo(actionsModel));
-    annotatorModel.ifPresent(model -> modelInfos.add(createModelInfo(model)));
-    return ResultIdUtils.createId(modelInfos, hash);
-  }
-
-  private static ResultIdUtils.ModelInfo createModelInfo(ModelFile modelFile) {
-    return new ResultIdUtils.ModelInfo(modelFile.getVersion(), modelFile.getSupportedLocales());
+    return ResultIdUtils.createId(
+        hash, ModelFile.toModelInfos(actionsModel, annotatorModel, langIdModel));
   }
 
   /** Generated labeled intent from an action suggestion and return the resolved result. */

diff --git a/java/src/com/android/textclassifier/ModelFileManager.java b/java/src/com/android/textclassifier/ModelFileManager.java
index eb1a79d..a6f64d8 100644
--- a/java/src/com/android/textclassifier/ModelFileManager.java
+++ b/java/src/com/android/textclassifier/ModelFileManager.java

@@ -21,6 +21,8 @@
 import android.text.TextUtils;
 import androidx.annotation.GuardedBy;
 import com.android.textclassifier.common.base.TcLog;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
+import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Splitter;
 import com.google.common.collect.ImmutableList;
@@ -28,15 +30,16 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
 import java.util.Objects;
-import java.util.StringJoiner;
 import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 
 /** Manages model files that are listed by the model files supplier. */
@@ -283,19 +286,26 @@
       return false;
     }
 
+    public ModelInfo toModelInfo() {
+      return new ModelInfo(getVersion(), supportedLocalesStr);
+    }
+
     @Override
     public String toString() {
-      final StringJoiner localesJoiner = new StringJoiner(",");
-      for (Locale locale : supportedLocales) {
-        localesJoiner.add(locale.toLanguageTag());
-      }
       return String.format(
           Locale.US,
           "ModelFile { path=%s name=%s version=%d locales=%s }",
           getPath(),
           getName(),
           version,
-          localesJoiner);
+          supportedLocalesStr);
+    }
+
+    public static ImmutableList<Optional<ModelInfo>> toModelInfos(
+        Optional<ModelFile>... modelFiles) {
+      return Arrays.stream(modelFiles)
+          .map(modelFile -> modelFile.transform(ModelFile::toModelInfo))
+          .collect(Collectors.collectingAndThen(Collectors.toList(), ImmutableList::copyOf));
     }
   }
 }

diff --git a/java/src/com/android/textclassifier/TextClassifierImpl.java b/java/src/com/android/textclassifier/TextClassifierImpl.java
index 014a8c9..5c028ef 100644
--- a/java/src/com/android/textclassifier/TextClassifierImpl.java
+++ b/java/src/com/android/textclassifier/TextClassifierImpl.java

@@ -42,11 +42,13 @@
 import androidx.annotation.GuardedBy;
 import androidx.annotation.WorkerThread;
 import androidx.core.util.Pair;
+import com.android.textclassifier.ModelFileManager.ModelFile;
 import com.android.textclassifier.common.base.TcLog;
 import com.android.textclassifier.common.intent.LabeledIntent;
 import com.android.textclassifier.common.intent.TemplateIntentFactory;
+import com.android.textclassifier.common.logging.ResultIdUtils;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
 import com.android.textclassifier.common.statsd.GenerateLinksLogger;
-import com.android.textclassifier.common.statsd.ResultIdUtils;
 import com.android.textclassifier.common.statsd.SelectionEventConverter;
 import com.android.textclassifier.common.statsd.TextClassificationSessionIdConverter;
 import com.android.textclassifier.common.statsd.TextClassifierEventConverter;
@@ -55,7 +57,9 @@
 import com.google.android.textclassifier.ActionsSuggestionsModel;
 import com.google.android.textclassifier.AnnotatorModel;
 import com.google.android.textclassifier.LangIdModel;
+import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.FluentIterable;
 import com.google.common.collect.ImmutableList;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -67,7 +71,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
-import java.util.Optional;
 import javax.annotation.Nullable;
 
 /**
@@ -216,9 +219,9 @@
           for (int i = 0; i < size; i++) {
             tsBuilder.setEntityType(results[i].getCollection(), results[i].getScore());
           }
-          return tsBuilder
-              .setId(createId(string, request.getStartIndex(), request.getEndIndex()))
-              .build();
+          final String resultId =
+              createAnnotatorId(string, request.getStartIndex(), request.getEndIndex());
+          return tsBuilder.setId(resultId).build();
         } else {
           // We can not trust the result. Log the issue and ignore the result.
           TcLog.d(TAG, "Got bad indices for input text. Ignoring result.");
@@ -335,8 +338,20 @@
           request.getCallingPackageName() == null
               ? context.getPackageName() // local (in process) TC.
               : request.getCallingPackageName();
+      Optional<ModelInfo> annotatorModelInfo;
+      Optional<ModelInfo> langIdModelInfo;
+      synchronized (lock) {
+        annotatorModelInfo =
+            Optional.fromNullable(annotatorModelInUse).transform(ModelFile::toModelInfo);
+        langIdModelInfo = Optional.fromNullable(langIdModelInUse).transform(ModelFile::toModelInfo);
+      }
       generateLinksLogger.logGenerateLinks(
-          request.getText(), links, callingPackageName, endTimeMs - startTimeMs);
+          request.getText(),
+          links,
+          callingPackageName,
+          endTimeMs - startTimeMs,
+          annotatorModelInfo,
+          langIdModelInfo);
       return links;
     } catch (Throwable t) {
       // Avoid throwing from this method. Log the error.
@@ -482,8 +497,9 @@
           ActionsSuggestionsHelper.createResultId(
               context,
               request.getConversation(),
-              actionModelInUse,
-              Optional.ofNullable(annotatorModelInUse));
+              Optional.fromNullable(actionModelInUse),
+              Optional.fromNullable(annotatorModelInUse),
+              Optional.fromNullable(langIdModelInUse));
       return new ConversationActions(conversationActions, resultId);
     }
   }
@@ -533,7 +549,7 @@
     synchronized (lock) {
       final ModelFileManager.ModelFile bestModel = langIdModelFileManager.findBestModelFile(null);
       if (bestModel == null) {
-        return Optional.empty();
+        return Optional.absent();
       }
       if (langIdImpl == null || !Objects.equals(langIdModelInUse, bestModel)) {
         TcLog.d(TAG, "Loading " + bestModel);
@@ -544,7 +560,7 @@
                   new File(bestModel.getPath()), ParcelFileDescriptor.MODE_READ_ONLY);
         } catch (FileNotFoundException e) {
           TcLog.e(TAG, "Failed to open the LangID model file", e);
-          return Optional.empty();
+          return Optional.absent();
         }
         try {
           if (pfd != null) {
@@ -588,15 +604,15 @@
     }
   }
 
-  private String createId(String text, int start, int end) {
+  private String createAnnotatorId(String text, int start, int end) {
     synchronized (lock) {
       return ResultIdUtils.createId(
           context,
           text,
           start,
           end,
-          annotatorModelInUse.getVersion(),
-          annotatorModelInUse.getSupportedLocales());
+          ModelFile.toModelInfos(
+              Optional.fromNullable(annotatorModelInUse), Optional.fromNullable(langIdModelInUse)));
     }
   }
 
@@ -656,13 +672,13 @@
       actionIntents.add(intent);
     }
     Bundle extras = new Bundle();
-    langId.ifPresent(
-        model -> {
-          maybeCreateExtrasForTranslate(actionIntents, model)
-              .ifPresent(
-                  foreignLanguageExtra ->
-                      ExtrasUtils.putForeignLanguageExtra(extras, foreignLanguageExtra));
-        });
+    Optional<Bundle> foreignLanguageExtra =
+        langId
+            .transform(model -> maybeCreateExtrasForTranslate(actionIntents, model))
+            .or(Optional.<Bundle>absent());
+    if (foreignLanguageExtra.isPresent()) {
+      ExtrasUtils.putForeignLanguageExtra(extras, foreignLanguageExtra.get());
+    }
     if (actionIntents.stream().anyMatch(Objects::nonNull)) {
       ArrayList<Intent> strippedIntents =
           actionIntents.stream()
@@ -672,7 +688,8 @@
     }
     ExtrasUtils.putEntities(extras, classifications);
     builder.setExtras(extras);
-    return builder.setId(createId(text, start, end)).build();
+    String resultId = createAnnotatorId(text, start, end);
+    return builder.setId(resultId).build();
   }
 
   private static OnClickListener createIntentOnClickListener(final PendingIntent intent) {
@@ -689,16 +706,16 @@
   private static Optional<Bundle> maybeCreateExtrasForTranslate(
       List<Intent> intents, LangIdModel langId) {
     Optional<Intent> translateIntent =
-        intents.stream()
+        FluentIterable.from(intents)
             .filter(Objects::nonNull)
             .filter(intent -> Intent.ACTION_TRANSLATE.equals(intent.getAction()))
-            .findFirst();
+            .first();
     if (!translateIntent.isPresent()) {
-      return Optional.empty();
+      return Optional.absent();
     }
     Pair<String, Float> topLanguageWithScore = ExtrasUtils.getTopLanguage(translateIntent.get());
     if (topLanguageWithScore == null) {
-      return Optional.empty();
+      return Optional.absent();
     }
     return Optional.of(
         ExtrasUtils.createForeignLanguageExtra(
@@ -708,13 +725,13 @@
   private ImmutableList<String> detectLanguageTags(
       Optional<LangIdModel> langId, CharSequence text) {
     return langId
-        .map(
+        .transform(
             model -> {
               float threshold = getLangIdThreshold(model);
               EntityConfidence languagesConfidence = detectLanguages(model, text, threshold);
               return ImmutableList.copyOf(languagesConfidence.getEntities());
             })
-        .orElse(ImmutableList.of());
+        .or(ImmutableList.of());
   }
 
   /**

diff --git a/java/src/com/android/textclassifier/common/base/LocaleCompat.java b/java/src/com/android/textclassifier/common/base/LocaleCompat.java
new file mode 100644
index 0000000..baaaf67
--- /dev/null
+++ b/java/src/com/android/textclassifier/common/base/LocaleCompat.java

@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.textclassifier.common.base;
+
+import android.content.Context;
+import android.os.Build;
+import java.util.Locale;
+
+/** Helper for accessing locale related stuff that works across different platform versions. */
+public final class LocaleCompat {
+
+  private LocaleCompat() {}
+
+  /**
+   * Returns a well-formed IETF BCP 47 language tag representing this locale. In older platforms,
+   * only the ISO 639 language code will be returned.
+   *
+   * @see Locale#toLanguageTag()
+   */
+  public static String toLanguageTag(Locale locale) {
+    if (Build.VERSION.SDK_INT >= 24) {
+      return Api24Impl.toLanguageTag(locale);
+    }
+    return ApiBaseImpl.toLanguageTag(locale);
+  }
+
+  /** Returns the language tags in string for the current resources configuration. */
+  public static String getResourceLanguageTags(Context context) {
+    if (Build.VERSION.SDK_INT >= 24) {
+      return Api24Impl.getResourceLanguageTags(context);
+    } else if (Build.VERSION.SDK_INT >= 21) {
+      return Api21Impl.getResourceLanguageTags(context);
+    }
+    return ApiBaseImpl.getResourceLanguageTags(context);
+  }
+
+  private static class Api24Impl {
+    private Api24Impl() {}
+
+    static String toLanguageTag(Locale locale) {
+      return locale.toLanguageTag();
+    }
+
+    static String getResourceLanguageTags(Context context) {
+      return context.getResources().getConfiguration().getLocales().toLanguageTags();
+    }
+  }
+
+  private static class Api21Impl {
+    private Api21Impl() {}
+
+    static String getResourceLanguageTags(Context context) {
+      return context.getResources().getConfiguration().locale.toLanguageTag();
+    }
+  }
+
+  private static class ApiBaseImpl {
+    private ApiBaseImpl() {}
+
+    static String toLanguageTag(Locale locale) {
+      return locale.getLanguage();
+    }
+
+    static String getResourceLanguageTags(Context context) {
+      return context.getResources().getConfiguration().locale.getLanguage();
+    }
+  }
+}

diff --git a/java/src/com/android/textclassifier/common/logging/ResultIdUtils.java b/java/src/com/android/textclassifier/common/logging/ResultIdUtils.java
new file mode 100644
index 0000000..dae0442
--- /dev/null
+++ b/java/src/com/android/textclassifier/common/logging/ResultIdUtils.java

@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.textclassifier.common.logging;
+
+import android.content.Context;
+import android.text.TextUtils;
+import com.android.textclassifier.common.base.LocaleCompat;
+import com.google.common.base.Joiner;
+import com.google.common.base.Objects;
+import com.google.common.base.Optional;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import javax.annotation.Nullable;
+
+/** Provide utils to generate and parse the result id. */
+public final class ResultIdUtils {
+  private static final String CLASSIFIER_ID = "androidtc";
+  private static final String SEPARATOR_MODEL_NAME = ";";
+  private static final String SEPARATOR_LOCALES = ",";
+  private static final Pattern EXTRACT_MODEL_NAME_FROM_RESULT_ID =
+      Pattern.compile("^[^|]*\\|([^|]*)\\|[^|]*$");
+
+  /** Creates a string id that may be used to identify a TextClassifier result. */
+  public static String createId(
+      Context context, String text, int start, int end, List<Optional<ModelInfo>> modelInfos) {
+    Preconditions.checkNotNull(text);
+    Preconditions.checkNotNull(context);
+    Preconditions.checkNotNull(modelInfos);
+    final int hash = Objects.hashCode(text, start, end, context.getPackageName());
+    return createId(hash, modelInfos);
+  }
+
+  /** Creates a string id that may be used to identify a TextClassifier result. */
+  public static String createId(int hash, List<Optional<ModelInfo>> modelInfos) {
+    Preconditions.checkNotNull(modelInfos);
+    final List<String> modelNames = new ArrayList<>();
+    for (Optional<ModelInfo> modelInfo : modelInfos) {
+      modelNames.add(modelInfo.transform(ModelInfo::toModelName).or(""));
+    }
+    return String.format(
+        Locale.US,
+        "%s|%s|%d",
+        CLASSIFIER_ID,
+        Joiner.on(SEPARATOR_MODEL_NAME).join(modelNames),
+        hash);
+  }
+
+  /** Returns if the result id was generated from the default text classifier. */
+  public static boolean isFromDefaultTextClassifier(String resultId) {
+    return resultId.startsWith(CLASSIFIER_ID + '|');
+  }
+
+  /** Returns all the model names encoded in the signature. */
+  public static ImmutableList<String> getModelNames(@Nullable String signature) {
+    if (TextUtils.isEmpty(signature)) {
+      return ImmutableList.of();
+    }
+    Matcher matcher = EXTRACT_MODEL_NAME_FROM_RESULT_ID.matcher(signature);
+    if (!matcher.find()) {
+      return ImmutableList.of();
+    }
+    return ImmutableList.copyOf(Splitter.on(SEPARATOR_MODEL_NAME).splitToList(matcher.group(1)));
+  }
+
+  private ResultIdUtils() {}
+
+  /** Model information of a model file. */
+  public static class ModelInfo {
+    private final String modelName;
+
+    public ModelInfo(int version, List<Locale> locales) {
+      this(version, createSupportedLanguageTagsString(locales));
+    }
+
+    /**
+     * Creates a {@link ModelInfo} object.
+     *
+     * @param version model version
+     * @param supportedLanguageTags a comma-separated string of bcp47 language tags of supported
+     *     languages
+     */
+    public ModelInfo(int version, String supportedLanguageTags) {
+      this.modelName = createModelName(version, supportedLanguageTags);
+    }
+
+    private static String createSupportedLanguageTagsString(List<Locale> locales) {
+      List<String> languageTags = new ArrayList<>();
+      for (Locale locale : locales) {
+        languageTags.add(LocaleCompat.toLanguageTag(locale));
+      }
+      return Joiner.on(SEPARATOR_LOCALES).join(languageTags);
+    }
+
+    private static String createModelName(int version, String supportedLanguageTags) {
+      return String.format(Locale.US, "%s_v%d", supportedLanguageTags, version);
+    }
+
+    /** Returns a string representation of the model info. */
+    public String toModelName() {
+      return modelName;
+    }
+  }
+}

diff --git a/java/src/com/android/textclassifier/common/statsd/GenerateLinksLogger.java b/java/src/com/android/textclassifier/common/statsd/GenerateLinksLogger.java
index 80321f7..c132749 100644
--- a/java/src/com/android/textclassifier/common/statsd/GenerateLinksLogger.java
+++ b/java/src/com/android/textclassifier/common/statsd/GenerateLinksLogger.java

@@ -22,8 +22,10 @@
 import android.view.textclassifier.TextLinks;
 import androidx.collection.ArrayMap;
 import com.android.textclassifier.common.base.TcLog;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
 import com.android.textclassifier.common.logging.TextClassifierEvent;
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Optional;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import java.util.Locale;
@@ -66,7 +68,12 @@
 
   /** Logs statistics about a call to generateLinks. */
   public void logGenerateLinks(
-      CharSequence text, TextLinks links, String callingPackageName, long latencyMs) {
+      CharSequence text,
+      TextLinks links,
+      String callingPackageName,
+      long latencyMs,
+      Optional<ModelInfo> annotatorModel,
+      Optional<ModelInfo> langIdModel) {
     Preconditions.checkNotNull(text);
     Preconditions.checkNotNull(links);
     Preconditions.checkNotNull(callingPackageName);
@@ -92,7 +99,8 @@
     }
 
     final String callId = randomUuidSupplier.get();
-    writeStats(callId, callingPackageName, null, totalStats, text, latencyMs);
+    writeStats(
+        callId, callingPackageName, null, totalStats, text, latencyMs, annotatorModel, langIdModel);
     // Sort the entity types to ensure the logging order is deterministic.
     ImmutableList<String> sortedEntityTypes =
         ImmutableList.sortedCopyOf(perEntityTypeStats.keySet());
@@ -103,7 +111,9 @@
           entityType,
           perEntityTypeStats.get(entityType),
           text,
-          latencyMs);
+          latencyMs,
+          annotatorModel,
+          langIdModel);
     }
   }
 
@@ -127,13 +137,17 @@
       @Nullable String entityType,
       LinkifyStats stats,
       CharSequence text,
-      long latencyMs) {
+      long latencyMs,
+      Optional<ModelInfo> annotatorModel,
+      Optional<ModelInfo> langIdModel) {
+    String annotatorModelName = annotatorModel.transform(ModelInfo::toModelName).or("");
+    String langIdModelName = langIdModel.transform(ModelInfo::toModelName).or("");
     StatsEvent statsEvent =
         StatsEvent.newBuilder()
             .setAtomId(TextClassifierEventLogger.TEXT_LINKIFY_EVENT_ATOM_ID)
             .writeString(callId)
             .writeInt(TextClassifierEvent.TYPE_LINKS_GENERATED)
-            .writeString(/* modelName */ null)
+            .writeString(annotatorModelName)
             .writeInt(TextClassifierEventLogger.WidgetType.WIDGET_TYPE_UNKNOWN)
             .writeInt(/* eventIndex */ 0)
             .writeString(entityType)
@@ -142,6 +156,7 @@
             .writeInt(text.length())
             .writeLong(latencyMs)
             .writeString(callingPackageName)
+            .writeString(langIdModelName)
             .usePooledBuffer()
             .build();
     StatsLog.write(statsEvent);
@@ -151,14 +166,16 @@
           LOG_TAG,
           String.format(
               Locale.US,
-              "%s:%s %d links (%d/%d chars) %dms %s",
+              "%s:%s %d links (%d/%d chars) %dms %s annotator=%s langid=%s",
               callId,
               entityType,
               stats.numLinks,
               stats.numLinksTextLength,
               text.length(),
               latencyMs,
-              callingPackageName));
+              callingPackageName,
+              annotatorModelName,
+              langIdModelName));
     }
   }
 

diff --git a/java/src/com/android/textclassifier/common/statsd/ResultIdUtils.java b/java/src/com/android/textclassifier/common/statsd/ResultIdUtils.java
deleted file mode 100644
index fc085b9..0000000
--- a/java/src/com/android/textclassifier/common/statsd/ResultIdUtils.java
+++ /dev/null

@@ -1,106 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.textclassifier.common.statsd;
-
-import android.content.Context;
-import android.text.TextUtils;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Splitter;
-import com.google.common.collect.ImmutableList;
-import java.util.List;
-import java.util.Locale;
-import java.util.Objects;
-import java.util.Optional;
-import java.util.StringJoiner;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import javax.annotation.Nullable;
-
-/** Provide utils to generate and parse the result id. */
-public final class ResultIdUtils {
-  private static final String CLASSIFIER_ID = "androidtc";
-  private static final String SEPARATOR_MODEL_NAME = ";";
-  private static final String SEPARATOR_LOCALES = ",";
-  private static final Pattern EXTRACT_MODEL_NAME_FROM_RESULT_ID =
-      Pattern.compile("^[^|]*\\|([^|]*)\\|[^|]*$");
-
-  /** Creates a string id that may be used to identify a TextClassifier result. */
-  public static String createId(
-      Context context,
-      String text,
-      int start,
-      int end,
-      int modelVersion,
-      List<Locale> modelLocales) {
-    Preconditions.checkNotNull(text);
-    Preconditions.checkNotNull(context);
-    Preconditions.checkNotNull(modelLocales);
-    final int hash = Objects.hash(text, start, end, context.getPackageName());
-    return createId(ImmutableList.of(new ModelInfo(modelVersion, modelLocales)), hash);
-  }
-
-  /** Creates a string id that may be used to identify a TextClassifier result. */
-  public static String createId(List<ModelInfo> modelInfos, int hash) {
-    Preconditions.checkNotNull(modelInfos);
-    final StringJoiner modelJoiner = new StringJoiner(SEPARATOR_MODEL_NAME);
-    for (ModelInfo modelInfo : modelInfos) {
-      modelJoiner.add(modelInfo.toModelName());
-    }
-    return String.format(Locale.US, "%s|%s|%d", CLASSIFIER_ID, modelJoiner, hash);
-  }
-
-  /** Returns the first model name encoded in the signature. */
-  static String getModelName(@Nullable String signature) {
-    return Optional.ofNullable(signature)
-        .flatMap(s -> getModelNames(s).stream().findFirst())
-        .orElse("");
-  }
-
-  /** Returns all the model names encoded in the signature. */
-  static ImmutableList<String> getModelNames(@Nullable String signature) {
-    if (TextUtils.isEmpty(signature)) {
-      return ImmutableList.of();
-    }
-    Matcher matcher = EXTRACT_MODEL_NAME_FROM_RESULT_ID.matcher(signature);
-    if (!matcher.find()) {
-      return ImmutableList.of();
-    }
-    return ImmutableList.copyOf(Splitter.on(SEPARATOR_MODEL_NAME).splitToList(matcher.group(1)));
-  }
-
-  private ResultIdUtils() {}
-
-  /** Model information of a model file. */
-  public static class ModelInfo {
-    private final int version;
-    private final ImmutableList<Locale> locales;
-
-    public ModelInfo(int version, List<Locale> locales) {
-      this.version = version;
-      this.locales = ImmutableList.copyOf(locales);
-    }
-
-    /** Returns a string representation of the model info. */
-    private String toModelName() {
-      final StringJoiner localesJoiner = new StringJoiner(SEPARATOR_LOCALES);
-      for (Locale locale : locales) {
-        localesJoiner.add(locale.toLanguageTag());
-      }
-      return String.format(Locale.US, "%s_v%d", localesJoiner, version);
-    }
-  }
-}

diff --git a/java/src/com/android/textclassifier/common/statsd/TextClassifierEventLogger.java b/java/src/com/android/textclassifier/common/statsd/TextClassifierEventLogger.java
index 43b9131..41f546c 100644
--- a/java/src/com/android/textclassifier/common/statsd/TextClassifierEventLogger.java
+++ b/java/src/com/android/textclassifier/common/statsd/TextClassifierEventLogger.java

@@ -16,15 +16,20 @@
 
 package com.android.textclassifier.common.statsd;
 
+import static com.google.common.base.Charsets.UTF_8;
+import static com.google.common.base.Strings.nullToEmpty;
+
 import android.util.StatsEvent;
 import android.util.StatsLog;
 import android.view.textclassifier.TextClassifier;
 import com.android.textclassifier.common.base.TcLog;
+import com.android.textclassifier.common.logging.ResultIdUtils;
 import com.android.textclassifier.common.logging.TextClassificationContext;
 import com.android.textclassifier.common.logging.TextClassificationSessionId;
 import com.android.textclassifier.common.logging.TextClassifierEvent;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
+import com.google.common.hash.Hashing;
 import java.util.List;
 import javax.annotation.Nullable;
 
@@ -63,12 +68,13 @@
   private static void logTextSelectionEvent(
       @Nullable TextClassificationSessionId sessionId,
       TextClassifierEvent.TextSelectionEvent event) {
+    ImmutableList<String> modelNames = getModelNames(event);
     StatsEvent statsEvent =
         StatsEvent.newBuilder()
             .setAtomId(TEXT_SELECTION_EVENT_ATOM_ID)
             .writeString(sessionId == null ? null : sessionId.getValue())
-            .writeInt(event.getEventType())
-            .writeString(getModelName(event))
+            .writeInt(getEventType(event))
+            .writeString(getItemAt(modelNames, /* index= */ 0, /* defaultValue= */ null))
             .writeInt(getWidgetType(event))
             .writeInt(event.getEventIndex())
             .writeString(getItemAt(event.getEntityTypes(), /* index= */ 0))
@@ -77,19 +83,32 @@
             .writeInt(event.getRelativeSuggestedWordStartIndex())
             .writeInt(event.getRelativeSuggestedWordEndIndex())
             .writeString(getPackageName(event))
+            .writeString(getItemAt(modelNames, /* index= */ 1, /* defaultValue= */ null))
             .usePooledBuffer()
             .build();
     StatsLog.write(statsEvent);
   }
 
+  private static int getEventType(TextClassifierEvent.TextSelectionEvent event) {
+    if (event.getEventType() == TextClassifierEvent.TYPE_AUTO_SELECTION) {
+      if (ResultIdUtils.isFromDefaultTextClassifier(event.getResultId())) {
+        return event.getRelativeWordEndIndex() - event.getRelativeWordStartIndex() > 1
+            ? TextClassifierEvent.TYPE_SMART_SELECTION_MULTI
+            : TextClassifierEvent.TYPE_SMART_SELECTION_SINGLE;
+      }
+    }
+    return event.getEventType();
+  }
+
   private static void logTextLinkifyEvent(
       TextClassificationSessionId sessionId, TextClassifierEvent.TextLinkifyEvent event) {
+    ImmutableList<String> modelNames = getModelNames(event);
     StatsEvent statsEvent =
         StatsEvent.newBuilder()
             .setAtomId(TEXT_LINKIFY_EVENT_ATOM_ID)
             .writeString(sessionId == null ? null : sessionId.getValue())
             .writeInt(event.getEventType())
-            .writeString(getModelName(event))
+            .writeString(getItemAt(modelNames, /* index= */ 0, /* defaultValue= */ null))
             .writeInt(getWidgetType(event))
             .writeInt(event.getEventIndex())
             .writeString(getItemAt(event.getEntityTypes(), /* index= */ 0))
@@ -98,6 +117,7 @@
             .writeInt(/* textLength */ 0)
             .writeLong(/* latencyInMillis */ 0L)
             .writeString(getPackageName(event))
+            .writeString(getItemAt(modelNames, /* index= */ 1, /* defaultValue= */ null))
             .usePooledBuffer()
             .build();
     StatsLog.write(statsEvent);
@@ -106,24 +126,26 @@
   private static void logConversationActionsEvent(
       @Nullable TextClassificationSessionId sessionId,
       TextClassifierEvent.ConversationActionsEvent event) {
-    ImmutableList<String> modelNames = ResultIdUtils.getModelNames(event.getResultId());
-
+    String resultId = nullToEmpty(event.getResultId());
+    ImmutableList<String> modelNames = ResultIdUtils.getModelNames(resultId);
     StatsEvent statsEvent =
         StatsEvent.newBuilder()
             .setAtomId(CONVERSATION_ACTIONS_EVENT_ATOM_ID)
+            // TODO: Update ExtServices to set the session id.
             .writeString(
                 sessionId == null
-                    ? event.getResultId() // TODO: Update ExtServices to set the session id.
+                    ? Hashing.goodFastHash(64).hashString(resultId, UTF_8).toString()
                     : sessionId.getValue())
             .writeInt(event.getEventType())
-            .writeString(getItemAt(modelNames, 0, null))
+            .writeString(getItemAt(modelNames, /* index= */ 0, /* defaultValue= */ null))
             .writeInt(getWidgetType(event))
             .writeString(getItemAt(event.getEntityTypes(), /* index= */ 0))
             .writeString(getItemAt(event.getEntityTypes(), /* index= */ 1))
             .writeString(getItemAt(event.getEntityTypes(), /* index= */ 2))
             .writeFloat(getFloatAt(event.getScores(), /* index= */ 0))
             .writeString(getPackageName(event))
-            .writeString(getItemAt(modelNames, 1, null))
+            .writeString(getItemAt(modelNames, /* index= */ 1, /* defaultValue= */ null))
+            .writeString(getItemAt(modelNames, /* index= */ 2, /* defaultValue= */ null))
             .usePooledBuffer()
             .build();
     StatsLog.write(statsEvent);
@@ -137,7 +159,7 @@
             .setAtomId(LANGUAGE_DETECTION_EVENT_ATOM_ID)
             .writeString(sessionId == null ? null : sessionId.getValue())
             .writeInt(event.getEventType())
-            .writeString(getModelName(event))
+            .writeString(getItemAt(getModelNames(event), /* index= */ 0, /* defaultValue= */ null))
             .writeInt(getWidgetType(event))
             .writeString(getItemAt(event.getEntityTypes(), /* index= */ 0))
             .writeFloat(getFloatAt(event.getScores(), /* index= */ 0))
@@ -190,11 +212,11 @@
     return array[index];
   }
 
-  private static String getModelName(TextClassifierEvent event) {
+  private static ImmutableList<String> getModelNames(TextClassifierEvent event) {
     if (event.getModelName() != null) {
-      return event.getModelName();
+      return ImmutableList.of(event.getModelName());
     }
-    return ResultIdUtils.getModelName(event.getResultId());
+    return ResultIdUtils.getModelNames(event.getResultId());
   }
 
   @Nullable

diff --git a/java/tests/instrumentation/AndroidManifest.xml b/java/tests/instrumentation/AndroidManifest.xml
index 129b909..4964caf 100644
--- a/java/tests/instrumentation/AndroidManifest.xml
+++ b/java/tests/instrumentation/AndroidManifest.xml

@@ -1,10 +1,9 @@
 <?xml version="1.0" encoding="utf-8"?>
 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
-    package="com.android.textclassifier.common.tests"
-    android:versionCode="1"
-    android:versionName="1.0">
+    package="com.android.textclassifier.tests">
 
-  <uses-sdk android:minSdkVersion="16"/>
+  <uses-sdk android:minSdkVersion="29" android:targetSdkVersion="30"/>
+  <uses-permission android:name="android.permission.QUERY_ALL_PACKAGES" />
 
   <application>
     <uses-library android:name="android.test.runner"/>
@@ -12,5 +11,5 @@
 
   <instrumentation
       android:name="androidx.test.runner.AndroidJUnitRunner"
-      android:targetPackage="com.android.textclassifier.common.tests"/>
+      android:targetPackage="com.android.textclassifier.tests"/>
 </manifest>

diff --git a/java/tests/instrumentation/AndroidTest.xml b/java/tests/instrumentation/AndroidTest.xml
new file mode 100644
index 0000000..e02a338
--- /dev/null
+++ b/java/tests/instrumentation/AndroidTest.xml

@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2020 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<!-- This test config file is auto-generated. -->
+<configuration description="Runs TextClassifierServiceTest.">
+    <option name="test-suite-tag" value="apct" />
+    <option name="test-suite-tag" value="apct-instrumentation" />
+    <target_preparer class="com.android.tradefed.targetprep.suite.SuiteApkInstaller">
+        <option name="cleanup-apks" value="true" />
+        <option name="test-file-name" value="TextClassifierServiceTest.apk" />
+    </target_preparer>
+
+    <test class="com.android.tradefed.testtype.AndroidJUnitTest" >
+        <option name="package" value="com.android.textclassifier.tests" />
+        <option name="runner" value="androidx.test.runner.AndroidJUnitRunner" />
+    </test>
+
+    <object type="module_controller" class="com.android.tradefed.testtype.suite.module.MainlineTestModuleController">
+        <option name="mainline-module-package-name" value="com.google.android.extservices" />
+    </object>
+</configuration>

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/ModelFileManagerTest.java b/java/tests/instrumentation/src/com/android/textclassifier/ModelFileManagerTest.java
index ec8d5a5..06d47d6 100644
--- a/java/tests/instrumentation/src/com/android/textclassifier/ModelFileManagerTest.java
+++ b/java/tests/instrumentation/src/com/android/textclassifier/ModelFileManagerTest.java

@@ -24,6 +24,8 @@
 import androidx.test.ext.junit.runners.AndroidJUnit4;
 import androidx.test.filters.SmallTest;
 import com.android.textclassifier.ModelFileManager.ModelFile;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
+import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableList;
 import java.io.File;
 import java.io.IOException;
@@ -316,6 +318,36 @@
   }
 
   @Test
+  public void modelFile_toModelInfo() {
+    ModelFileManager.ModelFile modelFile =
+        new ModelFileManager.ModelFile(
+            new File("/path/a"), 2, ImmutableList.of(Locale.JAPANESE), "ja", false);
+
+    ModelInfo modelInfo = modelFile.toModelInfo();
+
+    assertThat(modelInfo.toModelName()).isEqualTo("ja_v2");
+  }
+
+  @Test
+  public void modelFile_toModelInfos() {
+    ModelFile englishModelFile =
+        new ModelFile(new File("/path/a"), 1, ImmutableList.of(Locale.ENGLISH), "en", false);
+    ModelFile japaneseModelFile =
+        new ModelFile(new File("/path/a"), 2, ImmutableList.of(Locale.JAPANESE), "ja", false);
+
+    ImmutableList<Optional<ModelInfo>> modelInfos =
+        ModelFileManager.ModelFile.toModelInfos(
+            Optional.of(englishModelFile), Optional.of(japaneseModelFile));
+
+    assertThat(
+            modelInfos.stream()
+                .map(modelFile -> modelFile.transform(ModelInfo::toModelName).or(""))
+                .collect(Collectors.toList()))
+        .containsExactly("en_v1", "ja_v2")
+        .inOrder();
+  }
+
+  @Test
   public void testFileSupplierImpl_updatedFileOnly() throws IOException {
     updatedModelFile.createNewFile();
     File model1 = new File(factoryModelDir, "test1.model");

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/common/base/LocaleCompatTest.java b/java/tests/instrumentation/src/com/android/textclassifier/common/base/LocaleCompatTest.java
new file mode 100644
index 0000000..9e1f5a8
--- /dev/null
+++ b/java/tests/instrumentation/src/com/android/textclassifier/common/base/LocaleCompatTest.java

@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.textclassifier.common.base;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import android.os.LocaleList;
+import androidx.test.core.app.ApplicationProvider;
+import androidx.test.ext.junit.runners.AndroidJUnit4;
+import androidx.test.filters.SdkSuppress;
+import androidx.test.filters.SmallTest;
+import java.util.Locale;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+@SmallTest
+@RunWith(AndroidJUnit4.class)
+public class LocaleCompatTest {
+  @SdkSuppress(minSdkVersion = 24)
+  @Test
+  public void toLanguageTag_minApi24() {
+    Locale locale = Locale.TRADITIONAL_CHINESE;
+
+    String languageTags = LocaleCompat.toLanguageTag(locale);
+
+    assertThat(languageTags).isEqualTo("zh-TW");
+  }
+
+  @SdkSuppress(maxSdkVersion = 23)
+  @Test
+  public void toLanguageTag_base() {
+    Locale locale = Locale.TRADITIONAL_CHINESE;
+
+    String languageTags = LocaleCompat.toLanguageTag(locale);
+
+    assertThat(languageTags).isEqualTo("zh");
+  }
+
+  @SdkSuppress(minSdkVersion = 24)
+  @Test
+  public void getResourceLanguageTags_minApi24() {
+    ApplicationProvider.getApplicationContext()
+        .getResources()
+        .getConfiguration()
+        .setLocales(LocaleList.forLanguageTags("zh-TW"));
+
+    String resourceLanguageTags =
+        LocaleCompat.getResourceLanguageTags(ApplicationProvider.getApplicationContext());
+
+    assertThat(resourceLanguageTags).isEqualTo("zh-TW");
+  }
+
+  @SdkSuppress(minSdkVersion = 21, maxSdkVersion = 23)
+  @Test
+  public void getResourceLanguageTags_minApi21() {
+    ApplicationProvider.getApplicationContext()
+        .getResources()
+        .getConfiguration()
+        .setLocale(Locale.TAIWAN);
+
+    String resourceLanguageTags =
+        LocaleCompat.getResourceLanguageTags(ApplicationProvider.getApplicationContext());
+
+    assertThat(resourceLanguageTags).isEqualTo("zh-TW");
+  }
+
+  @SdkSuppress(maxSdkVersion = 20)
+  @Test
+  public void getResourceLanguageTags_base() {
+    ApplicationProvider.getApplicationContext().getResources().getConfiguration().locale =
+        Locale.TAIWAN;
+
+    String resourceLanguageTags =
+        LocaleCompat.getResourceLanguageTags(ApplicationProvider.getApplicationContext());
+
+    assertThat(resourceLanguageTags).isEqualTo("zh");
+  }
+}

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/common/logging/ResultIdUtilsTest.java b/java/tests/instrumentation/src/com/android/textclassifier/common/logging/ResultIdUtilsTest.java
new file mode 100644
index 0000000..3a85061
--- /dev/null
+++ b/java/tests/instrumentation/src/com/android/textclassifier/common/logging/ResultIdUtilsTest.java

@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.textclassifier.common.logging;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import androidx.test.core.app.ApplicationProvider;
+import androidx.test.ext.junit.runners.AndroidJUnit4;
+import androidx.test.filters.SmallTest;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
+import com.google.common.base.Optional;
+import com.google.common.collect.ImmutableList;
+import java.util.Locale;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+@SmallTest
+@RunWith(AndroidJUnit4.class)
+public class ResultIdUtilsTest {
+  private static final int MODEL_VERSION = 703;
+  private static final int HASH = 12345;
+
+  @Test
+  public void createId_customHash() {
+    ImmutableList<Optional<ModelInfo>> modelInfos =
+        ImmutableList.of(
+            Optional.absent(),
+            Optional.of(
+                new ModelInfo(/* version= */ 1, ImmutableList.of(Locale.ENGLISH, Locale.FRENCH))),
+            Optional.absent(),
+            Optional.of(new ModelInfo(/* version= */ 2, ImmutableList.of(Locale.CHINESE))),
+            Optional.absent());
+
+    String resultId = ResultIdUtils.createId(HASH, modelInfos);
+
+    assertThat(resultId).isEqualTo("androidtc|;en,fr_v1;;zh_v2;|12345");
+  }
+
+  @Test
+  public void createId_selection() {
+    String resultId =
+        ResultIdUtils.createId(
+            ApplicationProvider.getApplicationContext(),
+            "text",
+            1,
+            2,
+            ImmutableList.of(
+                Optional.of(new ModelInfo(MODEL_VERSION, ImmutableList.of(Locale.ENGLISH)))));
+
+    assertThat(resultId).matches("androidtc\\|en_v703\\|-?\\d+");
+  }
+
+  @Test
+  public void getModelName_invalid() {
+    assertThat(ResultIdUtils.getModelNames("a|b")).isEmpty();
+  }
+
+  @Test
+  public void getModelNames() {
+    assertThat(ResultIdUtils.getModelNames("androidtc|;en_v703;;zh_v101;|12344"))
+        .containsExactly("", "en_v703", "", "zh_v101", "")
+        .inOrder();
+  }
+
+  @Test
+  public void getModelNames_invalid() {
+    assertThat(ResultIdUtils.getModelNames("a|b")).isEmpty();
+    assertThat(ResultIdUtils.getModelNames("a|b|c|d")).isEmpty();
+  }
+
+  @Test
+  public void modelInfo_toModelName() {
+    ModelInfo modelInfo = new ModelInfo(700, ImmutableList.of(Locale.ENGLISH));
+
+    assertThat(modelInfo.toModelName()).isEqualTo("en_v700");
+  }
+
+  @Test
+  public void modelInfo_toModelName_supportedLanguageTags() {
+    ModelInfo modelInfo = new ModelInfo(700, "en,fr");
+
+    assertThat(modelInfo.toModelName()).isEqualTo("en,fr_v700");
+  }
+
+  @Test
+  public void isFromDefaultTextClassifier_true() {
+    assertThat(ResultIdUtils.isFromDefaultTextClassifier("androidtc|en_v703|12344")).isTrue();
+  }
+
+  @Test
+  public void isFromDefaultTextClassifier_false() {
+    assertThat(ResultIdUtils.isFromDefaultTextClassifier("aiai|en_v703|12344")).isFalse();
+  }
+}

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/GenerateLinksLoggerTest.java b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/GenerateLinksLoggerTest.java
index 76ee24b..c2a911a 100644
--- a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/GenerateLinksLoggerTest.java
+++ b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/GenerateLinksLoggerTest.java

@@ -29,8 +29,11 @@
 import com.android.os.AtomsProto;
 import com.android.os.AtomsProto.Atom;
 import com.android.os.AtomsProto.TextLinkifyEvent;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
+import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
+import java.util.Locale;
 import java.util.Map;
 import java.util.stream.Collectors;
 import org.junit.After;
@@ -46,6 +49,11 @@
   /** A statsd config ID, which is arbitrary. */
   private static final long CONFIG_ID = 689777;
 
+  private static final ModelInfo ANNOTATOR_MODEL =
+      new ModelInfo(1, ImmutableList.of(Locale.ENGLISH));
+  private static final ModelInfo LANGID_MODEL =
+      new ModelInfo(2, ImmutableList.of(Locale.forLanguageTag("*")));
+
   @Before
   public void setup() throws Exception {
     StatsdTestUtils.cleanup(CONFIG_ID);
@@ -77,7 +85,13 @@
 
     GenerateLinksLogger generateLinksLogger =
         new GenerateLinksLogger(/* sampleRate= */ 1, () -> uuid);
-    generateLinksLogger.logGenerateLinks(testText, links, PACKAGE_NAME, LATENCY_MS);
+    generateLinksLogger.logGenerateLinks(
+        testText,
+        links,
+        PACKAGE_NAME,
+        LATENCY_MS,
+        Optional.of(ANNOTATOR_MODEL),
+        Optional.of(LANGID_MODEL));
     ImmutableList<Atom> loggedAtoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
 
     ImmutableList<TextLinkifyEvent> loggedEvents =
@@ -89,7 +103,7 @@
         AtomsProto.TextLinkifyEvent.newBuilder()
             .setSessionId(uuid)
             .setEventIndex(0)
-            .setModelName("")
+            .setModelName("en_v1")
             .setWidgetType(WidgetType.WIDGET_TYPE_UNKNOWN)
             .setEventType(EventType.LINKS_GENERATED)
             .setPackageName(PACKAGE_NAME)
@@ -98,12 +112,13 @@
             .setTextLength(testText.length())
             .setLinkedTextLength(phoneText.length())
             .setLatencyMillis(LATENCY_MS)
+            .setLangidModelName("und_v2")
             .build();
     TextLinkifyEvent phoneEvent =
         AtomsProto.TextLinkifyEvent.newBuilder()
             .setSessionId(uuid)
             .setEventIndex(0)
-            .setModelName("")
+            .setModelName("en_v1")
             .setWidgetType(WidgetType.WIDGET_TYPE_UNKNOWN)
             .setEventType(EventType.LINKS_GENERATED)
             .setPackageName(PACKAGE_NAME)
@@ -112,6 +127,7 @@
             .setTextLength(testText.length())
             .setLinkedTextLength(phoneText.length())
             .setLatencyMillis(LATENCY_MS)
+            .setLangidModelName("und_v2")
             .build();
     assertThat(loggedEvents).containsExactly(summaryEvent, phoneEvent).inOrder();
   }
@@ -134,7 +150,13 @@
 
     GenerateLinksLogger generateLinksLogger =
         new GenerateLinksLogger(/* sampleRate= */ 1, () -> uuid);
-    generateLinksLogger.logGenerateLinks(testText, links, PACKAGE_NAME, LATENCY_MS);
+    generateLinksLogger.logGenerateLinks(
+        testText,
+        links,
+        PACKAGE_NAME,
+        LATENCY_MS,
+        Optional.of(ANNOTATOR_MODEL),
+        Optional.of(LANGID_MODEL));
     ImmutableList<Atom> loggedAtoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
 
     ImmutableList<TextLinkifyEvent> loggedEvents =

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/ResultIdUtilsTest.java b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/ResultIdUtilsTest.java
deleted file mode 100644
index ec79ed1..0000000
--- a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/ResultIdUtilsTest.java
+++ /dev/null

@@ -1,91 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.android.textclassifier.common.statsd;
-
-import static com.google.common.truth.Truth.assertThat;
-
-import androidx.test.core.app.ApplicationProvider;
-import androidx.test.ext.junit.runners.AndroidJUnit4;
-import androidx.test.filters.SmallTest;
-import com.android.textclassifier.common.statsd.ResultIdUtils.ModelInfo;
-import com.google.common.collect.ImmutableList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Locale;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-
-@SmallTest
-@RunWith(AndroidJUnit4.class)
-public class ResultIdUtilsTest {
-  private static final int MODEL_VERSION = 703;
-  private static final int HASH = 12345;
-
-  @Test
-  public void createId_customHash() {
-    List<ModelInfo> modelInfos =
-        ImmutableList.of(
-            new ModelInfo(/* version= */ 1, ImmutableList.of(Locale.ENGLISH, Locale.FRENCH)),
-            new ModelInfo(/* version= */ 2, ImmutableList.of(Locale.CHINESE)));
-
-    String resultId = ResultIdUtils.createId(modelInfos, HASH);
-
-    assertThat(resultId).isEqualTo("androidtc|en,fr_v1;zh_v2|12345");
-  }
-
-  @Test
-  public void createId_selection() {
-    String resultId =
-        ResultIdUtils.createId(
-            ApplicationProvider.getApplicationContext(),
-            "text",
-            1,
-            2,
-            MODEL_VERSION,
-            Collections.singletonList(Locale.ENGLISH));
-
-    assertThat(resultId).matches("androidtc\\|en_v703\\|-?\\d+");
-  }
-
-  @Test
-  public void getModelName() {
-    assertThat(ResultIdUtils.getModelName("androidtc|en_v703|12344")).isEqualTo("en_v703");
-  }
-
-  @Test
-  public void getModelName_multipleModels() {
-    assertThat(ResultIdUtils.getModelName("androidtc|en_v703;zh_v101|12344")).isEqualTo("en_v703");
-  }
-
-  @Test
-  public void getModelName_invalid() {
-    assertThat(ResultIdUtils.getModelNames("a|b")).isEmpty();
-  }
-
-  @Test
-  public void getModelNames() {
-    assertThat(ResultIdUtils.getModelNames("androidtc|en_v703;zh_v101|12344"))
-        .containsExactly("en_v703", "zh_v101")
-        .inOrder();
-  }
-
-  @Test
-  public void getModelNames_invalid() {
-    assertThat(ResultIdUtils.getModelNames("a|b")).isEmpty();
-    assertThat(ResultIdUtils.getModelNames("a|b|c|d")).isEmpty();
-  }
-}

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/SelectionEventConverterTest.java b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/SelectionEventConverterTest.java
index f9b1470..ecdc1f4 100644
--- a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/SelectionEventConverterTest.java
+++ b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/SelectionEventConverterTest.java

@@ -28,7 +28,9 @@
 import androidx.test.core.app.ApplicationProvider;
 import androidx.test.ext.junit.runners.AndroidJUnit4;
 import androidx.test.filters.SmallTest;
-import com.android.textclassifier.common.statsd.ResultIdUtils.ModelInfo;
+import com.android.textclassifier.common.logging.ResultIdUtils;
+import com.android.textclassifier.common.logging.ResultIdUtils.ModelInfo;
+import com.google.common.base.Optional;
 import com.google.common.collect.ImmutableList;
 import java.util.ArrayDeque;
 import java.util.Deque;
@@ -176,8 +178,9 @@
 
   private static String createResultId() {
     return ResultIdUtils.createId(
-        ImmutableList.of(new ModelInfo(/* version= */ 702, ImmutableList.of(Locale.ENGLISH))),
-        /*hash=*/ 12345);
+        /*hash=*/ 12345,
+        ImmutableList.of(
+            Optional.of(new ModelInfo(/* version= */ 702, ImmutableList.of(Locale.ENGLISH)))));
   }
 
   private static class TestTextClassifier implements TextClassifier {

diff --git a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/TextClassifierEventLoggerTest.java b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/TextClassifierEventLoggerTest.java
index 73cc69d..719fc31 100644
--- a/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/TextClassifierEventLoggerTest.java
+++ b/java/tests/instrumentation/src/com/android/textclassifier/common/statsd/TextClassifierEventLoggerTest.java

@@ -76,7 +76,7 @@
         new TextClassifierEvent.TextSelectionEvent.Builder(
                 TextClassifierEvent.TYPE_SELECTION_STARTED)
             .setEventContext(createTextClassificationContext())
-            .setModelName(MODEL_NAME)
+            .setResultId("androidtc|en_v705;und_v1|12345")
             .setEventIndex(1)
             .setEntityTypes(TextClassifier.TYPE_ADDRESS)
             .setRelativeWordStartIndex(2)
@@ -91,7 +91,7 @@
         AtomsProto.TextSelectionEvent.newBuilder()
             .setSessionId(sessionId.getValue())
             .setEventType(EventType.SELECTION_STARTED)
-            .setModelName(MODEL_NAME)
+            .setModelName("en_v705")
             .setWidgetType(WidgetType.WIDGET_TYPE_WEBVIEW)
             .setEventIndex(1)
             .setEntityType(TextClassifier.TYPE_ADDRESS)
@@ -100,6 +100,7 @@
             .setRelativeSuggestedWordStartIndex(1)
             .setRelativeSuggestedWordEndIndex(4)
             .setPackageName(PKG_NAME)
+            .setLangidModelName("und_v1")
             .build();
     ImmutableList<Atom> atoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
     assertThat(atoms).hasSize(1);
@@ -107,12 +108,66 @@
   }
 
   @Test
+  public void writeEvent_textSelectionEvent_autoToSingle() throws Exception {
+    TextClassificationSessionId sessionId = new TextClassificationSessionId();
+    TextClassifierEvent.TextSelectionEvent textSelectionEvent =
+        new TextClassifierEvent.TextSelectionEvent.Builder(TextClassifierEvent.TYPE_AUTO_SELECTION)
+            .setResultId("androidtc|en_v705;und_v1|12345")
+            .setRelativeWordStartIndex(2)
+            .setRelativeWordEndIndex(3)
+            .build();
+
+    textClassifierEventLogger.writeEvent(sessionId, textSelectionEvent);
+
+    ImmutableList<Atom> atoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
+    assertThat(atoms).hasSize(1);
+    assertThat(atoms.get(0).getTextSelectionEvent().getEventType())
+        .isEqualTo(EventType.SMART_SELECTION_SINGLE);
+  }
+
+  @Test
+  public void writeEvent_textSelectionEvent_autoToMulti() throws Exception {
+    TextClassificationSessionId sessionId = new TextClassificationSessionId();
+    TextClassifierEvent.TextSelectionEvent textSelectionEvent =
+        new TextClassifierEvent.TextSelectionEvent.Builder(TextClassifierEvent.TYPE_AUTO_SELECTION)
+            .setResultId("androidtc|en_v705;und_v1|12345")
+            .setRelativeWordStartIndex(2)
+            .setRelativeWordEndIndex(4)
+            .build();
+
+    textClassifierEventLogger.writeEvent(sessionId, textSelectionEvent);
+
+    ImmutableList<Atom> atoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
+    assertThat(atoms).hasSize(1);
+    assertThat(atoms.get(0).getTextSelectionEvent().getEventType())
+        .isEqualTo(EventType.SMART_SELECTION_MULTI);
+  }
+
+  @Test
+  public void writeEvent_textSelectionEvent_keepAuto() throws Exception {
+    TextClassificationSessionId sessionId = new TextClassificationSessionId();
+    TextClassifierEvent.TextSelectionEvent textSelectionEvent =
+        new TextClassifierEvent.TextSelectionEvent.Builder(TextClassifierEvent.TYPE_AUTO_SELECTION)
+            .setResultId("aiai|en_v705;und_v1|12345")
+            .setRelativeWordStartIndex(2)
+            .setRelativeWordEndIndex(4)
+            .build();
+
+    textClassifierEventLogger.writeEvent(sessionId, textSelectionEvent);
+
+    ImmutableList<Atom> atoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
+    assertThat(atoms).hasSize(1);
+    assertThat(atoms.get(0).getTextSelectionEvent().getEventType())
+        .isEqualTo(EventType.AUTO_SELECTION);
+  }
+
+  @Test
   public void writeEvent_textLinkifyEvent() throws Exception {
     TextClassificationSessionId sessionId = new TextClassificationSessionId();
     TextClassifierEvent.TextLinkifyEvent textLinkifyEvent =
         new TextClassifierEvent.TextLinkifyEvent.Builder(TextClassifierEvent.TYPE_SELECTION_STARTED)
             .setEventContext(createTextClassificationContext())
-            .setModelName(MODEL_NAME)
+            .setResultId("androidtc|en_v705;und_v1|12345")
             .setEventIndex(1)
             .setEntityTypes(TextClassifier.TYPE_ADDRESS)
             .build();
@@ -123,7 +178,7 @@
         AtomsProto.TextLinkifyEvent.newBuilder()
             .setSessionId(sessionId.getValue())
             .setEventType(EventType.SELECTION_STARTED)
-            .setModelName(MODEL_NAME)
+            .setModelName("en_v705")
             .setWidgetType(WidgetType.WIDGET_TYPE_WEBVIEW)
             .setEventIndex(1)
             .setEntityType(TextClassifier.TYPE_ADDRESS)
@@ -132,6 +187,7 @@
             .setTextLength(0)
             .setLatencyMillis(0)
             .setPackageName(PKG_NAME)
+            .setLangidModelName("und_v1")
             .build();
     ImmutableList<Atom> atoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
     assertThat(atoms).hasSize(1);
@@ -145,7 +201,7 @@
         new TextClassifierEvent.ConversationActionsEvent.Builder(
                 TextClassifierEvent.TYPE_SELECTION_STARTED)
             .setEventContext(createTextClassificationContext())
-            .setResultId("android_tc|en_v1;zh_v2|12345")
+            .setResultId("android_tc|en_v1;zh_v2;und_v3|12345")
             .setEventIndex(1)
             .setEntityTypes("first", "second", "third", "fourth")
             .setScores(0.5f)
@@ -165,6 +221,7 @@
             .setScore(0.5f)
             .setPackageName(PKG_NAME)
             .setAnnotatorModelName("zh_v2")
+            .setLangidModelName("und_v3")
             .build();
     ImmutableList<Atom> atoms = StatsdTestUtils.getLoggedAtoms(CONFIG_ID);
     assertThat(atoms).hasSize(1);

diff --git a/jni/com/google/android/textclassifier/AnnotatorModel.java b/jni/com/google/android/textclassifier/AnnotatorModel.java
index d6a15ab..7658bf5 100644
--- a/jni/com/google/android/textclassifier/AnnotatorModel.java
+++ b/jni/com/google/android/textclassifier/AnnotatorModel.java

@@ -180,6 +180,15 @@
   }
 
   /**
+   * Annotates multiple fragments of text in one native call, using the same options for all of
+   * them. There will be one AnnotatedSpan array for each input fragment to annotate.
+   */
+  public AnnotatedSpan[][] annotateStructuredInput(
+      InputFragment[] fragments, AnnotationOptions options) {
+    return nativeAnnotateStructuredInput(annotatorPtr, fragments, options);
+  }
+
+  /**
    * Looks up a knowledge entity by its identifier. Returns null if the entity is not found or on
    * error.
    */
@@ -415,6 +424,52 @@
     }
   }
 
+  /** Represents a fragment of text to the AnnotateStructuredInput call. */
+  public static final class InputFragment {
+    /** Encapsulates the data required to set the relative time of an InputFragment. */
+    public static final class DatetimeOptions {
+      private final String referenceTimezone;
+      private final long referenceTimeMsUtc; // Unboxed once, so a null fails in the constructor.
+
+      public DatetimeOptions(String referenceTimezone, Long referenceTimeMsUtc) {
+        this.referenceTimeMsUtc = referenceTimeMsUtc;
+        this.referenceTimezone = referenceTimezone;
+      }
+    }
+
+    private final String text;
+    // The DatetimeOptions can't be Optional because the _api16 build of the TCLib SDK does not
+    // support java.util.Optional.
+    private final DatetimeOptions datetimeOptionsNullable;
+
+    public InputFragment(String text) {
+      this(text, /* datetimeOptions= */ null);
+    }
+
+    public InputFragment(String text, DatetimeOptions datetimeOptions) {
+      this.text = text;
+      this.datetimeOptionsNullable = datetimeOptions;
+    }
+
+    public String getText() {
+      return text;
+    }
+
+    public boolean hasDatetimeOptions() {
+      return datetimeOptionsNullable != null;
+    }
+
+    /** Throws a NullPointerException unless {@link #hasDatetimeOptions()} is true. */
+    public long getReferenceTimeMsUtc() {
+      return datetimeOptionsNullable.referenceTimeMsUtc;
+    }
+
+    /** Throws a NullPointerException unless {@link #hasDatetimeOptions()} is true. */
+    public String getReferenceTimezone() {
+      return datetimeOptionsNullable.referenceTimezone;
+    }
+  }
+
   /**
    * Represents options for the suggestSelection call. TODO(b/63427420): Use location with Selection
    * options.
@@ -560,6 +615,8 @@
     private final String detectedTextLanguageTags;
     private final String[] entityTypes;
     private final int annotationUsecase;
+    private final boolean hasLocationPermission;
+    private final boolean hasPersonalizationPermission;
     private final boolean isSerializedEntityDataEnabled;
     private final double userLocationLat;
     private final double userLocationLng;
@@ -572,6 +629,8 @@
         String detectedTextLanguageTags,
         Collection<String> entityTypes,
         int annotationUsecase,
+        boolean hasLocationPermission,
+        boolean hasPersonalizationPermission,
         boolean isSerializedEntityDataEnabled,
         double userLocationLat,
         double userLocationLng,
@@ -586,6 +645,34 @@
       this.userLocationLat = userLocationLat;
       this.userLocationLng = userLocationLng;
       this.userLocationAccuracyMeters = userLocationAccuracyMeters;
+      this.hasLocationPermission = hasLocationPermission;
+      this.hasPersonalizationPermission = hasPersonalizationPermission;
+    }
+
+    public AnnotationOptions(
+        long referenceTimeMsUtc,
+        String referenceTimezone,
+        String locales,
+        String detectedTextLanguageTags,
+        Collection<String> entityTypes,
+        int annotationUsecase,
+        boolean isSerializedEntityDataEnabled,
+        double userLocationLat,
+        double userLocationLng,
+        float userLocationAccuracyMeters) {
+      this(
+          referenceTimeMsUtc,
+          referenceTimezone,
+          locales,
+          detectedTextLanguageTags,
+          entityTypes,
+          annotationUsecase,
+          /* hasLocationPermission */ true, // Always true for this overload.
+          /* hasPersonalizationPermission */ true, // Always true for this overload.
+          isSerializedEntityDataEnabled,
+          userLocationLat,
+          userLocationLng,
+          userLocationAccuracyMeters);
     }
 
     public AnnotationOptions(
@@ -664,6 +751,14 @@
     public float getUserLocationAccuracyMeters() {
       return userLocationAccuracyMeters;
     }
+
+    public boolean hasLocationPermission() {
+      return hasLocationPermission;
+    }
+
+    public boolean hasPersonalizationPermission() {
+      return hasPersonalizationPermission;
+    }
   }
 
   /**
@@ -720,6 +815,9 @@
   private native AnnotatedSpan[] nativeAnnotate(
       long context, String text, AnnotationOptions options);
 
+  private native AnnotatedSpan[][] nativeAnnotateStructuredInput(
+      long context, InputFragment[] inputFragments, AnnotationOptions options);
+
   private native byte[] nativeLookUpKnowledgeEntity(long context, String id);
 
   private native void nativeCloseAnnotator(long context);

diff --git a/native/Android.bp b/native/Android.bp
index 3110a5b..9a79285 100644
--- a/native/Android.bp
+++ b/native/Android.bp

@@ -54,6 +54,7 @@
         "//apex_available:platform",
         "com.android.neuralnetworks",
         "test_com.android.neuralnetworks",
+        "com.android.extservices",
     ],
 }
 
@@ -93,7 +94,7 @@
     product_variables: {
         debuggable: {
             // Only enable debug logging in userdebug/eng builds.
-            cflags: ["-DTC_DEBUG_LOGGING=1"],
+            cflags: ["-DTC3_DEBUG_LOGGING=1"],
         },
     },
 
@@ -106,6 +107,7 @@
         "libtextclassifier_fbgen_resources_extra",
         "libtextclassifier_fbgen_intent_config",
         "libtextclassifier_fbgen_annotator_model",
+        "libtextclassifier_fbgen_annotator_experimental_model",
         "libtextclassifier_fbgen_actions_model",
         "libtextclassifier_fbgen_tflite_text_encoder_config",
         "libtextclassifier_fbgen_lang_id_embedded_network",
@@ -205,6 +207,13 @@
 }
 
 genrule {
+    name: "libtextclassifier_fbgen_annotator_experimental_model",
+    srcs: ["annotator/experimental/experimental.fbs"],
+    out: ["annotator/experimental/experimental_generated.h"],
+    defaults: ["fbgen"],
+}
+
+genrule {
     name: "libtextclassifier_fbgen_actions_model",
     srcs: ["actions/actions_model.fbs"],
     out: ["actions/actions_model_generated.h"],
@@ -292,13 +301,19 @@
     exclude_srcs: [
         "**/*_test.cc",
         "**/*-test-lib.cc",
-        "utils/testing/*.cc",
+        "**/testing/*.cc",
         "**/*test-util.*",
         "**/*test-utils.*",
         "**/*_test-include.*",
+        "**/*unittest.cc",
     ],
 
     version_script: "jni.lds",
+
+    apex_available: [
+        "//apex_available:platform",
+        "com.android.extservices",
+    ],
 }
 
 // -----------------------
@@ -308,7 +323,7 @@
     name: "libtextclassifier_tests",
     defaults: ["libtextclassifier_defaults"],
 
-    test_suites: ["device-tests"],
+    test_suites: ["device-tests", "mts"],
 
     data: [
         "annotator/test_data/**/*",
@@ -316,21 +331,19 @@
     ],
 
     srcs: ["**/*.cc"],
-    // TODO: Do not filter out tflite test once the dependency issue is resolved.
-    exclude_srcs: [
-        "utils/tflite/*_test.cc",
-        "utils/flatbuffers_test.cc",
-        "utils/calendar/*_test-include.*",
-        "utils/utf8/*_test-include.*"
-    ],
 
-    static_libs: ["libgmock_ndk"],
+    static_libs: [
+        "libgmock_ndk",
+        "libgtest_ndk_c++",
+    ],
 
     multilib: {
         lib32: {
+            suffix: "32",
             cppflags: ["-DTC3_TEST_DATA_DIR=\"/data/nativetest/libtextclassifier_tests/test_data/\""],
         },
         lib64: {
+            suffix: "64",
             cppflags: ["-DTC3_TEST_DATA_DIR=\"/data/nativetest64/libtextclassifier_tests/test_data/\""],
         },
     },

diff --git a/native/AndroidTest.xml b/native/AndroidTest.xml
index fd0c609..cee26dd 100644
--- a/native/AndroidTest.xml
+++ b/native/AndroidTest.xml

@@ -14,13 +14,21 @@
      limitations under the License.
 -->
 <configuration description="Config for libtextclassifier_tests">
-    <target_preparer class="com.android.tradefed.targetprep.PushFilePreparer">
+    <option name="test-suite-tag" value="apct" />
+    <option name="test-suite-tag" value="mts" />
+
+    <target_preparer class="com.android.compatibility.common.tradefed.targetprep.FilePusher">
         <option name="cleanup" value="true" />
         <option name="push" value="libtextclassifier_tests->/data/local/tmp/libtextclassifier_tests" />
+        <option name="append-bitness" value="true" />
     </target_preparer>
-    <option name="test-suite-tag" value="apct" />
+
     <test class="com.android.tradefed.testtype.GTest" >
         <option name="native-test-device-path" value="/data/local/tmp" />
         <option name="module-name" value="libtextclassifier_tests" />
     </test>
+
+    <object type="module_controller" class="com.android.tradefed.testtype.suite.module.MainlineTestModuleController">
+        <option name="mainline-module-package-name" value="com.google.android.extservices" />
+    </object>
 </configuration>

diff --git a/native/actions/actions-suggestions.cc b/native/actions/actions-suggestions.cc
index b1cff28..1fcd35c 100644
--- a/native/actions/actions-suggestions.cc
+++ b/native/actions/actions-suggestions.cc

@@ -22,6 +22,7 @@
 #include "actions/types.h"
 #include "actions/utils.h"
 #include "actions/zlib-utils.h"
+#include "annotator/collections.h"
 #include "utils/base/logging.h"
 #include "utils/flatbuffers.h"
 #include "utils/lua-utils.h"
@@ -51,6 +52,14 @@
 const std::string& ActionsSuggestions::kShareLocation =
     *[]() { return new std::string("share_location"); }();
 
+// Name for a datetime annotation that only includes time but no date.
+const std::string& kTimeAnnotation =
+    *[]() { return new std::string("time"); }();
+
+constexpr float kDefaultFloat = 0.0;  // Fed to an additional model input
+constexpr bool kDefaultBool = false;  // tensor of the matching type when the
+constexpr int kDefaultInt = 1;        // options carry no value for it.
+
 namespace {
 
 const ActionsModel* LoadAndVerifyModel(const uint8_t* addr, int size) {
@@ -257,6 +266,7 @@
     }
   }
 
+  // Gather annotation entities for the rules.
   if (model_->annotation_actions_spec() != nullptr &&
       model_->annotation_actions_spec()->annotation_mapping() != nullptr) {
     for (const AnnotationActionsSpec_::AnnotationMapping* mapping :
@@ -283,7 +293,7 @@
   // Initialize regular expressions model.
   std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
   regex_actions_.reset(
-      new RegexActions(model_->smart_reply_action_type()->str(), unilib_));
+      new RegexActions(unilib_, model_->smart_reply_action_type()->str()));
   if (!regex_actions_->InitializeRules(
           model_->rules(), model_->low_confidence_rules(),
           triggering_preconditions_overlay_, decompressor.get())) {
@@ -297,6 +307,18 @@
     grammar_actions_.reset(new GrammarActions(
         unilib_, model_->rules()->grammar_rules(), entity_data_builder_.get(),
         model_->smart_reply_action_type()->str()));
+
+    // Gather annotation entities for the grammars.
+    if (auto annotation_nt = model_->rules()
+                                 ->grammar_rules()
+                                 ->rules()
+                                 ->nonterminals()
+                                 ->annotation_nt()) {
+      for (const grammar::RulesSet_::Nonterminals_::AnnotationNtEntry* entry :
+           *annotation_nt) {
+        annotation_entity_types_.insert(entry->key()->str());
+      }
+    }
   }
 
   std::string actions_script;
@@ -348,11 +370,10 @@
 
   // Create low confidence model if specified.
   if (model_->low_confidence_ngram_model() != nullptr) {
-    ngram_model_ = NGramModel::Create(model_->low_confidence_ngram_model(),
-                                      feature_processor_ == nullptr
-                                          ? nullptr
-                                          : feature_processor_->tokenizer(),
-                                      unilib_);
+    ngram_model_ = NGramModel::Create(
+        unilib_, model_->low_confidence_ngram_model(),
+        feature_processor_ == nullptr ? nullptr
+                                      : feature_processor_->tokenizer());
     if (ngram_model_ == nullptr) {
       TC3_LOG(ERROR) << "Could not create ngram linear regression model.";
       return false;
@@ -408,15 +429,6 @@
   preconditions_.suppress_on_low_confidence_input = ValueOrDefault(
       overlay, TriggeringPreconditions::VT_SUPPRESS_ON_LOW_CONFIDENCE_INPUT,
       defaults->suppress_on_low_confidence_input());
-  preconditions_.diversification_distance_threshold = ValueOrDefault(
-      overlay, TriggeringPreconditions::VT_DIVERSIFICATION_DISTANCE_THRESHOLD,
-      defaults->diversification_distance_threshold());
-  preconditions_.confidence_threshold =
-      ValueOrDefault(overlay, TriggeringPreconditions::VT_CONFIDENCE_THRESHOLD,
-                     defaults->confidence_threshold());
-  preconditions_.empirical_probability_factor = ValueOrDefault(
-      overlay, TriggeringPreconditions::VT_EMPIRICAL_PROBABILITY_FACTOR,
-      defaults->empirical_probability_factor());
   preconditions_.min_reply_score_threshold = ValueOrDefault(
       overlay, TriggeringPreconditions::VT_MIN_REPLY_SCORE_THRESHOLD,
       defaults->min_reply_score_threshold());
@@ -603,8 +615,7 @@
 bool ActionsSuggestions::SetupModelInput(
     const std::vector<std::string>& context, const std::vector<int>& user_ids,
     const std::vector<float>& time_diffs, const int num_suggestions,
-    const float confidence_threshold, const float diversification_distance,
-    const float empirical_probability_factor,
+    const ActionSuggestionOptions& options,
     tflite::Interpreter* interpreter) const {
   // Compute token embeddings.
   std::vector<std::vector<Token>> tokens;
@@ -665,21 +676,6 @@
         model_->tflite_model_spec()->input_time_diffs(), time_diffs,
         interpreter);
   }
-  if (model_->tflite_model_spec()->input_diversification_distance() >= 0) {
-    model_executor_->SetInput<float>(
-        model_->tflite_model_spec()->input_diversification_distance(),
-        diversification_distance, interpreter);
-  }
-  if (model_->tflite_model_spec()->input_confidence_threshold() >= 0) {
-    model_executor_->SetInput<float>(
-        model_->tflite_model_spec()->input_confidence_threshold(),
-        confidence_threshold, interpreter);
-  }
-  if (model_->tflite_model_spec()->input_empirical_probability_factor() >= 0) {
-    model_executor_->SetInput<float>(
-        model_->tflite_model_spec()->input_empirical_probability_factor(),
-        confidence_threshold, interpreter);
-  }
   if (model_->tflite_model_spec()->input_num_tokens() >= 0) {
     std::vector<int> num_tokens_per_message(tokens.size());
     for (int i = 0; i < tokens.size(); i++) {
@@ -699,15 +695,132 @@
         model_->tflite_model_spec()->input_flattened_token_embeddings(),
         flattened_token_embeddings, interpreter);
   }
+  // Set up additional input parameters.
+  if (const auto* input_name_index =
+          model_->tflite_model_spec()->input_name_index()) {
+    const std::unordered_map<std::string, Variant>& model_parameters =
+        options.model_parameters;
+    for (const TensorflowLiteModelSpec_::InputNameIndexEntry* entry :
+         *input_name_index) {
+      const std::string param_name = entry->key()->str();
+      const int param_index = entry->value();
+      const TfLiteType param_type =
+          interpreter->tensor(interpreter->inputs()[param_index])->type;
+      const auto param_value_it = model_parameters.find(param_name);
+      const bool has_value = param_value_it != model_parameters.end();
+      switch (param_type) {
+        case kTfLiteFloat32:
+          model_executor_->SetInput<float>(
+              param_index,
+              has_value ? param_value_it->second.Value<float>() : kDefaultFloat,
+              interpreter);
+          break;
+        case kTfLiteInt32:
+          model_executor_->SetInput<int32_t>(
+              param_index,
+              has_value ? param_value_it->second.Value<int>() : kDefaultInt,
+              interpreter);
+          break;
+        case kTfLiteInt64:
+          model_executor_->SetInput<int64_t>(
+              param_index,
+              has_value ? param_value_it->second.Value<int64>() : kDefaultInt,
+              interpreter);
+          break;
+        case kTfLiteUInt8:
+          model_executor_->SetInput<uint8_t>(
+              param_index,
+              has_value ? param_value_it->second.Value<uint8>() : kDefaultInt,
+              interpreter);
+          break;
+        case kTfLiteInt8:
+          model_executor_->SetInput<int8_t>(
+              param_index,
+              has_value ? param_value_it->second.Value<int8>() : kDefaultInt,
+              interpreter);
+          break;
+        case kTfLiteBool:
+          model_executor_->SetInput<bool>(
+              param_index,
+              has_value ? param_value_it->second.Value<bool>() : kDefaultBool,
+              interpreter);
+          break;
+        default:
+          TC3_LOG(ERROR) << "Unsupported type of additional input parameter: "
+                         << param_name;
+      }
+    }
+  }
   return true;
 }
 
+// Appends each non-empty reply whose score passes the reply threshold to
+// `response`, labelled with the action type `type`.
+void ActionsSuggestions::PopulateTextReplies(
+    const tflite::Interpreter* interpreter, int suggestion_index,
+    int score_index, const std::string& type,
+    ActionsSuggestionsResponse* response) const {
+  const std::vector<tflite::StringRef> replies =
+      model_executor_->Output<tflite::StringRef>(suggestion_index, interpreter);
+  const TensorView<float> scores =
+      model_executor_->OutputView<float>(score_index, interpreter);
+  // Bounded by both tensors so a short score tensor is never read past its end.
+  for (int i = 0; i < replies.size() && i < scores.size(); i++) {
+    if (replies[i].len == 0 ||
+        scores.data()[i] < preconditions_.min_reply_score_threshold) {
+      continue;
+    }
+    response->actions.push_back(
+        {std::string(replies[i].str, replies[i].len), type, scores.data()[i]});
+  }
+}
+
+void ActionsSuggestions::FillSuggestionFromSpecWithEntityData(
+    const ActionSuggestionSpec* spec, ActionSuggestion* suggestion) const {
+  // Entity data is optional: without a builder the spec is applied to null.
+  std::unique_ptr<ReflectiveFlatbuffer> root;
+  if (entity_data_builder_ != nullptr) root = entity_data_builder_->NewRoot();
+  FillSuggestionFromSpec(spec, root.get(), suggestion);
+}
+
+// Emits the action described by `task_spec`, scored with the first intent
+// score, when the first element of the model's intent prediction is set.
+void ActionsSuggestions::PopulateIntentTriggering(
+    const tflite::Interpreter* interpreter, int suggestion_index,
+    int score_index, const ActionSuggestionSpec* task_spec,
+    ActionsSuggestionsResponse* response) const {
+  if (!task_spec || task_spec->type() == nullptr ||
+      task_spec->type()->size() == 0) {
+    TC3_LOG(ERROR)
+        << "Task type for intent (action) triggering cannot be empty!";
+    return;
+  }
+  const TensorView<bool> intent_prediction =
+      model_executor_->OutputView<bool>(suggestion_index, interpreter);
+  const TensorView<float> intent_scores =
+      model_executor_->OutputView<float>(score_index, interpreter);
+  // Two results correspond to the binary triggering case.
+  TC3_CHECK_EQ(intent_prediction.size(), 2);
+  TC3_CHECK_EQ(intent_scores.size(), 2);
+  // We rely on in-graph thresholding logic so at this point the results
+  // have been ranked properly according to threshold.
+  const bool triggering = intent_prediction.data()[0];
+  const float trigger_score = intent_scores.data()[0];
+
+  if (triggering) {
+    ActionSuggestion suggestion;
+    FillSuggestionFromSpecWithEntityData(task_spec, &suggestion);
+    suggestion.score = trigger_score;
+    response->actions.push_back(std::move(suggestion));
+  }
+}
+
 bool ActionsSuggestions::ReadModelOutput(
     tflite::Interpreter* interpreter, const ActionSuggestionOptions& options,
     ActionsSuggestionsResponse* response) const {
   // Read sensitivity and triggering score predictions.
   if (model_->tflite_model_spec()->output_triggering_score() >= 0) {
-    const TensorView<float>& triggering_score =
+    const TensorView<float> triggering_score =
         model_executor_->OutputView<float>(
             model_->tflite_model_spec()->output_triggering_score(),
             interpreter);
@@ -721,7 +834,7 @@
          preconditions_.min_smart_reply_triggering_score);
   }
   if (model_->tflite_model_spec()->output_sensitive_topic_score() >= 0) {
-    const TensorView<float>& sensitive_topic_score =
+    const TensorView<float> sensitive_topic_score =
         model_executor_->OutputView<float>(
             model_->tflite_model_spec()->output_sensitive_topic_score(),
             interpreter);
@@ -742,24 +855,12 @@
   }
 
   // Read smart reply predictions.
-  std::vector<ActionSuggestion> text_replies;
   if (!response->output_filtered_min_triggering_score &&
       model_->tflite_model_spec()->output_replies() >= 0) {
-    const std::vector<tflite::StringRef> replies =
-        model_executor_->Output<tflite::StringRef>(
-            model_->tflite_model_spec()->output_replies(), interpreter);
-    TensorView<float> scores = model_executor_->OutputView<float>(
-        model_->tflite_model_spec()->output_replies_scores(), interpreter);
-    for (int i = 0; i < replies.size(); i++) {
-      if (replies[i].len == 0) continue;
-      const float score = scores.data()[i];
-      if (score < preconditions_.min_reply_score_threshold) {
-        continue;
-      }
-      response->actions.push_back({std::string(replies[i].str, replies[i].len),
-                                   model_->smart_reply_action_type()->str(),
-                                   score});
-    }
+    PopulateTextReplies(interpreter,
+                        model_->tflite_model_spec()->output_replies(),
+                        model_->tflite_model_spec()->output_replies_scores(),
+                        model_->smart_reply_action_type()->str(), response);
   }
 
   // Read actions suggestions.
@@ -783,13 +884,44 @@
       std::unique_ptr<ReflectiveFlatbuffer> entity_data =
           entity_data_builder_ != nullptr ? entity_data_builder_->NewRoot()
                                           : nullptr;
-      FillSuggestionFromSpec(action_type->action(), entity_data.get(),
-                             &suggestion);
+      FillSuggestionFromSpecWithEntityData(action_type->action(), &suggestion);
       suggestion.score = score;
       response->actions.push_back(std::move(suggestion));
     }
   }
 
+  // Read multi-task predictions. An unset or empty task type uses the default.
+  if (const auto* prediction_metadata =
+          model_->tflite_model_spec()->prediction_metadata()) {
+    for (const PredictionMetadata* metadata : *prediction_metadata) {
+      const ActionSuggestionSpec* task_spec = metadata->task_spec();
+      const bool has_task_type = task_spec && task_spec->type() != nullptr &&
+                                 task_spec->type()->size() > 0;
+      switch (metadata->prediction_type()) {
+        case PredictionType_NEXT_MESSAGE_PREDICTION:
+          if (!has_task_type) {
+            TC3_LOG(WARNING) << "Task type not provided, use default "
+                                "smart_reply_action_type!";
+          }
+          PopulateTextReplies(
+              interpreter, metadata->output_suggestions(),
+              metadata->output_suggestions_scores(),
+              has_task_type ? task_spec->type()->str()
+                            : model_->smart_reply_action_type()->str(),
+              response);
+          break;
+        case PredictionType_INTENT_TRIGGERING:
+          PopulateIntentTriggering(interpreter, metadata->output_suggestions(),
+                                   metadata->output_suggestions_scores(),
+                                   task_spec, response);
+          break;
+        default:
+          TC3_LOG(ERROR) << "Unsupported prediction type!";
+          return false;
+      }
+    }
+  }
+
   return true;
 }
 
@@ -841,10 +973,7 @@
   }
 
   if (!SetupModelInput(context, user_ids, time_diffs,
-                       /*num_suggestions=*/model_->num_smart_replies(),
-                       preconditions_.confidence_threshold,
-                       preconditions_.diversification_distance_threshold,
-                       preconditions_.empirical_probability_factor,
+                       /*num_suggestions=*/model_->num_smart_replies(), options,
                        interpreter->get())) {
     TC3_LOG(ERROR) << "Failed to setup input for TensorFlow Lite model.";
     return false;
@@ -880,7 +1009,11 @@
   }
   const int num_messages_grammar =
       ((model_->rules() && model_->rules()->grammar_rules() &&
-        model_->rules()->grammar_rules()->annotation_nonterminal())
+        model_->rules()
+            ->grammar_rules()
+            ->rules()
+            ->nonterminals()
+            ->annotation_nt())
            ? 1
            : 0);
   const int num_messages_mapping =
@@ -903,6 +1036,30 @@
     if (message->annotations.empty()) {
       message->annotations = annotator->Annotate(
           message->text, AnnotationOptionsForMessage(*message));
+      for (int i = 0; i < message->annotations.size(); i++) {
+        ClassificationResult* classification =
+            &message->annotations[i].classification.front();
+
+        // Specialize datetime annotation to time annotation if no date
+        // component is present.
+        if (classification->collection == Collections::DateTime() &&
+            classification->datetime_parse_result.IsSet()) {
+          bool has_only_time = true;
+          for (const DatetimeComponent& component :
+               classification->datetime_parse_result.datetime_components) {
+            if (component.component_type !=
+                    DatetimeComponent::ComponentType::UNSPECIFIED &&
+                component.component_type <
+                    DatetimeComponent::ComponentType::HOUR) {
+              has_only_time = false;
+              break;
+            }
+          }
+          if (has_only_time) {
+            classification->collection = kTimeAnnotation;
+          }
+        }
+      }
     }
   }
   return annotated_conversation;
@@ -1017,7 +1174,7 @@
         // Apply normalization if specified.
         if (mapping->normalization_options() != nullptr) {
           normalized_annotation_text =
-              NormalizeText(unilib_, mapping->normalization_options(),
+              NormalizeText(*unilib_, mapping->normalization_options(),
                             normalized_annotation_text);
         }
 
@@ -1104,6 +1261,13 @@
 
   SuggestActionsFromAnnotations(annotated_conversation, &response->actions);
 
+  if (grammar_actions_ != nullptr &&
+      !grammar_actions_->SuggestActions(annotated_conversation,
+                                        &response->actions)) {
+    TC3_LOG(ERROR) << "Could not suggest actions from grammar rules.";
+    return false;
+  }
+
   int input_text_length = 0;
   int num_matching_locales = 0;
   for (int i = annotated_conversation.messages.size() - num_messages;
@@ -1179,13 +1343,6 @@
     return false;
   }
 
-  if (grammar_actions_ != nullptr &&
-      !grammar_actions_->SuggestActions(annotated_conversation,
-                                        &response->actions)) {
-    TC3_LOG(ERROR) << "Could not suggest actions from grammar rules.";
-    return false;
-  }
-
   if (preconditions_.suppress_on_low_confidence_input &&
       !regex_actions_->FilterConfidenceOutput(post_check_rules,
                                               &response->actions)) {

diff --git a/native/actions/actions-suggestions.h b/native/actions/actions-suggestions.h
index cd0714a..2a321f0 100644
--- a/native/actions/actions-suggestions.h
+++ b/native/actions/actions-suggestions.h

@@ -20,6 +20,7 @@
 #include <map>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <unordered_set>
 #include <vector>
 
@@ -46,6 +47,7 @@
 // Options for suggesting actions.
 struct ActionSuggestionOptions {
   static ActionSuggestionOptions Default() { return ActionSuggestionOptions(); }
+  std::unordered_map<std::string, Variant> model_parameters;
 };
 
 // Class for predicting actions following a conversation.
@@ -110,7 +112,7 @@
   const ActionsModel* model() const;
   const reflection::Schema* entity_data_schema() const;
 
-  static const int kLocalUserId = 0;
+  static constexpr int kLocalUserId = 0;
 
   // Should be in sync with those defined in Android.
   // android/frameworks/base/core/java/android/view/textclassifier/ConversationActions.java
@@ -178,10 +180,22 @@
                        const std::vector<int>& user_ids,
                        const std::vector<float>& time_diffs,
                        const int num_suggestions,
-                       const float confidence_threshold,
-                       const float diversification_distance,
-                       const float empirical_probability_factor,
+                       const ActionSuggestionOptions& options,
                        tflite::Interpreter* interpreter) const;
+
+  void FillSuggestionFromSpecWithEntityData(const ActionSuggestionSpec* spec,
+                                            ActionSuggestion* suggestion) const;
+
+  void PopulateTextReplies(const tflite::Interpreter* interpreter,
+                           int suggestion_index, int score_index,
+                           const std::string& type,
+                           ActionsSuggestionsResponse* response) const;
+
+  void PopulateIntentTriggering(const tflite::Interpreter* interpreter,
+                                int suggestion_index, int score_index,
+                                const ActionSuggestionSpec* task_spec,
+                                ActionsSuggestionsResponse* response) const;
+
   bool ReadModelOutput(tflite::Interpreter* interpreter,
                        const ActionSuggestionOptions& options,
                        ActionsSuggestionsResponse* response) const;

diff --git a/native/actions/actions_model.fbs b/native/actions/actions_model.fbs
index 54c8016..251610e 100755
--- a/native/actions/actions_model.fbs
+++ b/native/actions/actions_model.fbs

@@ -27,6 +27,31 @@
 
 file_identifier "TC3A";
 
+// Prediction type for a multi-task model.
+namespace libtextclassifier3;
+enum PredictionType : int {
+  UNSUPPORTED = 0,
+  NEXT_MESSAGE_PREDICTION = 1,  // Output is read as scored text replies.
+  INTENT_TRIGGERING = 2,  // Output is read as a binary trigger of one action.
+  ENTITY_ANNOTATION = 3,  // Rejected as unsupported by ReadModelOutput.
+}
+
+// Prediction metadata for an arbitrary task.
+namespace libtextclassifier3;
+table PredictionMetadata {
+  prediction_type:PredictionType;  // Selects how the outputs are interpreted.
+  task_spec:ActionSuggestionSpec;  // Its type labels the produced actions.
+  output_suggestions:int;  // Index of the output tensor with suggestions.
+  output_suggestions_scores:int;  // Index of the output tensor with scores.
+  output_suggestions_spans:int;  // Presumably a spans output index (confirm).
+}
+
+namespace libtextclassifier3.TensorflowLiteModelSpec_;
+table InputNameIndexEntry {
+  key:string (key, shared);  // Parameter name looked up in the options.
+  value:int;  // Position of the tensor in the interpreter's input list.
+}
+
 // TensorFlow Lite model for suggesting actions.
 namespace libtextclassifier3;
 table TensorflowLiteModelSpec {
@@ -51,13 +76,13 @@
   input_num_suggestions:int = 4;
 
   // float, the output diversification distance parameter.
-  input_diversification_distance:int = -1;
+  reserved_7:int (deprecated);
 
   // float, the empirical probability factor parameter.
-  input_empirical_probability_factor:int = -1;
+  reserved_8:int (deprecated);
 
   // float, the confidence threshold.
-  input_confidence_threshold:int = -1;
+  reserved_9:int (deprecated);
 
   // Input port for hashed and embedded tokens, a (num messages, max tokens,
   // embedding size) float tensor specifying the embeddings of each token of
@@ -87,6 +112,13 @@
   // A (max tokens, embedding_size) float tensor specifying the embeddings of
   // each token.
   input_flattened_token_embeddings:int = -1;
+
+  // Generalized output specification that handles arbitrary number of
+  // prediction tasks.
+  prediction_metadata:[PredictionMetadata];
+
+  // Map of additional input tensor name to its index.
+  input_name_index:[TensorflowLiteModelSpec_.InputNameIndexEntry];
 }
 
 // Configuration for the tokenizer.
@@ -247,11 +279,11 @@
   // as a flag value (i.e. as overlay).
   low_confidence_rules:RulesModel;
 
-  // Smart reply thresholds.
-  diversification_distance_threshold:float = 0;
+  reserved_11:float (deprecated);
+  reserved_12:float (deprecated);
+  reserved_13:float (deprecated);
 
-  confidence_threshold:float = 0;
-  empirical_probability_factor:float = 0;
+  // Smart reply thresholds.
   min_reply_score_threshold:float = 0;
 }
 
@@ -398,6 +430,9 @@
   // If set to true, an existing annotator annotation will be used to
   // create the actions suggestions text annotation.
   use_annotation_match:bool;
+
+  // If set, merge in fixed entity data for a match.
+  entity_data:ActionsEntityData;
 }
 
 // The actions to produce upon triggering.
@@ -433,12 +468,6 @@
   action_id:[uint];
 }
 
-namespace libtextclassifier3.RulesModel_.GrammarRules_;
-table AnnotationNonterminalEntry {
-  key:string (key, shared);
-  value:int;
-}
-
 // Configuration for actions based on context-free grammars.
 namespace libtextclassifier3.RulesModel_;
 table GrammarRules {
@@ -452,10 +481,6 @@
 
   // The action specifications used by the rule matches.
   actions:[RuleActionSpec];
-
-  // Predefined nonterminals for annotations.
-  // Maps annotation/collection names to non-terminal ids.
-  annotation_nonterminal:[GrammarRules_.AnnotationNonterminalEntry];
 }
 
 // Rule based actions.

diff --git a/native/actions/feature-processor.cc b/native/actions/feature-processor.cc
index d0b2072..249a132 100644
--- a/native/actions/feature-processor.cc
+++ b/native/actions/feature-processor.cc

@@ -32,8 +32,8 @@
   extractor_options.unicode_aware_features = options->unicode_aware_features();
   extractor_options.extract_selection_mask_feature = false;
   if (options->regexp_features() != nullptr) {
-    for (const auto& regexp_feauture : *options->regexp_features()) {
-      extractor_options.regexp_features.push_back(regexp_feauture->str());
+    for (const auto regexp_feature : *options->regexp_features()) {
+      extractor_options.regexp_features.push_back(regexp_feature->str());
     }
   }
   extractor_options.remap_digits = options->remap_digits();
@@ -70,7 +70,7 @@
     : options_(options),
       tokenizer_(CreateTokenizer(options->tokenizer_options(), unilib)),
       token_feature_extractor_(BuildTokenFeatureExtractorOptions(options),
-                               *unilib) {}
+                               unilib) {}
 
 int ActionsFeatureProcessor::GetTokenEmbeddingSize() const {
   return options_->embedding_size() +

diff --git a/native/actions/feature-processor.h b/native/actions/feature-processor.h
index e34ccff..5e4085a 100644
--- a/native/actions/feature-processor.h
+++ b/native/actions/feature-processor.h

@@ -36,8 +36,8 @@
 // Feature processor for the actions suggestions model.
 class ActionsFeatureProcessor {
  public:
-  ActionsFeatureProcessor(const ActionsTokenFeatureProcessorOptions* options,
-                          const UniLib* unilib);
+  explicit ActionsFeatureProcessor(
+      const ActionsTokenFeatureProcessorOptions* options, const UniLib* unilib);
 
   // Embeds and appends features to the output vector.
   bool AppendFeatures(const std::vector<int>& sparse_features,

diff --git a/native/actions/feature-processor_test.cc b/native/actions/feature-processor_test.cc
new file mode 100644
index 0000000..969bbf7
--- /dev/null
+++ b/native/actions/feature-processor_test.cc

@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/feature-processor.h"
+
+#include "actions/actions_model_generated.h"
+#include "annotator/model-executor.h"
+#include "utils/tensor-view.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using ::testing::FloatEq;
+using ::testing::SizeIs;
+
+// Fake EmbeddingExecutor for tests: expects exactly one sparse feature id and
+// writes {id, id, -id, -id} into the first four entries of `dest`.
+class FakeEmbeddingExecutor : public EmbeddingExecutor {
+ public:
+  bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
+                    const int dest_size) const override {
+    TC3_CHECK_GE(dest_size, 4);  // Four entries of `dest` are written below.
+    EXPECT_THAT(sparse_features, SizeIs(1));
+    dest[0] = sparse_features.data()[0];
+    dest[1] = sparse_features.data()[0];
+    dest[2] = -sparse_features.data()[0];
+    dest[3] = -sparse_features.data()[0];
+    return true;
+  }
+
+ private:
+  std::vector<float> storage_;  // NOTE(review): not referenced in this class.
+};
+
+class FeatureProcessorTest : public ::testing::Test {
+ protected:
+  FeatureProcessorTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+
+  flatbuffers::DetachedBuffer PackFeatureProcessorOptions(
+      ActionsTokenFeatureProcessorOptionsT* options) const {
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(CreateActionsTokenFeatureProcessorOptions(builder, options));
+    return builder.Release();
+  }
+
+  FakeEmbeddingExecutor embedding_executor_;
+  UniLib unilib_;
+};
+
+TEST_F(FeatureProcessorTest, TokenEmbeddings) {
+  ActionsTokenFeatureProcessorOptionsT options;
+  options.embedding_size = 4;
+  options.tokenizer_options.reset(new ActionsTokenizerOptionsT);
+
+  flatbuffers::DetachedBuffer options_fb =
+      PackFeatureProcessorOptions(&options);
+  ActionsFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<ActionsTokenFeatureProcessorOptions>(
+          options_fb.data()),
+      &unilib_);
+
+  Token token("aaa", 0, 3);
+  std::vector<float> token_features;
+  EXPECT_TRUE(feature_processor.AppendTokenFeatures(token, &embedding_executor_,
+                                                    &token_features));
+  EXPECT_THAT(token_features, SizeIs(4));
+}
+
+TEST_F(FeatureProcessorTest, TokenEmbeddingsCaseFeature) {
+  ActionsTokenFeatureProcessorOptionsT options;
+  options.embedding_size = 4;
+  options.extract_case_feature = true;  // Expect one extra slot per token.
+  options.tokenizer_options.reset(new ActionsTokenizerOptionsT);
+
+  flatbuffers::DetachedBuffer options_fb =
+      PackFeatureProcessorOptions(&options);
+  ActionsFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<ActionsTokenFeatureProcessorOptions>(
+          options_fb.data()),
+      &unilib_);
+
+  Token token("Aaa", 0, 3);
+  std::vector<float> token_features;
+  EXPECT_TRUE(feature_processor.AppendTokenFeatures(token, &embedding_executor_,
+                                                    &token_features));
+  ASSERT_THAT(token_features, SizeIs(5));  // Fatal: index 4 is read next.
+  EXPECT_THAT(token_features[4], FloatEq(1.0));  // Case slot for "Aaa".
+}
+
+TEST_F(FeatureProcessorTest, MultipleTokenEmbeddingsCaseFeature) {
+  ActionsTokenFeatureProcessorOptionsT options;
+  options.embedding_size = 4;
+  options.extract_case_feature = true;  // Expect one extra slot per token.
+  options.tokenizer_options.reset(new ActionsTokenizerOptionsT);
+
+  flatbuffers::DetachedBuffer options_fb =
+      PackFeatureProcessorOptions(&options);
+  ActionsFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<ActionsTokenFeatureProcessorOptions>(
+          options_fb.data()),
+      &unilib_);
+
+  const std::vector<Token> tokens = {Token("Aaa", 0, 3), Token("bbb", 4, 7),
+                                     Token("Cccc", 8, 12)};
+  std::vector<float> token_features;
+  EXPECT_TRUE(feature_processor.AppendTokenFeatures(
+      tokens, &embedding_executor_, &token_features));
+  ASSERT_THAT(token_features, SizeIs(15));  // Fatal: indexed up to [14] below.
+  EXPECT_THAT(token_features[4], FloatEq(1.0));    // Case slot for "Aaa".
+  EXPECT_THAT(token_features[9], FloatEq(-1.0));   // Case slot for "bbb".
+  EXPECT_THAT(token_features[14], FloatEq(1.0));   // Case slot for "Cccc".
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/flatbuffer-utils.cc b/native/actions/flatbuffer-utils.cc
deleted file mode 100644
index 6d60c2f..0000000
--- a/native/actions/flatbuffer-utils.cc
+++ /dev/null

@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "actions/flatbuffer-utils.h"
-
-#include <memory>
-
-#include "utils/base/logging.h"
-#include "utils/flatbuffers.h"
-#include "flatbuffers/reflection.h"
-
-namespace libtextclassifier3 {
-
-bool SwapFieldNamesForOffsetsInPathInActionsModel(ActionsModelT* model) {
-  if (model->actions_entity_data_schema.empty()) {
-    // Nothing to do.
-    return true;
-  }
-
-  const reflection::Schema* schema =
-      LoadAndVerifyFlatbuffer<reflection::Schema>(
-          model->actions_entity_data_schema.data(),
-          model->actions_entity_data_schema.size());
-
-  // Resolve offsets in regex rules.
-  if (model->rules != nullptr) {
-    for (std::unique_ptr<RulesModel_::RegexRuleT>& rule :
-         model->rules->regex_rule) {
-      for (std::unique_ptr<RulesModel_::RuleActionSpecT>& rule_action :
-           rule->actions) {
-        for (std::unique_ptr<RulesModel_::RuleActionSpec_::RuleCapturingGroupT>&
-                 capturing_group : rule_action->capturing_group) {
-          if (capturing_group->entity_field == nullptr) {
-            continue;
-          }
-          if (!SwapFieldNamesForOffsetsInPath(
-                  schema, capturing_group->entity_field.get())) {
-            return false;
-          }
-        }
-      }
-    }
-  }
-
-  // Resolve offsets in annotation action mapping.
-  if (model->annotation_actions_spec != nullptr) {
-    for (std::unique_ptr<AnnotationActionsSpec_::AnnotationMappingT>& mapping :
-         model->annotation_actions_spec->annotation_mapping) {
-      if (mapping->entity_field == nullptr) {
-        continue;
-      }
-      if (!SwapFieldNamesForOffsetsInPath(schema,
-                                          mapping->entity_field.get())) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-std::string SwapFieldNamesForOffsetsInPathInSerializedActionsModel(
-    const std::string& model) {
-  std::unique_ptr<ActionsModelT> unpacked_model =
-      UnPackActionsModel(model.c_str());
-  TC3_CHECK(unpacked_model != nullptr);
-  TC3_CHECK(SwapFieldNamesForOffsetsInPathInActionsModel(unpacked_model.get()));
-  flatbuffers::FlatBufferBuilder builder;
-  FinishActionsModelBuffer(builder,
-                           ActionsModel::Pack(builder, unpacked_model.get()));
-  return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
-                     builder.GetSize());
-}
-
-}  // namespace libtextclassifier3

diff --git a/native/actions/flatbuffer-utils.h b/native/actions/flatbuffer-utils.h
deleted file mode 100644
index 2479599..0000000
--- a/native/actions/flatbuffer-utils.h
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Utility functions for working with FlatBuffers in the actions model.
-
-#ifndef LIBTEXTCLASSIFIER_ACTIONS_FLATBUFFER_UTILS_H_
-#define LIBTEXTCLASSIFIER_ACTIONS_FLATBUFFER_UTILS_H_
-
-#include <string>
-
-#include "actions/actions_model_generated.h"
-
-namespace libtextclassifier3 {
-
-// Resolves field lookups by name to the concrete field offsets in the regex
-// rules of the model.
-bool SwapFieldNamesForOffsetsInPathInActionsModel(ActionsModelT* model);
-
-// Same as above but for a serialized model.
-std::string SwapFieldNamesForOffsetsInPathInSerializedActionsModel(
-    const std::string& model);
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_ACTIONS_FLATBUFFER_UTILS_H_

diff --git a/native/actions/grammar-actions.cc b/native/actions/grammar-actions.cc
index 94baa72..7f3e71f 100644
--- a/native/actions/grammar-actions.cc
+++ b/native/actions/grammar-actions.cc

@@ -32,17 +32,11 @@
 namespace libtextclassifier3 {
 namespace {
 
-// Represents an annotator annotated span in the grammar.
-struct AnnotationMatch : public grammar::Match {
-  static const int16 kType = 1;
-  ClassificationResult annotation;
-};
-
 class GrammarActionsCallbackDelegate : public grammar::CallbackDelegate {
  public:
   GrammarActionsCallbackDelegate(const UniLib* unilib,
                                  const RulesModel_::GrammarRules* grammar_rules)
-      : unilib_(unilib), grammar_rules_(grammar_rules) {}
+      : unilib_(*unilib), grammar_rules_(grammar_rules) {}
 
   // Handle a grammar rule match in the actions grammar.
   void MatchFound(const grammar::Match* match, grammar::CallbackId type,
@@ -52,16 +46,8 @@
         HandleRuleMatch(match, /*rule_id=*/value);
         return;
       }
-      case GrammarActions::Callback::kCapturingMatch: {
-        HandleCapturingMatch(match, /*match_id=*/value, matcher);
-        return;
-      }
-      case GrammarActions::Callback::kAssertionMatch: {
-        HandleAssertion(match, /*negative=*/(value != 0), matcher);
-        return;
-      }
       default:
-        TC3_LOG(ERROR) << "Unhandled match type: " << type;
+        grammar::CallbackDelegate::MatchFound(match, type, value, matcher);
     }
   }
 
@@ -78,8 +64,8 @@
       codepoint_offsets.push_back(it);
     }
     codepoint_offsets.push_back(message_unicode.end());
-    for (const grammar::RuleMatch& candidate :
-         grammar::DeduplicateMatches(candidates_)) {
+    for (const grammar::Derivation& candidate :
+         grammar::DeduplicateDerivations(candidates_)) {
       // Check that assertions are fulfilled.
       if (!VerifyAssertions(candidate.match)) {
         continue;
@@ -98,7 +84,7 @@
  private:
   // Handles action rule matches.
   void HandleRuleMatch(const grammar::Match* match, const int64 rule_id) {
-    candidates_.push_back(grammar::RuleMatch{match, rule_id});
+    candidates_.push_back(grammar::Derivation{match, rule_id});
   }
 
   // Instantiates action suggestions from verified and deduplicated rule matches
@@ -108,7 +94,7 @@
   bool InstantiateActionsFromMatch(
       const std::vector<UnicodeText::const_iterator>& message_codepoint_offsets,
       int message_index, const std::string& smart_reply_action_type,
-      const grammar::RuleMatch& candidate,
+      const grammar::Derivation& candidate,
       const ReflectiveFlatbufferBuilder* entity_data_builder,
       std::vector<ActionSuggestion>* result) const {
     const RulesModel_::GrammarRules_::RuleMatch* rule_match =
@@ -119,8 +105,12 @@
     }
 
     // Gather active capturing matches.
-    const std::unordered_map<uint16, const grammar::CapturingMatch*>
-        capturing_matches = GatherCapturingMatches(candidate.match);
+    std::unordered_map<uint16, const grammar::Match*> capturing_matches;
+    for (const grammar::MappingMatch* match :
+         grammar::SelectAllOfType<grammar::MappingMatch>(
+             candidate.match, grammar::Match::kMappingMatch)) {
+      capturing_matches[match->id] = match;
+    }
 
     // Instantiate actions from the rule match.
     for (const uint16 action_id : *rule_match->action_id()) {
@@ -142,7 +132,7 @@
             continue;
           }
 
-          const grammar::CapturingMatch* capturing_match = it->second;
+          const grammar::Match* capturing_match = it->second;
           StringPiece match_text = StringPiece(
               message_codepoint_offsets[capturing_match->codepoint_span.first]
                   .utf8_data(),
@@ -154,16 +144,12 @@
           UnicodeText normalized_match_text =
               NormalizeMatchText(unilib_, group, match_text);
 
-          // Set entity data.
-          if (group->entity_field() != nullptr) {
-            TC3_CHECK_NE(entity_data, nullptr);
-            if (!entity_data->ParseAndSet(
-                    group->entity_field(),
-                    normalized_match_text.ToUTF8String())) {
-              TC3_LOG(ERROR)
-                  << "Could not set entity data from rule capturing match.";
-              return false;
-            }
+          if (!MergeEntityDataFromCapturingMatch(
+                  group, normalized_match_text.ToUTF8String(),
+                  entity_data.get())) {
+            TC3_LOG(ERROR)
+                << "Could not merge entity data from a capturing match.";
+            return false;
           }
 
           // Add smart reply suggestions.
@@ -177,14 +163,14 @@
                   /*span=*/capturing_match->codepoint_span, group,
                   /*message_index=*/message_index, match_text, &annotation)) {
             if (group->use_annotation_match()) {
-              const AnnotationMatch* annotation_match =
-                  grammar::SelectFirstOfType<AnnotationMatch>(
-                      capturing_match, AnnotationMatch::kType);
+              const grammar::AnnotationMatch* annotation_match =
+                  grammar::SelectFirstOfType<grammar::AnnotationMatch>(
+                      capturing_match, grammar::Match::kAnnotationMatch);
               if (!annotation_match) {
                 TC3_LOG(ERROR) << "Could not get annotation for match.";
                 return false;
               }
-              annotation.entity = annotation_match->annotation;
+              annotation.entity = *annotation_match->annotation;
             }
             annotations.push_back(std::move(annotation));
           }
@@ -202,13 +188,13 @@
     return true;
   }
 
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const RulesModel_::GrammarRules* grammar_rules_;
 
   // All action rule match candidates.
   // Grammar rule matches are recorded, deduplicated, verified and then
   // instantiated.
-  std::vector<grammar::RuleMatch> candidates_;
+  std::vector<grammar::Derivation> candidates_;
 };
 }  // namespace
 
@@ -216,10 +202,10 @@
     const UniLib* unilib, const RulesModel_::GrammarRules* grammar_rules,
     const ReflectiveFlatbufferBuilder* entity_data_builder,
     const std::string& smart_reply_action_type)
-    : unilib_(unilib),
+    : unilib_(*unilib),
       grammar_rules_(grammar_rules),
       tokenizer_(CreateTokenizer(grammar_rules->tokenizer_options(), unilib)),
-      lexer_(*unilib),
+      lexer_(unilib, grammar_rules->rules()),
       entity_data_builder_(entity_data_builder),
       smart_reply_action_type_(smart_reply_action_type),
       rules_locales_(ParseRulesLocales(grammar_rules->rules())) {}
@@ -248,39 +234,17 @@
     return true;
   }
 
-  GrammarActionsCallbackDelegate callback_handler(unilib_, grammar_rules_);
-
-  std::vector<AnnotationMatch> matches;
-  if (auto annotation_nonterminals = grammar_rules_->annotation_nonterminal()) {
-    for (const AnnotatedSpan& annotation :
-         conversation.messages.back().annotations) {
-      if (annotation.classification.empty()) {
-        continue;
-      }
-      const ClassificationResult& classification =
-          annotation.classification.front();
-      if (auto entry = annotation_nonterminals->LookupByKey(
-              classification.collection.c_str())) {
-        AnnotationMatch match;
-        match.Init(entry->value(), annotation.span, annotation.span.first,
-                   AnnotationMatch::kType);
-        match.annotation = classification;
-        matches.push_back(std::move(match));
-      }
-    }
-  }
-
-  std::vector<grammar::Match*> annotation_matches(matches.size());
-  for (int i = 0; i < matches.size(); i++) {
-    annotation_matches[i] = &matches[i];
-  }
-
-  grammar::Matcher matcher(*unilib_, grammar_rules_->rules(), locale_rules,
+  GrammarActionsCallbackDelegate callback_handler(&unilib_, grammar_rules_);
+  grammar::Matcher matcher(&unilib_, grammar_rules_->rules(), locale_rules,
                            &callback_handler);
 
+  const UnicodeText text =
+      UTF8ToUnicodeText(conversation.messages.back().text, /*do_copy=*/false);
+
   // Run grammar on last message.
-  lexer_.Process(tokenizer_->Tokenize(conversation.messages.back().text),
-                 /*matches=*/annotation_matches, &matcher);
+  lexer_.Process(text, tokenizer_->Tokenize(text),
+                 /*annotations=*/&conversation.messages.back().annotations,
+                 &matcher);
 
   // Populate results.
   return callback_handler.GetActions(conversation, smart_reply_action_type_,

diff --git a/native/actions/grammar-actions.h b/native/actions/grammar-actions.h
index 16705db..fc3270d 100644
--- a/native/actions/grammar-actions.h
+++ b/native/actions/grammar-actions.h

@@ -35,23 +35,19 @@
 // Grammar backed actions suggestions.
 class GrammarActions {
  public:
-  enum class Callback : grammar::CallbackId {
-    kActionRuleMatch = 1,
-    kCapturingMatch = 2,
-    kAssertionMatch = 3,
-  };
+  enum class Callback : grammar::CallbackId { kActionRuleMatch = 1 };
 
-  GrammarActions(const UniLib* unilib,
-                 const RulesModel_::GrammarRules* grammar_rules,
-                 const ReflectiveFlatbufferBuilder* entity_data_builder,
-                 const std::string& smart_reply_action_type);
+  explicit GrammarActions(
+      const UniLib* unilib, const RulesModel_::GrammarRules* grammar_rules,
+      const ReflectiveFlatbufferBuilder* entity_data_builder,
+      const std::string& smart_reply_action_type);
 
   // Suggests actions for a conversation from a message stream.
   bool SuggestActions(const Conversation& conversation,
                       std::vector<ActionSuggestion>* result) const;
 
  private:
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const RulesModel_::GrammarRules* grammar_rules_;
   const std::unique_ptr<Tokenizer> tokenizer_;
   const grammar::Lexer lexer_;

diff --git a/native/actions/lua-actions_test.cc b/native/actions/lua-actions_test.cc
new file mode 100644
index 0000000..72cae2c
--- /dev/null
+++ b/native/actions/lua-actions_test.cc

@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/lua-actions.h"
+
+#include <map>
+#include <string>
+
+#include "actions/test-utils.h"
+#include "actions/types.h"
+#include "utils/tflite-model-executor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+
+TEST(LuaActions, SimpleAction) {
+  Conversation conversation;
+  const std::string test_snippet = R"(
+    return {{ type = "test_action" }}
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsActionOfType("test_action")));
+}
+
+TEST(LuaActions, ConversationActions) {
+  Conversation conversation;
+  conversation.messages.push_back({/*user_id=*/0, "hello there!"});
+  conversation.messages.push_back({/*user_id=*/1, "general kenobi!"});
+  const std::string test_snippet = R"(
+    local actions = {}
+    for i, message in pairs(messages) do
+      if i < #messages then
+        if message.text == "hello there!" and
+           messages[i+1].text == "general kenobi!" then
+           table.insert(actions, {
+             type = "text_reply",
+             response_text = "you are a bold one!"
+           })
+        end
+        if message.text == "i am the senate!" and
+           messages[i+1].text == "not yet!" then
+           table.insert(actions, {
+             type = "text_reply",
+             response_text = "it's treason then"
+           })
+        end
+      end
+    end
+    return actions;
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsSmartReply("you are a bold one!")));
+}
+
+TEST(LuaActions, SimpleModelAction) {
+  Conversation conversation;
+  const std::string test_snippet = R"(
+    if #model.actions_scores == 0 then
+      return {{ type = "test_action" }}
+    end
+    return {}
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsActionOfType("test_action")));
+}
+
+TEST(LuaActions, SimpleModelRepliesAction) {
+  Conversation conversation;
+  const std::string test_snippet = R"(
+    if #model.reply == 0 then
+      return {{ type = "test_action" }}
+    end
+    return {}
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsActionOfType("test_action")));
+}
+
+TEST(LuaActions, AnnotationActions) {
+  AnnotatedSpan annotation;
+  annotation.span = {11, 15};  // Covers "home" in the message below.
+  annotation.classification = {ClassificationResult("address", 1.0)};
+  Conversation conversation = {{{/*user_id=*/1, "are you at home?",
+                                 /*reference_time_ms_utc=*/0,
+                                 /*reference_timezone=*/"Europe/Zurich",
+                                 /*annotations=*/{annotation},
+                                 /*locales=*/"en"}}};
+  const std::string test_snippet = R"(
+    local actions = {}
+    local last_message = messages[#messages]
+    for i, annotation in pairs(last_message.annotation) do
+      if #annotation.classification > 0 then
+        if annotation.classification[1].collection == "address" then
+           local text = string.sub(last_message.text,
+                            annotation.span["begin"] + 1,
+                            annotation.span["end"])
+           table.insert(actions, {
+             type = "text_reply",
+             response_text = "i am at " .. text,
+             annotation = {{
+               name = "location",
+               span = {
+                 text = text
+               },
+               entity = annotation.classification[1]
+             }},
+           })
+        end
+      end
+    end
+    return actions;
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  ASSERT_THAT(actions, ElementsAre(IsSmartReply("i am at home")));  // Fatal.
+  EXPECT_EQ("address", actions[0].annotations[0].entity.collection);
+}
+
+TEST(LuaActions, EntityData) {
+  std::string test_schema = TestEntityDataSchema();
+  Conversation conversation = {{{/*user_id=*/1, "hello there"}}};
+  const std::string test_snippet = R"(
+    return {{
+      type = "test",
+      entity = {
+        greeting = "hello",
+        location = "there",
+        person = "Kenobi",
+      },
+    }};
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/
+                  flatbuffers::GetRoot<reflection::Schema>(test_schema.data()),
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  ASSERT_THAT(actions, testing::SizeIs(1));  // Fatal: front() is read below.
+  EXPECT_EQ("test", actions.front().type);
+  const flatbuffers::Table* entity =
+      flatbuffers::GetAnyRoot(reinterpret_cast<const unsigned char*>(
+          actions.front().serialized_entity_data.data()));
+  EXPECT_EQ(entity->GetPointer<const flatbuffers::String*>(/*field=*/4)->str(),
+            "hello");
+  EXPECT_EQ(entity->GetPointer<const flatbuffers::String*>(/*field=*/6)->str(),
+            "there");
+  EXPECT_EQ(entity->GetPointer<const flatbuffers::String*>(/*field=*/8)->str(),
+            "Kenobi");
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/lua-ranker_test.cc b/native/actions/lua-ranker_test.cc
new file mode 100644
index 0000000..a790042
--- /dev/null
+++ b/native/actions/lua-ranker_test.cc

@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/lua-ranker.h"
+
+#include <string>
+
+#include "actions/types.h"
+#include "utils/flatbuffers.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Matches an action whose `type` and `response_text` both match the given
+// values.
+MATCHER_P2(IsAction, type, response_text, "") {
+  if (!testing::Value(arg.type, type)) {
+    return false;
+  }
+  return testing::Value(arg.response_text, response_text);
+}
+
+// Matches an action by its `type` alone; all other members are ignored.
+MATCHER_P(IsActionType, type, "") {
+  const bool type_matches = testing::Value(arg.type, type);
+  return type_matches;
+}
+
+// Builds a serialized reflection schema for the tests below and returns the
+// finished buffer as a byte string. The schema has a single object,
+// "EntityData", used as the root table, with one string field named "test".
+std::string TestEntitySchema() {
+  // Create fake entity data schema meta data.
+  // Cannot use object oriented API here as that is not available for the
+  // reflection schema.
+  flatbuffers::FlatBufferBuilder schema_builder;
+  // The only field: a string called "test" with id 0 and offset 4.
+  std::vector<flatbuffers::Offset<reflection::Field>> fields = {
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("test"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/0,
+          /*offset=*/4)};
+  // No enums are declared; the empty vector is still passed to the schema.
+  std::vector<flatbuffers::Offset<reflection::Enum>> enums;
+  std::vector<flatbuffers::Offset<reflection::Object>> objects = {
+      reflection::CreateObject(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("EntityData"),
+          /*fields=*/
+          schema_builder.CreateVectorOfSortedTables(&fields))};
+  // Finish the buffer with `objects[0]` ("EntityData") as the root table.
+  schema_builder.Finish(reflection::CreateSchema(
+      schema_builder, schema_builder.CreateVectorOfSortedTables(&objects),
+      schema_builder.CreateVectorOfSortedTables(&enums),
+      /*(unused) file_ident=*/0,
+      /*(unused) file_ext=*/0,
+      /*root_table*/ objects[0]));
+  // Copy the finished bytes out so the result outlives `schema_builder`.
+  return std::string(
+      reinterpret_cast<const char*>(schema_builder.GetBufferPointer()),
+      schema_builder.GetSize());
+}
+
+// A snippet that returns every action index in its original order must leave
+// the list of actions unchanged.
+TEST(LuaRankingTest, PassThrough) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for i=1,#actions do
+      table.insert(result, i)
+    end
+    return result
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, /*entity_data_schema=*/nullptr,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("text_reply"),
+                                         IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+// A snippet that returns an empty table must remove every action.
+TEST(LuaRankingTest, Filtering) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    return {}
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, /*entity_data_schema=*/nullptr,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions, testing::IsEmpty());
+}
+
+// A snippet may return the same index several times; here index 1 is
+// returned once per input action, so the first action appears three times.
+TEST(LuaRankingTest, Duplication) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for i=1,#actions do
+      table.insert(result, 1)
+    end
+    return result
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, /*entity_data_schema=*/nullptr,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("text_reply"),
+                                         IsActionType("text_reply"),
+                                         IsActionType("text_reply")}));
+}
+
+// The snippet sorts the action indices by ascending `score`, so the lowest
+// scored action is expected first.
+TEST(LuaRankingTest, SortByScore) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    function testScoreSorter(a, b)
+      return actions[a].score < actions[b].score
+    end
+    local result = {}
+    for i=1,#actions do
+      result[i] = i
+    end
+    table.sort(result, testScoreSorter)
+    return result
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, /*entity_data_schema=*/nullptr,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("add_to_collection"),
+                                         IsActionType("share_location"),
+                                         IsActionType("text_reply")}));
+}
+
+// The snippet keeps only actions whose type is not "text_reply", so the
+// smart reply must be dropped from the response.
+TEST(LuaRankingTest, SuppressType) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for id, action in pairs(actions) do
+      if action.type ~= "text_reply" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, /*entity_data_schema=*/nullptr,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+// The snippet reads `messages[1].text` and only ranks when it equals the
+// conversation text, which shows the conversation is exposed to the script.
+TEST(LuaRankingTest, HandlesConversation) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    if messages[1].text ~= "hello hello" then
+      return result
+    end
+    for id, action in pairs(actions) do
+      if action.type ~= "text_reply" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, /*entity_data_schema=*/nullptr,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+// The snippet filters on `action.test`, a field that only exists in the
+// serialized entity data; just the action carrying "value_a" must survive.
+TEST(LuaRankingTest, HandlesEntityData) {
+  std::string serialized_schema = TestEntitySchema();
+  const reflection::Schema* entity_data_schema =
+      flatbuffers::GetRoot<reflection::Schema>(serialized_schema.data());
+
+  // Create test entity data.
+  ReflectiveFlatbufferBuilder builder(entity_data_schema);
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = builder.NewRoot();
+  // Without a root buffer the fixture cannot be built; fail instead of crash.
+  ASSERT_TRUE(buffer != nullptr);
+  // The same buffer is reused: serialize once per value of the "test" field.
+  buffer->Set("test", "value_a");
+  const std::string serialized_entity_data_a = buffer->Serialize();
+  buffer->Set("test", "value_b");
+  const std::string serialized_entity_data_b = buffer->Serialize();
+
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"", /*type=*/"test",
+       /*score=*/1.0, /*priority_score=*/1.0, /*annotations=*/{},
+       /*serialized_entity_data=*/serialized_entity_data_a},
+      {/*response_text=*/"", /*type=*/"test",
+       /*score=*/1.0, /*priority_score=*/1.0, /*annotations=*/{},
+       /*serialized_entity_data=*/serialized_entity_data_b},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for id, action in pairs(actions) do
+      if action.type == "test" and action.test == "value_a" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+
+  auto ranker = ActionsSuggestionsLuaRanker::Create(
+      conversation, test_snippet, entity_data_schema,
+      /*annotations_entity_data_schema=*/nullptr, &response);
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+  EXPECT_TRUE(ranker->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("test")}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/ngram-model.cc b/native/actions/ngram-model.cc
index 50f912e..fb3992c 100644
--- a/native/actions/ngram-model.cc
+++ b/native/actions/ngram-model.cc

@@ -61,8 +61,8 @@
 }  // anonymous namespace
 
 std::unique_ptr<NGramModel> NGramModel::Create(
-    const NGramLinearRegressionModel* model, const Tokenizer* tokenizer,
-    const UniLib* unilib) {
+    const UniLib* unilib, const NGramLinearRegressionModel* model,
+    const Tokenizer* tokenizer) {
   if (model == nullptr) {
     return nullptr;
   }
@@ -70,11 +70,12 @@
     TC3_LOG(ERROR) << "No tokenizer options specified.";
     return nullptr;
   }
-  return std::unique_ptr<NGramModel>(new NGramModel(model, tokenizer, unilib));
+  return std::unique_ptr<NGramModel>(new NGramModel(unilib, model, tokenizer));
 }
 
-NGramModel::NGramModel(const NGramLinearRegressionModel* model,
-                       const Tokenizer* tokenizer, const UniLib* unilib)
+NGramModel::NGramModel(const UniLib* unilib,
+                       const NGramLinearRegressionModel* model,
+                       const Tokenizer* tokenizer)
     : model_(model) {
   // Create new tokenizer if options are specified, reuse feature processor
   // tokenizer otherwise.

diff --git a/native/actions/ngram-model.h b/native/actions/ngram-model.h
index da19ddb..a9072cd 100644
--- a/native/actions/ngram-model.h
+++ b/native/actions/ngram-model.h

@@ -30,8 +30,8 @@
 class NGramModel {
  public:
   static std::unique_ptr<NGramModel> Create(
-      const NGramLinearRegressionModel* model, const Tokenizer* tokenizer,
-      const UniLib* unilib);
+      const UniLib* unilib, const NGramLinearRegressionModel* model,
+      const Tokenizer* tokenizer);
 
   // Evaluates an n-gram linear regression model, and tests against the
   // threshold. Returns true in case of a positive classification. The caller
@@ -48,8 +48,8 @@
                                 int max_skips);
 
  private:
-  NGramModel(const NGramLinearRegressionModel* model,
-             const Tokenizer* tokenizer, const UniLib* unilib);
+  NGramModel(const UniLib* unilib, const NGramLinearRegressionModel* model,
+             const Tokenizer* tokenizer);
 
   // Returns the (begin,end] range of n-grams where the first hashed token
   // matches the given value.

diff --git a/native/actions/ranker_test.cc b/native/actions/ranker_test.cc
new file mode 100644
index 0000000..b52cf45
--- /dev/null
+++ b/native/actions/ranker_test.cc

@@ -0,0 +1,382 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/ranker.h"
+
+#include <string>
+
+#include "actions/types.h"
+#include "utils/zlib/zlib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Matches an action whose `type`, `response_text` and `score` all match the
+// given values; the checks run in that order and stop at the first mismatch.
+MATCHER_P3(IsAction, type, response_text, score, "") {
+  if (!testing::Value(arg.type, type)) {
+    return false;
+  }
+  if (!testing::Value(arg.response_text, response_text)) {
+    return false;
+  }
+  return testing::Value(arg.score, score);
+}
+
+// Matches an action by its `type` alone; all other members are ignored.
+MATCHER_P(IsActionType, type, "") {
+  const bool type_matches = testing::Value(arg.type, type);
+  return type_matches;
+}
+
+// Two smart replies with identical text are collapsed into one; the expected
+// survivor is the one with the higher score (1.0).
+TEST(RankingTest, DeduplicationSmartReply) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"hello there", /*type=*/"text_reply", /*score=*/0.5}};
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("text_reply", "hello there", 1.0)}));
+}
+
+// Replies with the same text are deduplicated only when their entity data
+// also matches; the reply carrying serialized entity data must be kept.
+TEST(RankingTest, DeduplicationExtraData) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0, /*priority_score=*/0.0},
+      {/*response_text=*/"hello there", /*type=*/"text_reply", /*score=*/0.5,
+       /*priority_score=*/0.0},
+      {/*response_text=*/"hello there", /*type=*/"text_reply", /*score=*/0.6,
+       /*priority_score=*/0.0,
+       /*annotations=*/{}, /*serialized_entity_data=*/"test"},
+  };
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("text_reply", "hello there", 1.0),
+                                 // Is kept as it has different entity data.
+                                 IsAction("text_reply", "hello there", 0.6)}));
+}
+
+// Two `view_map` actions share the same address span; only the one with the
+// higher scores is expected to survive. The `call_phone` action covers a
+// different span and is kept.
+TEST(RankingTest, DeduplicationAnnotations) {
+  const Conversation conversation = {
+      {{/*user_id=*/1, "742 Evergreen Terrace, the number is 1-800-TESTING"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/0.5,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/1.0,
+                                /*priority_score=*/2.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{37, 50},
+                       /*text=*/"1-800-TESTING"};
+    annotation.entity = ClassificationResult("phone", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/0.5,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsAction("view_map", "", 1.0),
+                                         IsAction("call_phone", "", 0.5)}));
+}
+
+// Same setup as the annotation deduplication case, but here the `view_map`
+// action with the lower score has the higher priority score and is the one
+// expected to survive.
+TEST(RankingTest, DeduplicationAnnotationsByPriorityScore) {
+  const Conversation conversation = {
+      {{/*user_id=*/1, "742 Evergreen Terrace, the number is 1-800-TESTING"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/0.6,
+                                /*priority_score=*/2.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{37, 50},
+                       /*text=*/"1-800-TESTING"};
+    annotation.entity = ClassificationResult("phone", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/0.5,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray(
+          {IsAction("view_map", "",
+                    0.6),  // lower score wins, as priority score is higher
+           IsAction("call_phone", "", 0.5)}));
+}
+
+// The "911" phone span lies inside the "A-911" code span. Only the
+// `copy_code` action, which has the higher priority score, is expected to
+// remain.
+TEST(RankingTest, DeduplicatesConflictingActions) {
+  const Conversation conversation = {{{/*user_id=*/1, "code A-911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{7, 10},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{5, 10},
+                       /*text=*/"A-911"};
+    annotation.entity = ClassificationResult("code", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"copy_code",
+                                /*score=*/1.0,
+                                /*priority_score=*/2.0,
+                                /*annotations=*/{annotation}});
+  }
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsAction("copy_code", "", 1.0)}));
+}
+
+// The Lua ranking script is stored zlib-compressed in the ranking options;
+// the ranker is given a decompressor and must apply the script, which drops
+// the "text_reply" action.
+TEST(RankingTest, HandlesCompressedLuaScript) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for id, action in pairs(actions) do
+      if action.type ~= "text_reply" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+  RankingOptionsT options;
+  options.compressed_lua_ranking_script.reset(new CompressedBufferT);
+  std::unique_ptr<ZlibCompressor> compressor = ZlibCompressor::Instance();
+  // Fail the test rather than crash if no compressor could be obtained.
+  ASSERT_TRUE(compressor != nullptr);
+  compressor->Compress(test_snippet,
+                       options.compressed_lua_ranking_script.get());
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+
+  std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+  // The script can only be unpacked with a valid decompressor.
+  ASSERT_TRUE(decompressor != nullptr);
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      decompressor.get(), /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+// With `suppress_smart_replies_with_actions` set, the smart reply is expected
+// to be dropped because a non-reply action (`call_phone`) is present.
+TEST(RankingTest, SuppressSmartRepliesWithAction) {
+  const Conversation conversation = {{{/*user_id=*/1, "should i call 911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    // "911" occupies code points [14, 17) of "should i call 911".
+    annotation.span = {/*message_index=*/0, /*span=*/{14, 17},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  response.actions.push_back({/*response_text=*/"How are you?",
+                              /*type=*/"text_reply"});
+  RankingOptionsT options;
+  options.suppress_smart_replies_with_actions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsAction("call_phone", "", 1.0)}));
+}
+
+// With `group_by_annotations` set, actions sharing an annotation are expected
+// to stay together, ahead of an unrelated reply with an intermediate score.
+TEST(RankingTest, GroupsActionsByAnnotations) {
+  const Conversation conversation = {{{/*user_id=*/1, "should i call 911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    // "911" occupies code points [14, 17) of "should i call 911".
+    annotation.span = {/*message_index=*/0, /*span=*/{14, 17},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    // Both actions below share the same annotation.
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"add_contact",
+                                /*score=*/0.0,
+                                /*priority_score=*/0.0,
+                                /*annotations=*/{annotation}});
+  }
+  response.actions.push_back({/*response_text=*/"How are you?",
+                              /*type=*/"text_reply",
+                              /*score=*/0.5});
+  RankingOptionsT options;
+  options.group_by_annotations = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+
+  // The text reply should be last, even though it has a higher score than the
+  // `add_contact` action.
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("call_phone", "", 1.0),
+                                 IsAction("add_contact", "", 0.0),
+                                 IsAction("text_reply", "How are you?", 0.5)}));
+}
+
+// With `group_by_annotations` disabled, the actions are expected in
+// descending score order regardless of which annotation they share.
+TEST(RankingTest, SortsActionsByScore) {
+  const Conversation conversation = {{{/*user_id=*/1, "should i call 911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    // "911" occupies code points [14, 17) of "should i call 911".
+    annotation.span = {/*message_index=*/0, /*span=*/{14, 17},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    // Both actions below share the same annotation.
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"add_contact",
+                                /*score=*/0.0,
+                                /*priority_score=*/0.0,
+                                /*annotations=*/{annotation}});
+  }
+  response.actions.push_back({/*response_text=*/"How are you?",
+                              /*type=*/"text_reply",
+                              /*score=*/0.5});
+  RankingOptionsT options;
+  // Don't group by annotation.
+  options.group_by_annotations = false;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // A failed creation must fail the test instead of dereferencing null.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("call_phone", "", 1.0),
+                                 IsAction("text_reply", "How are you?", 0.5),
+                                 IsAction("add_contact", "", 0.0)}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/regex-actions.cc b/native/actions/regex-actions.cc
index 1866135..7d5a4b2 100644
--- a/native/actions/regex-actions.cc
+++ b/native/actions/regex-actions.cc

@@ -96,7 +96,7 @@
   for (const RulesModel_::RegexRule* rule : *rules->regex_rule()) {
     std::unique_ptr<UniLib::RegexPattern> compiled_pattern =
         UncompressMakeRegexPattern(
-            *unilib_, rule->pattern(), rule->compressed_pattern(),
+            unilib_, rule->pattern(), rule->compressed_pattern(),
             rules->lazy_regex_compilation(), decompressor);
     if (compiled_pattern == nullptr) {
       TC3_LOG(ERROR) << "Failed to load rule pattern.";
@@ -108,7 +108,7 @@
     if (rule->output_pattern() != nullptr ||
         rule->compressed_output_pattern() != nullptr) {
       compiled_output_pattern = UncompressMakeRegexPattern(
-          *unilib_, rule->output_pattern(), rule->compressed_output_pattern(),
+          unilib_, rule->output_pattern(), rule->compressed_output_pattern(),
           rules->lazy_regex_compilation(), decompressor);
       if (compiled_output_pattern == nullptr) {
         TC3_LOG(ERROR) << "Failed to load rule output pattern.";
@@ -224,15 +224,12 @@
             UnicodeText normalized_group_match_text =
                 NormalizeMatchText(unilib_, group, group_match_text.value());
 
-            if (group->entity_field() != nullptr) {
-              TC3_CHECK_NE(entity_data, nullptr);
-              if (!entity_data->ParseAndSet(
-                      group->entity_field(),
-                      normalized_group_match_text.ToUTF8String())) {
-                TC3_LOG(ERROR)
-                    << "Could not set entity data from rule capturing group.";
-                return false;
-              }
+            if (!MergeEntityDataFromCapturingMatch(
+                    group, normalized_group_match_text.ToUTF8String(),
+                    entity_data.get())) {
+              TC3_LOG(ERROR)
+                  << "Could not merge entity data from a capturing match.";
+              return false;
             }
 
             // Create a text annotation for the group span.

diff --git a/native/actions/regex-actions.h b/native/actions/regex-actions.h
index c6b9ce2..871f08b 100644
--- a/native/actions/regex-actions.h
+++ b/native/actions/regex-actions.h

@@ -32,8 +32,9 @@
 // Regular expression backed actions suggestions.
 class RegexActions {
  public:
-  RegexActions(const std::string& smart_reply_action_type, const UniLib* unilib)
-      : unilib_(unilib), smart_reply_action_type_(smart_reply_action_type) {}
+  explicit RegexActions(const UniLib* unilib,
+                        const std::string& smart_reply_action_type)
+      : unilib_(*unilib), smart_reply_action_type_(smart_reply_action_type) {}
 
   // Decompresses and initializes all rules in a model.
   bool InitializeRules(
@@ -75,7 +76,7 @@
                             ZlibDecompressor* decompressor,
                             std::vector<CompiledRule>* compiled_rules) const;
 
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const std::string smart_reply_action_type_;
   std::vector<CompiledRule> rules_, low_confidence_rules_;
 };

diff --git a/native/actions/test-utils.cc b/native/actions/test-utils.cc
new file mode 100644
index 0000000..9b003dd
--- /dev/null
+++ b/native/actions/test-utils.cc

@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/test-utils.h"
+
+namespace libtextclassifier3 {
+
+// Builds a serialized reflection schema describing a fake `EntityData` table
+// for use in tests. The table declares three string fields:
+//   greeting (id 0, offset 4), location (id 1, offset 6) and
+//   person (id 2, offset 8).
+// Returns the finished flatbuffer bytes copied into a std::string.
+std::string TestEntityDataSchema() {
+  // Create fake entity data schema meta data.
+  // Cannot use object oriented API here as that is not available for the
+  // reflection schema.
+  flatbuffers::FlatBufferBuilder schema_builder;
+  std::vector<flatbuffers::Offset<reflection::Field>> fields = {
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("greeting"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/0,
+          /*offset=*/4),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("location"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/1,
+          /*offset=*/6),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("person"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/2,
+          /*offset=*/8)};
+  // No enums are declared; the empty vector is still handed to the schema.
+  std::vector<flatbuffers::Offset<reflection::Enum>> enums;
+  // The schema holds a single object, which is also used as the root table.
+  std::vector<flatbuffers::Offset<reflection::Object>> objects = {
+      reflection::CreateObject(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("EntityData"),
+          /*fields=*/
+          schema_builder.CreateVectorOfSortedTables(&fields))};
+  schema_builder.Finish(reflection::CreateSchema(
+      schema_builder, schema_builder.CreateVectorOfSortedTables(&objects),
+      schema_builder.CreateVectorOfSortedTables(&enums),
+      /*(unused) file_ident=*/0,
+      /*(unused) file_ext=*/0,
+      /*root_table=*/ objects[0]));
+
+  // Copy the finished buffer out of the local builder before it is destroyed.
+  return std::string(
+      reinterpret_cast<const char*>(schema_builder.GetBufferPointer()),
+      schema_builder.GetSize());
+}
+
+// Stores the serialized test entity data schema in
+// `test_model->actions_entity_data_schema`.
+void SetTestEntityDataSchema(ActionsModelT* test_model) {
+  const std::string schema_bytes = TestEntityDataSchema();
+  test_model->actions_entity_data_schema.assign(schema_bytes.begin(),
+                                                schema_bytes.end());
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/actions/test-utils.h b/native/actions/test-utils.h
new file mode 100644
index 0000000..c05d6a9
--- /dev/null
+++ b/native/actions/test-utils.h

@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ACTIONS_TEST_UTILS_H_
+#define LIBTEXTCLASSIFIER_ACTIONS_TEST_UTILS_H_
+
+#include <string>
+
+#include "actions/actions_model_generated.h"
+#include "utils/flatbuffers.h"
+#include "gmock/gmock.h"
+
+namespace libtextclassifier3 {
+
+using testing::ExplainMatchResult;
+using testing::Value;
+
+// Creates a serialized reflection schema for a test `EntityData` table with
+// the string fields `greeting`, `location` and `person`.
+std::string TestEntityDataSchema();
+
+// Sets the test entity data schema as the `actions_entity_data_schema` of
+// `test_model`.
+void SetTestEntityDataSchema(ActionsModelT* test_model);
+
+// Matches an object whose `type` member matches `type`.
+MATCHER_P(IsActionOfType, type, "") { return Value(arg.type, type); }
+// Matches an action of type "text_reply" whose `response_text` matches
+// `response_text`. The type check runs first and explains its result to the
+// listener; the text is only compared when the type matched.
+MATCHER_P(IsSmartReply, response_text, "") {
+  if (!ExplainMatchResult(IsActionOfType("text_reply"), arg,
+                          result_listener)) {
+    return false;
+  }
+  return Value(arg.response_text, response_text);
+}
+// Matches a pair-like value whose `first` and `second` members match those of
+// `span`.
+MATCHER_P(IsSpan, span, "") {
+  if (!Value(arg.first, span.first)) {
+    return false;
+  }
+  return Value(arg.second, span.second);
+}
+// Matches an annotation by its `name`, the text of its span (`span.text`) and
+// the span boundaries (`span.span`, compared with IsSpan).
+MATCHER_P3(IsActionSuggestionAnnotation, name, text, span, "") {
+  if (!Value(arg.name, name) || !Value(arg.span.text, text)) {
+    return false;
+  }
+  return ExplainMatchResult(IsSpan(span), arg.span.span, result_listener);
+}
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ACTIONS_TEST_UTILS_H_

diff --git a/native/actions/utils.cc b/native/actions/utils.cc
index 510ea9d..96f6f1f 100644
--- a/native/actions/utils.cc
+++ b/native/actions/utils.cc

@@ -69,7 +69,7 @@
 }
 
 UnicodeText NormalizeMatchText(
-    const UniLib* unilib,
+    const UniLib& unilib,
     const RulesModel_::RuleActionSpec_::RuleCapturingGroup* group,
     StringPiece match_text) {
   UnicodeText normalized_match_text =
@@ -102,4 +102,23 @@
   return true;
 }
 
+// Merges the entity data contributed by one capturing group into `buffer`.
+//  - If `group` names an entity field, `match_text` is parsed and stored in
+//    that field.
+//  - If `group` carries fixed entity data, that data is merged into `buffer`.
+// Returns true when there is nothing to merge or every step succeeded.
+// Returns false (after logging) when `buffer` is null although there is data
+// to merge, or when setting the field / merging the fixed data fails.
+bool MergeEntityDataFromCapturingMatch(
+    const RulesModel_::RuleActionSpec_::RuleCapturingGroup* group,
+    StringPiece match_text, ReflectiveFlatbuffer* buffer) {
+  const bool has_entity_field = group->entity_field() != nullptr;
+  const bool has_fixed_entity_data = group->entity_data() != nullptr;
+  if (!has_entity_field && !has_fixed_entity_data) {
+    // Nothing to merge, so the buffer is never touched.
+    return true;
+  }
+
+  // A null buffer cannot receive data; report the failure instead of
+  // dereferencing it.
+  if (buffer == nullptr) {
+    TC3_LOG(ERROR) << "No entity data buffer to merge capturing match into.";
+    return false;
+  }
+
+  if (has_entity_field &&
+      !buffer->ParseAndSet(group->entity_field(), match_text.ToString())) {
+    TC3_LOG(ERROR) << "Could not set entity data from rule capturing group.";
+    return false;
+  }
+  if (has_fixed_entity_data &&
+      !buffer->MergeFrom(reinterpret_cast<const flatbuffers::Table*>(
+          group->entity_data()))) {
+    TC3_LOG(ERROR) << "Could not set entity data for capturing match.";
+    return false;
+  }
+  return true;
+}
+
 }  // namespace libtextclassifier3

diff --git a/native/actions/utils.h b/native/actions/utils.h
index 45b9e16..820c79d 100644
--- a/native/actions/utils.h
+++ b/native/actions/utils.h

@@ -45,7 +45,7 @@
 
 // Applies normalization to a capturing match.
 UnicodeText NormalizeMatchText(
-    const UniLib* unilib,
+    const UniLib& unilib,
     const RulesModel_::RuleActionSpec_::RuleCapturingGroup* group,
     StringPiece match_text);
 
@@ -56,6 +56,12 @@
     const int message_index, StringPiece match_text,
     ActionSuggestionAnnotation* annotation);
 
+// Merges entity data from a capturing match into `buffer`.
+// If `group` specifies an entity field, `match_text` is parsed and set on that
+// field; if it specifies fixed entity data, that data is merged as well.
+// Returns false if setting the field or merging the fixed data fails.
+// `buffer` is only accessed when `group` specifies one of the two; it has to
+// be valid in that case.
+bool MergeEntityDataFromCapturingMatch(
+    const RulesModel_::RuleActionSpec_::RuleCapturingGroup* group,
+    StringPiece match_text, ReflectiveFlatbuffer* buffer);
+
 }  // namespace libtextclassifier3
 
 #endif  // LIBTEXTCLASSIFIER_ACTIONS_UTILS_H_

diff --git a/native/actions/zlib-utils.cc b/native/actions/zlib-utils.cc
index cfc066f..c8ad4e7 100644
--- a/native/actions/zlib-utils.cc
+++ b/native/actions/zlib-utils.cc

@@ -54,7 +54,7 @@
       }
       if (!rule->output_pattern.empty()) {
         rule->compressed_output_pattern.reset(new CompressedBufferT);
-        zlib_compressor->Compress(rule->pattern,
+        zlib_compressor->Compress(rule->output_pattern,
                                   rule->compressed_output_pattern.get());
         rule->output_pattern.clear();
       }

diff --git a/native/actions/zlib-utils_test.cc b/native/actions/zlib-utils_test.cc
new file mode 100644
index 0000000..75e4c78
--- /dev/null
+++ b/native/actions/zlib-utils_test.cc

@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/zlib-utils.h"
+
+#include <memory>
+
+#include "actions/actions_model_generated.h"
+#include "utils/zlib/zlib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+using testing::Field;
+using testing::Pointee;
+
+// Round-trips regex rule patterns through CompressActionsModel and
+// DecompressActionsModel, and additionally checks the compressed patterns
+// read back from a packed flatbuffer.
+TEST(ActionsZlibUtilsTest, CompressModel) {
+  // Build a model with two regex rules; only the second one also carries an
+  // output pattern.
+  ActionsModelT model;
+  constexpr char kTestPattern1[] = "this is a test pattern";
+  constexpr char kTestPattern2[] = "this is a second test pattern";
+  constexpr char kTestOutputPattern[] = "this is an output pattern";
+  model.rules.reset(new RulesModelT);
+  model.rules->regex_rule.emplace_back(new RulesModel_::RegexRuleT);
+  model.rules->regex_rule.back()->pattern = kTestPattern1;
+  model.rules->regex_rule.emplace_back(new RulesModel_::RegexRuleT);
+  model.rules->regex_rule.back()->pattern = kTestPattern2;
+  model.rules->regex_rule.back()->output_pattern = kTestOutputPattern;
+
+  // Compress the model.
+  EXPECT_TRUE(CompressActionsModel(&model));
+
+  // Sanity check that uncompressed field is removed.
+  // Only `pattern` is inspected here; `output_pattern` is compared against the
+  // original string at the end of the test.
+  const auto is_empty_pattern =
+      Pointee(Field(&libtextclassifier3::RulesModel_::RegexRuleT::pattern,
+                    testing::IsEmpty()));
+  EXPECT_THAT(model.rules->regex_rule,
+              ElementsAre(is_empty_pattern, is_empty_pattern));
+  // Pack and load the model.
+  flatbuffers::FlatBufferBuilder builder;
+  FinishActionsModelBuffer(builder, ActionsModel::Pack(builder, &model));
+  const ActionsModel* compressed_model = GetActionsModel(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()));
+  ASSERT_TRUE(compressed_model != nullptr);
+
+  // Decompress the fields again and check that they match the original.
+  std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+  ASSERT_TRUE(decompressor != nullptr);
+  // The same output string is reused for both rules.
+  std::string uncompressed_pattern;
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      compressed_model->rules()->regex_rule()->Get(0)->compressed_pattern(),
+      &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, kTestPattern1);
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      compressed_model->rules()->regex_rule()->Get(1)->compressed_pattern(),
+      &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, kTestPattern2);
+  // Decompress the object-API model in place and check that its fields hold
+  // the original strings.
+  EXPECT_TRUE(DecompressActionsModel(&model));
+  EXPECT_EQ(model.rules->regex_rule[0]->pattern, kTestPattern1);
+  EXPECT_EQ(model.rules->regex_rule[1]->pattern, kTestPattern2);
+  EXPECT_EQ(model.rules->regex_rule[1]->output_pattern, kTestOutputPattern);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/annotator.cc b/native/annotator/annotator.cc
index 540c944..6ee983f 100644
--- a/native/annotator/annotator.cc
+++ b/native/annotator/annotator.cc

@@ -29,6 +29,8 @@
 #include "annotator/model_generated.h"
 #include "annotator/types.h"
 #include "utils/base/logging.h"
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
 #include "utils/checksum.h"
 #include "utils/i18n/locale.h"
 #include "utils/math/softmax.h"
@@ -137,8 +139,6 @@
         fb_annotation_options->enable_date_range();
     result_annotation_options.include_preposition =
         fb_annotation_options->include_preposition();
-    result_annotation_options.expand_date_series =
-        fb_annotation_options->expand_date_series();
     if (fb_annotation_options->extra_requested_dates() != nullptr) {
       for (const auto& extra_requested_date :
            *fb_annotation_options->extra_requested_dates()) {
@@ -146,11 +146,9 @@
             extra_requested_date->str());
       }
     }
-    if (fb_annotation_options->ignored_tokens() != nullptr) {
-      for (const auto& ignored_token :
-           *fb_annotation_options->ignored_tokens()) {
-        result_annotation_options.ignored_tokens.push_back(
-            ignored_token->str());
+    if (fb_annotation_options->ignored_spans() != nullptr) {
+      for (const auto& ignored_span : *fb_annotation_options->ignored_spans()) {
+        result_annotation_options.ignored_spans.push_back(ignored_span->str());
       }
     }
   }
@@ -444,10 +442,10 @@
   if (model_->grammar_datetime_model() &&
       model_->grammar_datetime_model()->datetime_rules()) {
     cfg_datetime_parser_.reset(new dates::CfgDatetimeAnnotator(
-        *unilib_,
+        unilib_,
         /*tokenizer_options=*/
         model_->grammar_datetime_model()->grammar_tokenizer_options(),
-        *calendarlib_,
+        calendarlib_,
         /*datetime_rules=*/model_->grammar_datetime_model()->datetime_rules(),
         model_->grammar_datetime_model()->target_classification_score(),
         model_->grammar_datetime_model()->priority_score()));
@@ -456,9 +454,11 @@
                         "datetime parser.";
       return;
     }
-  } else if (model_->datetime_model()) {
+  }
+
+  if (model_->datetime_model()) {
     datetime_parser_ = DatetimeParser::Instance(
-        model_->datetime_model(), *unilib_, *calendarlib_, decompressor.get());
+        model_->datetime_model(), unilib_, calendarlib_, decompressor.get());
     if (!datetime_parser_) {
       TC3_LOG(ERROR) << "Could not initialize datetime parser.";
       return;
@@ -548,6 +548,19 @@
     return;
   }
 
+  if (model_->conflict_resolution_options() != nullptr) {
+    prioritize_longest_annotation_ =
+        model_->conflict_resolution_options()->prioritize_longest_annotation();
+    do_conflict_resolution_in_raw_mode_ =
+        model_->conflict_resolution_options()
+            ->do_conflict_resolution_in_raw_mode();
+  }
+
+#ifdef TC3_EXPERIMENTAL
+  TC3_LOG(WARNING) << "Enabling experimental annotators.";
+  InitializeExperimentalAnnotators();
+#endif
+
   initialized_ = true;
 }
 
@@ -558,7 +571,7 @@
 
   // Initialize pattern recognizers.
   int regex_pattern_id = 0;
-  for (const auto& regex_pattern : *model_->regex_model()->patterns()) {
+  for (const auto regex_pattern : *model_->regex_model()->patterns()) {
     std::unique_ptr<UniLib::RegexPattern> compiled_pattern =
         UncompressMakeRegexPattern(
             *unilib_, regex_pattern->pattern(),
@@ -684,6 +697,15 @@
   return InitializePersonNameEngineFromScopedMmap(*mmap);
 }
 
+// Creates the experimental annotator when ExperimentalAnnotator reports that
+// it is enabled. Returns true if the annotator was created, false otherwise.
+bool Annotator::InitializeExperimentalAnnotators() {
+  if (!ExperimentalAnnotator::IsEnabled()) {
+    return false;
+  }
+  experimental_annotator_.reset(new ExperimentalAnnotator(
+      model_->experimental_model(), *selection_feature_processor_, *unilib_));
+  return true;
+}
+
 namespace {
 
 int CountDigits(const std::string& str, CodepointSpan selection_indices) {
@@ -816,6 +838,12 @@
     TC3_LOG(ERROR) << "Not initialized";
     return original_click_indices;
   }
+  if (options.annotation_usecase !=
+      AnnotationUsecase_ANNOTATION_USECASE_SMART) {
+    TC3_LOG(WARNING)
+        << "Invoking SuggestSelection, which is not supported in RAW mode.";
+    return original_click_indices;
+  }
   if (!(model_->enabled_modes() & ModeFlag_SELECTION)) {
     return original_click_indices;
   }
@@ -885,7 +913,8 @@
   }
   if (knowledge_engine_ != nullptr &&
       !knowledge_engine_->Chunk(context, options.annotation_usecase,
-                                options.location_context, &candidates)) {
+                                options.location_context, Permissions(),
+                                &candidates)) {
     TC3_LOG(ERROR) << "Knowledge suggest selection failed.";
     return original_click_indices;
   }
@@ -925,6 +954,11 @@
     candidates.push_back(grammar_suggested_span);
   }
 
+  if (experimental_annotator_ != nullptr) {
+    candidates.push_back(experimental_annotator_->SuggestSelection(
+        context_unicode, click_indices));
+  }
+
   // Sort candidates according to their position in the input, so that the next
   // code can assume that any connected component of overlapping spans forms a
   // contiguous block.
@@ -1065,6 +1099,12 @@
         return false;
       }
 
+      // A PERSONNAME entity does not conflict with anything.
+      if ((source_mask &
+           (1 << static_cast<int>(AnnotatedSpan::Source::PERSON_NAME)))) {
+        return false;
+      }
+
       // Entities from other sources can conflict.
       return true;
   }
@@ -1108,18 +1148,15 @@
     }
   }
 
-  const bool prioritize_longest_annotation =
-      model_->triggering_options() != nullptr &&
-      model_->triggering_options()->prioritize_longest_annotation();
-  std::sort(conflicting_indices.begin(), conflicting_indices.end(),
-            [&scores_lengths, candidates, conflicting_indices,
-             prioritize_longest_annotation](int i, int j) {
-              if (scores_lengths[i].first == scores_lengths[j].first &&
-                  prioritize_longest_annotation) {
-                return scores_lengths[i].second > scores_lengths[j].second;
-              }
-              return scores_lengths[i].first > scores_lengths[j].first;
-            });
+  std::sort(
+      conflicting_indices.begin(), conflicting_indices.end(),
+      [this, &scores_lengths, candidates, conflicting_indices](int i, int j) {
+        if (scores_lengths[i].first == scores_lengths[j].first &&
+            prioritize_longest_annotation_) {
+          return scores_lengths[i].second > scores_lengths[j].second;
+        }
+        return scores_lengths[i].first > scores_lengths[j].first;
+      });
 
   // Here we keep a set of indices that were chosen, per-source, to enable
   // effective computation.
@@ -1140,7 +1177,12 @@
         chosen_indices_for_source_ptr = &source_set_pair.second;
       }
 
-      if (DoSourcesConflict(annotation_usecase, source_set_pair.first,
+      const bool needs_conflict_resolution =
+          annotation_usecase == AnnotationUsecase_ANNOTATION_USECASE_SMART ||
+          (annotation_usecase == AnnotationUsecase_ANNOTATION_USECASE_RAW &&
+           do_conflict_resolution_in_raw_mode_);
+      if (needs_conflict_resolution &&
+          DoSourcesConflict(annotation_usecase, source_set_pair.first,
                             candidates[considered_candidate].source) &&
           DoesCandidateConflict(considered_candidate, candidates,
                                 source_set_pair.second)) {
@@ -1529,7 +1571,19 @@
     }
   }
 
-  *classification_results = {{top_collection, 1.0, scores[best_score_index]}};
+  *classification_results = {{top_collection, /*arg_score=*/1.0,
+                              /*arg_priority_score=*/scores[best_score_index]}};
+
+  // For some entities, we might want to scale the priority score by a
+  // per-collection factor, for better conflict resolution between entities.
+  if (model_->triggering_options() != nullptr &&
+      model_->triggering_options()->collection_to_priority() != nullptr) {
+    if (auto entry =
+            model_->triggering_options()->collection_to_priority()->LookupByKey(
+                top_collection.c_str())) {
+      (*classification_results)[0].priority_score *= entry->value();
+    }
+  }
   return true;
 }
 
@@ -1628,7 +1682,7 @@
     const ClassificationOptions& options,
     std::vector<ClassificationResult>* classification_results) const {
   if (!datetime_parser_ && !cfg_datetime_parser_) {
-    return false;
+    return true;
   }
 
   const std::string selection_text =
@@ -1636,6 +1690,7 @@
           .UTF8Substring(selection_indices.first, selection_indices.second);
 
   std::vector<DatetimeParseResultSpan> datetime_spans;
+
   if (cfg_datetime_parser_) {
     if (!(model_->grammar_datetime_model()->enabled_modes() &
           ModeFlag_CLASSIFICATION)) {
@@ -1649,7 +1704,9 @@
             model_->grammar_datetime_model()->annotation_options(),
             options.reference_timezone, options.reference_time_ms_utc),
         parsed_locales, &datetime_spans);
-  } else if (datetime_parser_) {
+  }
+
+  if (datetime_parser_) {
     if (!datetime_parser_->Parse(selection_text, options.reference_time_ms_utc,
                                  options.reference_timezone, options.locales,
                                  ModeFlag_CLASSIFICATION,
@@ -1659,6 +1716,7 @@
       return false;
     }
   }
+
   for (const DatetimeParseResultSpan& datetime_span : datetime_spans) {
     // Only consider the result valid if the selection and extracted datetime
     // spans exactly match.
@@ -1688,7 +1746,12 @@
     TC3_LOG(ERROR) << "Not initialized";
     return {};
   }
-
+  if (options.annotation_usecase !=
+      AnnotationUsecase_ANNOTATION_USECASE_SMART) {
+    TC3_LOG(WARNING)
+        << "Invoking ClassifyText, which is not supported in RAW mode.";
+    return {};
+  }
   if (!(model_->enabled_modes() & ModeFlag_CLASSIFICATION)) {
     return {};
   }
@@ -1724,7 +1787,7 @@
   if (knowledge_engine_ &&
       knowledge_engine_->ClassifyText(
           context, selection_indices, options.annotation_usecase,
-          options.location_context, &knowledge_result)) {
+          options.location_context, Permissions(), &knowledge_result)) {
     candidates.push_back({selection_indices, {knowledge_result}});
     candidates.back().source = AnnotatedSpan::Source::KNOWLEDGE;
   }
@@ -1745,6 +1808,7 @@
       person_name_engine_->ClassifyText(context, selection_indices,
                                         &person_name_result)) {
     candidates.push_back({selection_indices, {person_name_result}});
+    candidates.back().source = AnnotatedSpan::Source::PERSON_NAME;
   }
 
   // Try the installed app engine.
@@ -1781,22 +1845,25 @@
     candidates.back().source = AnnotatedSpan::Source::DATETIME;
   }
 
+  const UnicodeText context_unicode =
+      UTF8ToUnicodeText(context, /*do_copy=*/false);
+
   // Try the number annotator.
   // TODO(b/126579108): Propagate error status.
   ClassificationResult number_annotator_result;
   if (number_annotator_ &&
-      number_annotator_->ClassifyText(
-          UTF8ToUnicodeText(context, /*do_copy=*/false), selection_indices,
-          options.annotation_usecase, &number_annotator_result)) {
+      number_annotator_->ClassifyText(context_unicode, selection_indices,
+                                      options.annotation_usecase,
+                                      &number_annotator_result)) {
     candidates.push_back({selection_indices, {number_annotator_result}});
   }
 
   // Try the duration annotator.
   ClassificationResult duration_annotator_result;
   if (duration_annotator_ &&
-      duration_annotator_->ClassifyText(
-          UTF8ToUnicodeText(context, /*do_copy=*/false), selection_indices,
-          options.annotation_usecase, &duration_annotator_result)) {
+      duration_annotator_->ClassifyText(context_unicode, selection_indices,
+                                        options.annotation_usecase,
+                                        &duration_annotator_result)) {
     candidates.push_back({selection_indices, {duration_annotator_result}});
     candidates.back().source = AnnotatedSpan::Source::DURATION;
   }
@@ -1804,21 +1871,27 @@
   // Try the translate annotator.
   ClassificationResult translate_annotator_result;
   if (translate_annotator_ &&
-      translate_annotator_->ClassifyText(
-          UTF8ToUnicodeText(context, /*do_copy=*/false), selection_indices,
-          options.user_familiar_language_tags, &translate_annotator_result)) {
+      translate_annotator_->ClassifyText(context_unicode, selection_indices,
+                                         options.user_familiar_language_tags,
+                                         &translate_annotator_result)) {
     candidates.push_back({selection_indices, {translate_annotator_result}});
   }
 
   // Try the grammar model.
   ClassificationResult grammar_annotator_result;
   if (grammar_annotator_ && grammar_annotator_->ClassifyText(
-                                detected_text_language_tags,
-                                UTF8ToUnicodeText(context, /*do_copy=*/false),
+                                detected_text_language_tags, context_unicode,
                                 selection_indices, &grammar_annotator_result)) {
     candidates.push_back({selection_indices, {grammar_annotator_result}});
   }
 
+  ClassificationResult experimental_annotator_result;
+  if (experimental_annotator_ &&
+      experimental_annotator_->ClassifyText(context_unicode, selection_indices,
+                                            &experimental_annotator_result)) {
+    candidates.push_back({selection_indices, {experimental_annotator_result}});
+  }
+
   // Try the ML model.
   //
   // The output of the model is considered as an exclusive 1-of-N choice. That's
@@ -2021,18 +2094,18 @@
   }
 }
 
-std::vector<AnnotatedSpan> Annotator::Annotate(
-    const std::string& context, const AnnotationOptions& options) const {
-  std::vector<AnnotatedSpan> candidates;
-
+Status Annotator::AnnotateSingleInput(
+    const std::string& context, const AnnotationOptions& options,
+    std::vector<AnnotatedSpan>* candidates) const {
   if (!(model_->enabled_modes() & ModeFlag_ANNOTATION)) {
-    return {};
+    return Status(StatusCode::UNAVAILABLE, "Model annotation was not enabled.");
   }
 
   const UnicodeText context_unicode =
       UTF8ToUnicodeText(context, /*do_copy=*/false);
   if (!context_unicode.is_valid()) {
-    return {};
+    return Status(StatusCode::INVALID_ARGUMENT,
+                  "Context string isn't valid UTF8.");
   }
 
   std::vector<Locale> detected_text_language_tags;
@@ -2045,7 +2118,9 @@
   if (!Locale::IsAnyLocaleSupported(detected_text_language_tags,
                                     model_triggering_locales_,
                                     /*default_value=*/true)) {
-    return {};
+    return Status(
+        StatusCode::UNAVAILABLE,
+        "The detected language tags are not in the supported locales.");
   }
 
   InterpreterManager interpreter_manager(selection_executor_.get(),
@@ -2054,17 +2129,15 @@
   // Annotate with the selection model.
   std::vector<Token> tokens;
   if (!ModelAnnotate(context, detected_text_language_tags, &interpreter_manager,
-                     &tokens, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run ModelAnnotate.";
-    return {};
+                     &tokens, candidates)) {
+    return Status(StatusCode::INTERNAL, "Couldn't run ModelAnnotate.");
   }
 
   // Annotate with the regular expression models.
   if (!RegexChunk(UTF8ToUnicodeText(context, /*do_copy=*/false),
-                  annotation_regex_patterns_, &candidates,
+                  annotation_regex_patterns_, candidates,
                   options.is_serialized_entity_data_enabled)) {
-    TC3_LOG(ERROR) << "Couldn't run RegexChunk.";
-    return {};
+    return Status(StatusCode::INTERNAL, "Couldn't run RegexChunk.");
   }
 
   // Annotate with the datetime model.
@@ -2075,115 +2148,109 @@
                      options.reference_time_ms_utc, options.reference_timezone,
                      options.locales, ModeFlag_ANNOTATION,
                      options.annotation_usecase,
-                     options.is_serialized_entity_data_enabled, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run DatetimeChunk.";
-    return {};
+                     options.is_serialized_entity_data_enabled, candidates)) {
+    return Status(StatusCode::INTERNAL, "Couldn't run DatetimeChunk.");
   }
 
-  // Annotate with the knowledge engine into a temporary vector.
-  std::vector<AnnotatedSpan> knowledge_candidates;
-  if (knowledge_engine_ &&
-      !knowledge_engine_->Chunk(context, options.annotation_usecase,
-                                options.location_context,
-                                &knowledge_candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run knowledge engine Chunk.";
-    return {};
-  }
-
-  AddContactMetadataToKnowledgeClassificationResults(&knowledge_candidates);
-
-  // Move the knowledge candidates to the full candidate list, and erase
-  // knowledge_candidates.
-  candidates.insert(candidates.end(),
-                    std::make_move_iterator(knowledge_candidates.begin()),
-                    std::make_move_iterator(knowledge_candidates.end()));
-  knowledge_candidates.clear();
-
   // Annotate with the contact engine.
   if (contact_engine_ &&
-      !contact_engine_->Chunk(context_unicode, tokens, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run contact engine Chunk.";
-    return {};
+      !contact_engine_->Chunk(context_unicode, tokens, candidates)) {
+    return Status(StatusCode::INTERNAL, "Couldn't run contact engine Chunk.");
   }
 
   // Annotate with the installed app engine.
   if (installed_app_engine_ &&
-      !installed_app_engine_->Chunk(context_unicode, tokens, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run installed app engine Chunk.";
-    return {};
+      !installed_app_engine_->Chunk(context_unicode, tokens, candidates)) {
+    return Status(StatusCode::INTERNAL,
+                  "Couldn't run installed app engine Chunk.");
   }
 
   // Annotate with the number annotator.
   if (number_annotator_ != nullptr &&
       !number_annotator_->FindAll(context_unicode, options.annotation_usecase,
-                                  &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run number annotator FindAll.";
-    return {};
+                                  candidates)) {
+    return Status(StatusCode::INTERNAL,
+                  "Couldn't run number annotator FindAll.");
   }
 
   // Annotate with the duration annotator.
   if (is_entity_type_enabled(Collections::Duration()) &&
       duration_annotator_ != nullptr &&
       !duration_annotator_->FindAll(context_unicode, tokens,
-                                    options.annotation_usecase, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run duration annotator FindAll.";
-    return {};
+                                    options.annotation_usecase, candidates)) {
+    return Status(StatusCode::INTERNAL,
+                  "Couldn't run duration annotator FindAll.");
   }
 
   // Annotate with the person name engine.
   if (is_entity_type_enabled(Collections::PersonName()) &&
       person_name_engine_ &&
-      !person_name_engine_->Chunk(context_unicode, tokens, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run person name engine Chunk.";
-    return {};
+      !person_name_engine_->Chunk(context_unicode, tokens, candidates)) {
+    return Status(StatusCode::INTERNAL,
+                  "Couldn't run person name engine Chunk.");
   }
 
   // Annotate with the grammar annotators.
   if (grammar_annotator_ != nullptr &&
       !grammar_annotator_->Annotate(detected_text_language_tags,
-                                    context_unicode, &candidates)) {
-    TC3_LOG(ERROR) << "Couldn't run grammar annotators.";
-    return {};
+                                    context_unicode, candidates)) {
+    return Status(StatusCode::INTERNAL, "Couldn't run grammar annotators.");
+  }
+
+  if (experimental_annotator_ != nullptr &&
+      !experimental_annotator_->Annotate(context_unicode, candidates)) {
+    return Status(StatusCode::INTERNAL, "Couldn't run experimental annotator.");
   }
 
   // Sort candidates according to their position in the input, so that the next
   // code can assume that any connected component of overlapping spans forms a
   // contiguous block.
-  std::sort(candidates.begin(), candidates.end(),
+  // Also sort them according to the end position and collection, so that the
+  // deduplication code below can assume that same spans and classifications
+  // form contiguous blocks.
+  std::sort(candidates->begin(), candidates->end(),
             [](const AnnotatedSpan& a, const AnnotatedSpan& b) {
-              return a.span.first < b.span.first;
+              if (a.span.first != b.span.first) {
+                return a.span.first < b.span.first;
+              }
+
+              if (a.span.second != b.span.second) {
+                return a.span.second < b.span.second;
+              }
+
+              return a.classification[0].collection <
+                     b.classification[0].collection;
             });
 
   std::vector<int> candidate_indices;
-  if (!ResolveConflicts(candidates, context, tokens,
+  if (!ResolveConflicts(*candidates, context, tokens,
                         detected_text_language_tags, options.annotation_usecase,
                         &interpreter_manager, &candidate_indices)) {
-    TC3_LOG(ERROR) << "Couldn't resolve conflicts.";
-    return {};
+    return Status(StatusCode::INTERNAL, "Couldn't resolve conflicts.");
   }
 
+  // Remove candidates that overlap exactly and have the same collection.
+  // This can e.g. happen for phone coming from both ML model and regex.
+  candidate_indices.erase(
+      std::unique(candidate_indices.begin(), candidate_indices.end(),
+                  [&candidates](const int a_index, const int b_index) {
+                    const AnnotatedSpan& a = (*candidates)[a_index];
+                    const AnnotatedSpan& b = (*candidates)[b_index];
+                    return a.span == b.span &&
+                           a.classification[0].collection ==
+                               b.classification[0].collection;
+                  }),
+      candidate_indices.end());
+
   std::vector<AnnotatedSpan> result;
   result.reserve(candidate_indices.size());
-  AnnotatedSpan aggregated_span;
   for (const int i : candidate_indices) {
-    if (candidates[i].span != aggregated_span.span) {
-      if (!aggregated_span.classification.empty()) {
-        result.push_back(std::move(aggregated_span));
-      }
-      aggregated_span =
-          AnnotatedSpan(candidates[i].span, /*arg_classification=*/{});
-    }
-    if (candidates[i].classification.empty() ||
-        ClassifiedAsOther(candidates[i].classification) ||
-        FilteredForAnnotation(candidates[i])) {
+    if ((*candidates)[i].classification.empty() ||
+        ClassifiedAsOther((*candidates)[i].classification) ||
+        FilteredForAnnotation((*candidates)[i])) {
       continue;
     }
-    for (ClassificationResult& classification : candidates[i].classification) {
-      aggregated_span.classification.push_back(std::move(classification));
-    }
-  }
-  if (!aggregated_span.classification.empty()) {
-    result.push_back(std::move(aggregated_span));
+    result.push_back(std::move((*candidates)[i]));
   }
 
   // We generate all candidates and remove them later (with the exception of
@@ -2196,8 +2263,80 @@
   for (AnnotatedSpan& annotated_span : result) {
     SortClassificationResults(&annotated_span.classification);
   }
+  *candidates = result;
+  return Status::OK;
+}
 
-  return result;
+StatusOr<std::vector<std::vector<AnnotatedSpan>>>
+Annotator::AnnotateStructuredInput(
+    const std::vector<InputFragment>& string_fragments,
+    const AnnotationOptions& options) const {
+  std::vector<std::vector<AnnotatedSpan>> annotation_candidates(
+      string_fragments.size());
+
+  std::vector<std::string> text_to_annotate;
+  text_to_annotate.reserve(string_fragments.size());
+  for (const auto& string_fragment : string_fragments) {
+    text_to_annotate.push_back(string_fragment.text);
+  }
+
+  // KnowledgeEngine is special, because it supports annotation of multiple
+  // fragments at once.
+  if (knowledge_engine_ &&
+      !knowledge_engine_
+           ->ChunkMultipleSpans(text_to_annotate, options.annotation_usecase,
+                                options.location_context, options.permissions,
+                                &annotation_candidates)
+           .ok()) {
+    return Status(StatusCode::INTERNAL, "Couldn't run knowledge engine Chunk.");
+  }
+  // The annotator engines shouldn't change the number of annotation vectors.
+  if (annotation_candidates.size() != text_to_annotate.size()) {
+    TC3_LOG(ERROR) << "Received " << text_to_annotate.size()
+                   << " texts to annotate but generated a different number of  "
+                      "lists of annotations:"
+                   << annotation_candidates.size();
+    return Status(StatusCode::INTERNAL,
+                  "Number of annotation candidates differs from "
+                  "number of texts to annotate.");
+  }
+
+  // Other annotators run on each fragment independently.
+  for (int i = 0; i < text_to_annotate.size(); ++i) {
+    AnnotationOptions annotation_options = options;
+    if (string_fragments[i].datetime_options.has_value()) {
+      DatetimeOptions reference_datetime =
+          string_fragments[i].datetime_options.value();
+      annotation_options.reference_time_ms_utc =
+          reference_datetime.reference_time_ms_utc;
+      annotation_options.reference_timezone =
+          reference_datetime.reference_timezone;
+    }
+
+    AddContactMetadataToKnowledgeClassificationResults(
+        &annotation_candidates[i]);
+
+    Status annotation_status = AnnotateSingleInput(
+        text_to_annotate[i], annotation_options, &annotation_candidates[i]);
+    if (!annotation_status.ok()) {
+      return annotation_status;
+    }
+  }
+  return annotation_candidates;
+}
+
+std::vector<AnnotatedSpan> Annotator::Annotate(
+    const std::string& context, const AnnotationOptions& options) const {
+  std::vector<InputFragment> string_fragments;
+  string_fragments.push_back({.text = context});
+  StatusOr<std::vector<std::vector<AnnotatedSpan>>> annotations =
+      AnnotateStructuredInput(string_fragments, options);
+  if (!annotations.ok()) {
+    TC3_LOG(ERROR) << "Returned error when calling AnnotateStructuredInput: "
+                   << annotations.status().error_message();
+    return {};
+  }
+  return annotations.ValueOrDie()[0];
 }
 
 CodepointSpan Annotator::ComputeSelectionBoundaries(
@@ -2317,7 +2456,7 @@
         // Apply normalization if specified.
         if (group->normalization_options() != nullptr) {
           normalized_group_match_text =
-              NormalizeText(unilib_, group->normalization_options(),
+              NormalizeText(*unilib_, group->normalization_options(),
                             normalized_group_match_text);
         }
 
@@ -2357,13 +2496,22 @@
       LoadAndVerifyMutableFlatbuffer<libtextclassifier3::EntityData>(
           *serialized_entity_data);
   if (data == nullptr) {
-    TC3_LOG(ERROR)
-        << "Data field is null when trying to parse Money Entity Data";
+    if (model_->version() >= 706) {
+      // This way of parsing money entity data is enabled for models newer than
+      // v706, consequently logging errors only for them (b/156634162).
+      TC3_LOG(ERROR)
+          << "Data field is null when trying to parse Money Entity Data";
+    }
     return false;
   }
   if (data->money->unnormalized_amount.empty()) {
-    TC3_LOG(ERROR) << "Data unnormalized_amount is empty when trying to parse "
-                      "Money Entity Data";
+    if (model_->version() >= 706) {
+      // This way of parsing money entity data is enabled for models newer than
+      // v706, consequently logging errors only for them (b/156634162).
+      TC3_LOG(ERROR)
+          << "Data unnormalized_amount is empty when trying to parse "
+             "Money Entity Data";
+    }
     return false;
   }
 
@@ -2454,7 +2602,11 @@
         if (regex_pattern.config->collection_name()->str() ==
             Collections::Money()) {
           if (!ParseAndFillInMoneyAmount(&serialized_entity_data)) {
-            TC3_LOG(ERROR) << "Could not parse and fill in money amount.";
+            if (model_->version() >= 706) {
+              // This way of parsing money entity data is enabled for models
+              // newer than v706 => logging errors only for them (b/156634162).
+              TC3_LOG(ERROR) << "Could not parse and fill in money amount.";
+            }
           }
         }
       }
@@ -2741,15 +2893,15 @@
             model_->grammar_datetime_model()->annotation_options(),
             reference_timezone, reference_time_ms_utc),
         parsed_locales, &datetime_spans);
-  } else if (datetime_parser_) {
+  }
+
+  if (datetime_parser_) {
     if (!datetime_parser_->Parse(context_unicode, reference_time_ms_utc,
                                  reference_timezone, locales, mode,
                                  annotation_usecase,
                                  /*anchor_start_end=*/false, &datetime_spans)) {
       return false;
     }
-  } else {
-    return true;
   }
 
   for (const DatetimeParseResultSpan& datetime_span : datetime_spans) {

diff --git a/native/annotator/annotator.h b/native/annotator/annotator.h
index 8498267..ebd762c 100644
--- a/native/annotator/annotator.h
+++ b/native/annotator/annotator.h

@@ -28,6 +28,7 @@
 #include "annotator/contact/contact-engine.h"
 #include "annotator/datetime/parser.h"
 #include "annotator/duration/duration.h"
+#include "annotator/experimental/experimental.h"
 #include "annotator/feature-processor.h"
 #include "annotator/grammar/dates/cfg-datetime-annotator.h"
 #include "annotator/grammar/grammar-annotator.h"
@@ -41,6 +42,8 @@
 #include "annotator/translate/translate.h"
 #include "annotator/types.h"
 #include "annotator/zlib-utils.h"
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
 #include "utils/flatbuffers.h"
 #include "utils/i18n/locale.h"
 #include "utils/memory/mmap.h"
@@ -158,6 +161,11 @@
   bool InitializePersonNameEngineFromFileDescriptor(int fd, int offset,
                                                     int size);
 
+  // Initializes the experimental annotators if available.
+  // Returns true if there is an implementation of experimental annotators
+  // linked in.
+  bool InitializeExperimentalAnnotators();
+
   // Sets up the lang-id instance that should be used.
   void SetLangId(const libtextclassifier3::mobile::lang_id::LangId* lang_id);
 
@@ -179,6 +187,20 @@
       const std::string& context, CodepointSpan selection_indices,
       const ClassificationOptions& options = ClassificationOptions()) const;
 
+  // Annotates the given structured input request. Models which handle the
+  // full context request will receive all the metadata they require, while
+  // models that don't use the extra context are called using only a string.
+  // For each fragment the annotations are sorted by their position in
+  // the fragment and exclude spans classified as 'other'.
+  //
+  // The number of vectors of annotated spans will match the number
+  // of input fragments. The order of annotation span vectors will match the
+  // order of input fragments. If annotation is not possible for any of the
+  // annotators, no annotation is returned.
+  StatusOr<std::vector<std::vector<AnnotatedSpan>>> AnnotateStructuredInput(
+      const std::vector<InputFragment>& string_fragments,
+      const AnnotationOptions& options = AnnotationOptions()) const;
+
   // Annotates given input text. The annotations are sorted by their position
   // in the context string and exclude spans classified as 'other'.
   std::vector<AnnotatedSpan> Annotate(
@@ -429,6 +451,15 @@
       const EnabledEntityTypes& is_entity_type_enabled,
       std::vector<AnnotatedSpan>* annotated_spans) const;
 
+  // Runs only annotators that do not support structured input. Does conflict
+  // resolution, removal of disallowed entities and sorting on both newly
+  // generated candidates and passed-in entities.
+  // Returns Status::Error if the annotation failed, in which case the vector of
+  // candidates should be ignored.
+  Status AnnotateSingleInput(const std::string& context,
+                             const AnnotationOptions& options,
+                             std::vector<AnnotatedSpan>* candidates) const;
+
   // Parses the money amount into whole and decimal part and fills in the
   // entity data information.
   bool ParseAndFillInMoneyAmount(std::string* serialized_entity_data) const;
@@ -460,6 +491,7 @@
   std::unique_ptr<const DurationAnnotator> duration_annotator_;
   std::unique_ptr<const PersonNameEngine> person_name_engine_;
   std::unique_ptr<const TranslateAnnotator> translate_annotator_;
+  std::unique_ptr<const ExperimentalAnnotator> experimental_annotator_;
 
   // Builder for creating extra data.
   const reflection::Schema* entity_data_schema_;
@@ -479,6 +511,14 @@
 
   // Model for language identification.
   const libtextclassifier3::mobile::lang_id::LangId* lang_id_ = nullptr;
+
+  // If true, will prioritize the longest annotation during conflict resolution.
+  bool prioritize_longest_annotation_ = false;
+
+  // If true, the annotator will perform conflict resolution between the
+  // different sub-annotators also in the RAW mode. If false, no conflict
+  // resolution will be performed in RAW mode.
+  bool do_conflict_resolution_in_raw_mode_ = true;
 };
 
 namespace internal {

diff --git a/native/annotator/annotator_jni.cc b/native/annotator/annotator_jni.cc
index 4d5b4df..3e04f7f 100644
--- a/native/annotator/annotator_jni.cc
+++ b/native/annotator/annotator_jni.cc

@@ -27,6 +27,7 @@
 #include "annotator/annotator_jni_common.h"
 #include "annotator/types.h"
 #include "utils/base/integral_types.h"
+#include "utils/base/status_macros.h"
 #include "utils/base/statusor.h"
 #include "utils/calendar/calendar.h"
 #include "utils/intents/intent-generator.h"
@@ -327,7 +328,8 @@
             datetime_parse_class_constructor, device_locales, options, context,
             selection_indices, classification_result[i],
             generate_intents && (i == 0)));
-    env->SetObjectArrayElement(results.get(), i, result.get());
+    TC3_RETURN_IF_ERROR(
+        JniHelper::SetObjectArrayElement(env, results.get(), i, result.get()));
   }
   return results;
 }
@@ -451,7 +453,9 @@
 using libtextclassifier3::ConvertIndicesUTF8ToBMP;
 using libtextclassifier3::FromJavaAnnotationOptions;
 using libtextclassifier3::FromJavaClassificationOptions;
+using libtextclassifier3::FromJavaInputFragment;
 using libtextclassifier3::FromJavaSelectionOptions;
+using libtextclassifier3::InputFragment;
 using libtextclassifier3::ToStlString;
 
 TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME, nativeNewAnnotator)
@@ -513,7 +517,8 @@
   Annotator* model = reinterpret_cast<AnnotatorJniContext*>(ptr)->model();
 
   std::string serialized_config_string;
-  const int length = env->GetArrayLength(serialized_config);
+  TC3_ASSIGN_OR_RETURN_FALSE(jsize length,
+                             JniHelper::GetArrayLength(env, serialized_config));
   serialized_config_string.resize(length);
   env->GetByteArrayRegion(serialized_config, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
@@ -532,7 +537,8 @@
   Annotator* model = reinterpret_cast<AnnotatorJniContext*>(ptr)->model();
 
   std::string serialized_config_string;
-  const int length = env->GetArrayLength(serialized_config);
+  TC3_ASSIGN_OR_RETURN_FALSE(jsize length,
+                             JniHelper::GetArrayLength(env, serialized_config));
   serialized_config_string.resize(length);
   env->GetByteArrayRegion(serialized_config, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
@@ -551,7 +557,8 @@
   Annotator* model = reinterpret_cast<AnnotatorJniContext*>(ptr)->model();
 
   std::string serialized_config_string;
-  const int length = env->GetArrayLength(serialized_config);
+  TC3_ASSIGN_OR_RETURN_FALSE(jsize length,
+                             JniHelper::GetArrayLength(env, serialized_config));
   serialized_config_string.resize(length);
   env->GetByteArrayRegion(serialized_config, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
@@ -677,10 +684,12 @@
       JniHelper::FindClass(
           env, TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$AnnotatedSpan"));
 
-  jmethodID result_class_constructor =
-      env->GetMethodID(result_class.get(), "<init>",
-                       "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
-                       "$ClassificationResult;)V");
+  TC3_ASSIGN_OR_RETURN_NULL(
+      jmethodID result_class_constructor,
+      JniHelper::GetMethodID(
+          env, result_class.get(), "<init>",
+          "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+          "$ClassificationResult;)V"));
 
   TC3_ASSIGN_OR_RETURN_NULL(
       ScopedLocalRef<jobjectArray> results,
@@ -701,11 +710,114 @@
                              static_cast<jint>(span_bmp.first),
                              static_cast<jint>(span_bmp.second),
                              classification_results.get()));
-    env->SetObjectArrayElement(results.get(), i, result.get());
+    if (!JniHelper::SetObjectArrayElement(env, results.get(), i, result.get())
+             .ok()) {
+      return nullptr;
+    }
   }
   return results.release();
 }
 
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME,
+               nativeAnnotateStructuredInput)
+(JNIEnv* env, jobject thiz, jlong ptr, jobjectArray jinput_fragments,
+ jobject options) {
+  if (!ptr) {
+    return nullptr;
+  }
+  const AnnotatorJniContext* model_context =
+      reinterpret_cast<AnnotatorJniContext*>(ptr);
+
+  std::vector<InputFragment> string_fragments;
+  TC3_ASSIGN_OR_RETURN_NULL(jsize input_size,
+                            JniHelper::GetArrayLength(env, jinput_fragments));
+  for (int i = 0; i < input_size; ++i) {
+    TC3_ASSIGN_OR_RETURN_NULL(
+        ScopedLocalRef<jobject> jfragment,
+        JniHelper::GetObjectArrayElement<jobject>(env, jinput_fragments, i));
+    TC3_ASSIGN_OR_RETURN_NULL(InputFragment fragment,
+                              FromJavaInputFragment(env, jfragment.get()));
+    string_fragments.push_back(std::move(fragment));
+  }
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      libtextclassifier3::AnnotationOptions annotation_options,
+      FromJavaAnnotationOptions(env, options));
+  const StatusOr<std::vector<std::vector<AnnotatedSpan>>> annotations_or =
+      model_context->model()->AnnotateStructuredInput(string_fragments,
+                                                      annotation_options);
+  if (!annotations_or.ok()) {
+    TC3_LOG(ERROR) << "Annotation of structured input failed with error: "
+                   << annotations_or.status().error_message();
+    return nullptr;
+  }
+
+  std::vector<std::vector<AnnotatedSpan>> annotations =
+      std::move(annotations_or.ValueOrDie());
+  TC3_ASSIGN_OR_RETURN_NULL(
+      ScopedLocalRef<jclass> span_class,
+      JniHelper::FindClass(
+          env, TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$AnnotatedSpan"));
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      jmethodID span_class_constructor,
+      JniHelper::GetMethodID(
+          env, span_class.get(), "<init>",
+          "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+          "$ClassificationResult;)V"));
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      ScopedLocalRef<jclass> span_class_array,
+      JniHelper::FindClass(env,
+                           "[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                           "$AnnotatedSpan;"));
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      ScopedLocalRef<jobjectArray> results,
+      JniHelper::NewObjectArray(env, input_size, span_class_array.get()));
+
+  for (int fragment_index = 0; fragment_index < annotations.size();
+       ++fragment_index) {
+    TC3_ASSIGN_OR_RETURN_NULL(
+        ScopedLocalRef<jobjectArray> jfragmentAnnotations,
+        JniHelper::NewObjectArray(env, annotations[fragment_index].size(),
+                                  span_class.get()));
+    for (int annotation_index = 0;
+         annotation_index < annotations[fragment_index].size();
+         ++annotation_index) {
+      CodepointSpan span_bmp = ConvertIndicesUTF8ToBMP(
+          string_fragments[fragment_index].text,
+          annotations[fragment_index][annotation_index].span);
+      TC3_ASSIGN_OR_RETURN_NULL(
+          ScopedLocalRef<jobjectArray> classification_results,
+          ClassificationResultsToJObjectArray(
+              env, model_context,
+              annotations[fragment_index][annotation_index].classification));
+      TC3_ASSIGN_OR_RETURN_NULL(
+          ScopedLocalRef<jobject> single_annotation,
+          JniHelper::NewObject(env, span_class.get(), span_class_constructor,
+                               static_cast<jint>(span_bmp.first),
+                               static_cast<jint>(span_bmp.second),
+                               classification_results.get()));
+
+      if (!JniHelper::SetObjectArrayElement(env, jfragmentAnnotations.get(),
+                                            annotation_index,
+                                            single_annotation.get())
+               .ok()) {
+        return nullptr;
+      }
+    }
+
+    if (!JniHelper::SetObjectArrayElement(env, results.get(), fragment_index,
+                                          jfragmentAnnotations.get())
+             .ok()) {
+      return nullptr;
+    }
+  }
+
+  return results.release();
+}
+
 TC3_JNI_METHOD(jbyteArray, TC3_ANNOTATOR_CLASS_NAME,
                nativeLookUpKnowledgeEntity)
 (JNIEnv* env, jobject thiz, jlong ptr, jstring id) {

diff --git a/native/annotator/annotator_jni.h b/native/annotator/annotator_jni.h
index 55893a4..39a9d9a 100644
--- a/native/annotator/annotator_jni.h
+++ b/native/annotator/annotator_jni.h

@@ -68,6 +68,11 @@
  jint selection_end, jobject options, jobject app_context,
  jstring device_locales);
 
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME,
+               nativeAnnotateStructuredInput)
+(JNIEnv* env, jobject thiz, jlong ptr, jobjectArray jinput_fragments,
+ jobject options);
+
 TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME, nativeAnnotate)
 (JNIEnv* env, jobject thiz, jlong ptr, jstring context, jobject options);
 

diff --git a/native/annotator/annotator_jni_common.cc b/native/annotator/annotator_jni_common.cc
index cbe70a3..de58b70 100644
--- a/native/annotator/annotator_jni_common.cc
+++ b/native/annotator/annotator_jni_common.cc

@@ -235,6 +235,24 @@
       JniHelper::CallBooleanMethod(env, joptions,
                                    is_serialized_entity_data_enabled_method));
 
+  // .hasLocationPermission()
+  TC3_ASSIGN_OR_RETURN(jmethodID has_location_permission_method,
+                       JniHelper::GetMethodID(env, options_class.get(),
+                                              "hasLocationPermission", "()Z"));
+  TC3_ASSIGN_OR_RETURN(bool has_location_permission,
+                       JniHelper::CallBooleanMethod(
+                           env, joptions, has_location_permission_method));
+
+  // .hasPersonalizationPermission()
+  TC3_ASSIGN_OR_RETURN(
+      jmethodID has_personalization_permission_method,
+      JniHelper::GetMethodID(env, options_class.get(),
+                             "hasPersonalizationPermission", "()Z"));
+  TC3_ASSIGN_OR_RETURN(
+      bool has_personalization_permission,
+      JniHelper::CallBooleanMethod(env, joptions,
+                                   has_personalization_permission_method));
+
   TC3_ASSIGN_OR_RETURN(
       AnnotationOptions annotation_options,
       FromJavaOptionsInternal<AnnotationOptions>(
@@ -244,7 +262,74 @@
                        EntityTypesFromJObject(env, entity_types.get()));
   annotation_options.is_serialized_entity_data_enabled =
       is_serialized_entity_data_enabled;
+  annotation_options.permissions.has_location_permission =
+      has_location_permission;
+  annotation_options.permissions.has_personalization_permission =
+      has_personalization_permission;
   return annotation_options;
 }
 
+StatusOr<InputFragment> FromJavaInputFragment(JNIEnv* env, jobject jfragment) {
+  if (!jfragment) {
+    return Status(StatusCode::INTERNAL, "Called with null input fragment.");
+  }
+  InputFragment fragment;
+
+  TC3_ASSIGN_OR_RETURN(
+      ScopedLocalRef<jclass> fragment_class,
+      JniHelper::FindClass(
+          env, TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$InputFragment"));
+
+  // .getText()
+  TC3_ASSIGN_OR_RETURN(
+      jmethodID get_text,
+      JniHelper::GetMethodID(env, fragment_class.get(), "getText",
+                             "()Ljava/lang/String;"));
+
+  TC3_ASSIGN_OR_RETURN(
+      ScopedLocalRef<jstring> text,
+      JniHelper::CallObjectMethod<jstring>(env, jfragment, get_text));
+
+  TC3_ASSIGN_OR_RETURN(fragment.text, ToStlString(env, text.get()));
+
+  // .hasDatetimeOptions()
+  TC3_ASSIGN_OR_RETURN(jmethodID has_date_time_options_method,
+                       JniHelper::GetMethodID(env, fragment_class.get(),
+                                              "hasDatetimeOptions", "()Z"));
+
+  TC3_ASSIGN_OR_RETURN(bool has_date_time_options,
+                       JniHelper::CallBooleanMethod(
+                           env, jfragment, has_date_time_options_method));
+
+  if (has_date_time_options) {
+    // .getReferenceTimeMsUtc()
+    TC3_ASSIGN_OR_RETURN(
+        jmethodID get_reference_time_method,
+        JniHelper::GetMethodID(env, fragment_class.get(),
+                               "getReferenceTimeMsUtc", "()J"));
+
+    TC3_ASSIGN_OR_RETURN(
+        int64 reference_time,
+        JniHelper::CallLongMethod(env, jfragment, get_reference_time_method));
+
+    // .getReferenceTimezone()
+    TC3_ASSIGN_OR_RETURN(
+        jmethodID get_reference_timezone_method,
+        JniHelper::GetMethodID(env, fragment_class.get(),
+                               "getReferenceTimezone", "()Ljava/lang/String;"));
+
+    TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jstring> jreference_timezone,
+                         JniHelper::CallObjectMethod<jstring>(
+                             env, jfragment, get_reference_timezone_method));
+
+    TC3_ASSIGN_OR_RETURN(std::string reference_timezone,
+                         ToStlString(env, jreference_timezone.get()));
+
+    fragment.datetime_options =
+        DatetimeOptions{.reference_time_ms_utc = reference_time,
+                        .reference_timezone = reference_timezone};
+  }
+
+  return fragment;
+}
 }  // namespace libtextclassifier3

diff --git a/native/annotator/annotator_jni_common.h b/native/annotator/annotator_jni_common.h
index 4ad984c..cadd2fd 100644
--- a/native/annotator/annotator_jni_common.h
+++ b/native/annotator/annotator_jni_common.h

@@ -40,6 +40,8 @@
 StatusOr<AnnotationOptions> FromJavaAnnotationOptions(JNIEnv* env,
                                                       jobject joptions);
 
+StatusOr<InputFragment> FromJavaInputFragment(JNIEnv* env, jobject jfragment);
+
 }  // namespace libtextclassifier3
 
 #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_COMMON_H_

diff --git a/native/annotator/annotator_jni_test.cc b/native/annotator/annotator_jni_test.cc
new file mode 100644
index 0000000..929fb59
--- /dev/null
+++ b/native/annotator/annotator_jni_test.cc

@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/annotator_jni.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(Annotator, ConvertIndicesBMPUTF8) {
+  // Test boundary cases.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello", {0, 5}), std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello", {0, 5}), std::make_pair(0, 5));
+
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {0, 5}),
+            std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {0, 5}),
+            std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁ello world", {0, 6}),
+            std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁ello world", {0, 5}),
+            std::make_pair(0, 6));
+
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello world", {6, 11}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello world", {6, 11}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("hello worl😁", {6, 12}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("hello worl😁", {6, 11}),
+            std::make_pair(6, 12));
+
+  // Simple example where the longer character is before the selection.
+  // The character 😁 is U+1F601.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hello World.", {3, 8}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hello World.", {2, 7}),
+            std::make_pair(3, 8));
+
+  // Longer character is before and in selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hell😁 World.", {3, 9}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hell😁 World.", {2, 7}),
+            std::make_pair(3, 9));
+
+  // Longer character is before and after selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hello😁World.", {3, 8}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hello😁World.", {2, 7}),
+            std::make_pair(3, 8));
+
+  // Longer character is before, in and after selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8("😁 Hell😁😁World.", {3, 9}),
+            std::make_pair(2, 7));
+
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP("😁 Hell😁😁World.", {2, 7}),
+            std::make_pair(3, 9));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/cached-features_test.cc b/native/annotator/cached-features_test.cc
new file mode 100644
index 0000000..702f3ca
--- /dev/null
+++ b/native/annotator/cached-features_test.cc

@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/cached-features.h"
+
+#include "annotator/model-executor.h"
+#include "utils/tensor-view.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::ElementsAreArray;
+using testing::FloatEq;
+using testing::Matcher;
+
+namespace libtextclassifier3 {
+namespace {
+
+Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
+  std::vector<Matcher<float>> matchers;
+  for (const float value : values) {
+    matchers.push_back(FloatEq(value));
+  }
+  return ElementsAreArray(matchers);
+}
+
+std::unique_ptr<std::vector<float>> MakeFeatures(int num_tokens) {
+  std::unique_ptr<std::vector<float>> features(new std::vector<float>());
+  for (int i = 1; i <= num_tokens; ++i) {
+    features->push_back(i * 11.0f);
+    features->push_back(-i * 11.0f);
+    features->push_back(i * 0.1f);
+  }
+  return features;
+}
+
+std::vector<float> GetCachedClickContextFeatures(
+    const CachedFeatures& cached_features, int click_pos) {
+  std::vector<float> output_features;
+  cached_features.AppendClickContextFeaturesForClick(click_pos,
+                                                     &output_features);
+  return output_features;
+}
+
+std::vector<float> GetCachedBoundsSensitiveFeatures(
+    const CachedFeatures& cached_features, TokenSpan selected_span) {
+  std::vector<float> output_features;
+  cached_features.AppendBoundsSensitiveFeaturesForSpan(selected_span,
+                                                       &output_features);
+  return output_features;
+}
+
+TEST(CachedFeaturesTest, ClickContext) {
+  FeatureProcessorOptionsT options;
+  options.context_size = 2;
+  options.feature_version = 1;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(CreateFeatureProcessorOptions(builder, &options));
+  flatbuffers::DetachedBuffer options_fb = builder.Release();
+
+  std::unique_ptr<std::vector<float>> features = MakeFeatures(9);
+  std::unique_ptr<std::vector<float>> padding_features(
+      new std::vector<float>{112233.0, -112233.0, 321.0});
+
+  const std::unique_ptr<CachedFeatures> cached_features =
+      CachedFeatures::Create(
+          {3, 10}, std::move(features), std::move(padding_features),
+          flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
+          /*feature_vector_size=*/3);
+  ASSERT_TRUE(cached_features);
+
+  EXPECT_THAT(GetCachedClickContextFeatures(*cached_features, 5),
+              ElementsAreFloat({11.0, -11.0, 0.1, 22.0, -22.0, 0.2, 33.0, -33.0,
+                                0.3, 44.0, -44.0, 0.4, 55.0, -55.0, 0.5}));
+
+  EXPECT_THAT(GetCachedClickContextFeatures(*cached_features, 6),
+              ElementsAreFloat({22.0, -22.0, 0.2, 33.0, -33.0, 0.3, 44.0, -44.0,
+                                0.4, 55.0, -55.0, 0.5, 66.0, -66.0, 0.6}));
+
+  EXPECT_THAT(GetCachedClickContextFeatures(*cached_features, 7),
+              ElementsAreFloat({33.0, -33.0, 0.3, 44.0, -44.0, 0.4, 55.0, -55.0,
+                                0.5, 66.0, -66.0, 0.6, 77.0, -77.0, 0.7}));
+}
+
+TEST(CachedFeaturesTest, BoundsSensitive) {
+  std::unique_ptr<FeatureProcessorOptions_::BoundsSensitiveFeaturesT> config(
+      new FeatureProcessorOptions_::BoundsSensitiveFeaturesT());
+  config->enabled = true;
+  config->num_tokens_before = 2;
+  config->num_tokens_inside_left = 2;
+  config->num_tokens_inside_right = 2;
+  config->num_tokens_after = 2;
+  config->include_inside_bag = true;
+  config->include_inside_length = true;
+  FeatureProcessorOptionsT options;
+  options.bounds_sensitive_features = std::move(config);
+  options.feature_version = 2;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(CreateFeatureProcessorOptions(builder, &options));
+  flatbuffers::DetachedBuffer options_fb = builder.Release();
+
+  std::unique_ptr<std::vector<float>> features = MakeFeatures(9);
+  std::unique_ptr<std::vector<float>> padding_features(
+      new std::vector<float>{112233.0, -112233.0, 321.0});
+
+  const std::unique_ptr<CachedFeatures> cached_features =
+      CachedFeatures::Create(
+          {3, 9}, std::move(features), std::move(padding_features),
+          flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
+          /*feature_vector_size=*/3);
+  ASSERT_TRUE(cached_features);
+
+  EXPECT_THAT(
+      GetCachedBoundsSensitiveFeatures(*cached_features, {5, 8}),
+      ElementsAreFloat({11.0,     -11.0,     0.1,   22.0,  -22.0, 0.2,   33.0,
+                        -33.0,    0.3,       44.0,  -44.0, 0.4,   44.0,  -44.0,
+                        0.4,      55.0,      -55.0, 0.5,   66.0,  -66.0, 0.6,
+                        112233.0, -112233.0, 321.0, 44.0,  -44.0, 0.4,   3.0}));
+
+  EXPECT_THAT(
+      GetCachedBoundsSensitiveFeatures(*cached_features, {5, 7}),
+      ElementsAreFloat({11.0,  -11.0, 0.1,   22.0,  -22.0, 0.2,   33.0,
+                        -33.0, 0.3,   44.0,  -44.0, 0.4,   33.0,  -33.0,
+                        0.3,   44.0,  -44.0, 0.4,   55.0,  -55.0, 0.5,
+                        66.0,  -66.0, 0.6,   38.5,  -38.5, 0.35,  2.0}));
+
+  EXPECT_THAT(
+      GetCachedBoundsSensitiveFeatures(*cached_features, {6, 8}),
+      ElementsAreFloat({22.0,     -22.0,     0.2,   33.0,  -33.0, 0.3,   44.0,
+                        -44.0,    0.4,       55.0,  -55.0, 0.5,   44.0,  -44.0,
+                        0.4,      55.0,      -55.0, 0.5,   66.0,  -66.0, 0.6,
+                        112233.0, -112233.0, 321.0, 49.5,  -49.5, 0.45,  2.0}));
+
+  EXPECT_THAT(
+      GetCachedBoundsSensitiveFeatures(*cached_features, {6, 7}),
+      ElementsAreFloat({22.0,     -22.0,     0.2,   33.0,     -33.0,     0.3,
+                        44.0,     -44.0,     0.4,   112233.0, -112233.0, 321.0,
+                        112233.0, -112233.0, 321.0, 44.0,     -44.0,     0.4,
+                        55.0,     -55.0,     0.5,   66.0,     -66.0,     0.6,
+                        44.0,     -44.0,     0.4,   1.0}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/collections.h b/native/annotator/collections.h
index 2718bae..417b447 100644
--- a/native/annotator/collections.h
+++ b/native/annotator/collections.h

@@ -139,6 +139,11 @@
         *[]() { return new std::string("url"); }();
     return value;
   }
+  static const std::string& OtpCode() {
+    static const std::string& value =
+        *[]() { return new std::string("otp_code"); }();
+    return value;
+  }
 };
 
 }  // namespace libtextclassifier3

diff --git a/native/annotator/datetime/extractor.cc b/native/annotator/datetime/extractor.cc
index 9c2c919..b8e1b7a 100644
--- a/native/annotator/datetime/extractor.cc
+++ b/native/annotator/datetime/extractor.cc

@@ -341,7 +341,7 @@
   int running_value = -1;
   // Simple math to make sure we handle written numerical modifiers correctly
   // so that :="fifty one  thousand and one" maps to 51001 and not 50 1 1000 1.
-  for (const std::pair<int, int> position_number_pair : found_numbers) {
+  for (const std::pair<int, int>& position_number_pair : found_numbers) {
     if (running_value >= 0) {
       if (running_value > position_number_pair.second) {
         sum += running_value;
@@ -473,6 +473,7 @@
                       {DatetimeExtractorType_NEXT, 1},
                       {DatetimeExtractorType_NEXT_OR_SAME, 1},
                       {DatetimeExtractorType_LAST, -1},
+                      {DatetimeExtractorType_PAST, -1},
                   },
                   relative_count);
 }

diff --git a/native/annotator/datetime/extractor.h b/native/annotator/datetime/extractor.h
index 097dd95..0f92b2a 100644
--- a/native/annotator/datetime/extractor.h
+++ b/native/annotator/datetime/extractor.h

@@ -44,9 +44,9 @@
 // (DateParseDate) from the current match of the passed RegexMatcher.
 class DatetimeExtractor {
  public:
-  DatetimeExtractor(
+  explicit DatetimeExtractor(
       const CompiledRule& rule, const UniLib::RegexMatcher& matcher,
-      int locale_id, const UniLib& unilib,
+      int locale_id, const UniLib* unilib,
       const std::vector<std::unique_ptr<const UniLib::RegexPattern>>&
           extractor_rules,
       const std::unordered_map<DatetimeExtractorType,
@@ -55,7 +55,7 @@
       : rule_(rule),
         matcher_(matcher),
         locale_id_(locale_id),
-        unilib_(unilib),
+        unilib_(*unilib),
         rules_(extractor_rules),
         type_and_locale_to_rule_(type_and_locale_to_extractor_rule) {}
   bool Extract(DatetimeParsedData* result, CodepointSpan* result_span) const;

diff --git a/native/annotator/datetime/parser.cc b/native/annotator/datetime/parser.cc
index a8305a0..72fd3ab 100644
--- a/native/annotator/datetime/parser.cc
+++ b/native/annotator/datetime/parser.cc

@@ -28,8 +28,8 @@
 
 namespace libtextclassifier3 {
 std::unique_ptr<DatetimeParser> DatetimeParser::Instance(
-    const DatetimeModel* model, const UniLib& unilib,
-    const CalendarLib& calendarlib, ZlibDecompressor* decompressor) {
+    const DatetimeModel* model, const UniLib* unilib,
+    const CalendarLib* calendarlib, ZlibDecompressor* decompressor) {
   std::unique_ptr<DatetimeParser> result(
       new DatetimeParser(model, unilib, calendarlib, decompressor));
   if (!result->initialized_) {
@@ -38,10 +38,10 @@
   return result;
 }
 
-DatetimeParser::DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
-                               const CalendarLib& calendarlib,
+DatetimeParser::DatetimeParser(const DatetimeModel* model, const UniLib* unilib,
+                               const CalendarLib* calendarlib,
                                ZlibDecompressor* decompressor)
-    : unilib_(unilib), calendarlib_(calendarlib) {
+    : unilib_(*unilib), calendarlib_(*calendarlib) {
   initialized_ = false;
 
   if (model == nullptr) {
@@ -54,7 +54,7 @@
         for (const DatetimeModelPattern_::Regex* regex : *pattern->regexes()) {
           std::unique_ptr<UniLib::RegexPattern> regex_pattern =
               UncompressMakeRegexPattern(
-                  unilib, regex->pattern(), regex->compressed_pattern(),
+                  unilib_, regex->pattern(), regex->compressed_pattern(),
                   model->lazy_regex_compilation(), decompressor);
           if (!regex_pattern) {
             TC3_LOG(ERROR) << "Couldn't create rule pattern.";
@@ -75,7 +75,7 @@
     for (const DatetimeModelExtractor* extractor : *model->extractors()) {
       std::unique_ptr<UniLib::RegexPattern> regex_pattern =
           UncompressMakeRegexPattern(
-              unilib, extractor->pattern(), extractor->compressed_pattern(),
+              unilib_, extractor->pattern(), extractor->compressed_pattern(),
               model->lazy_regex_compilation(), decompressor);
       if (!regex_pattern) {
         TC3_LOG(ERROR) << "Couldn't create extractor pattern";
@@ -357,7 +357,7 @@
                                      std::vector<DatetimeParseResult>* results,
                                      CodepointSpan* result_span) const {
   DatetimeParsedData parse;
-  DatetimeExtractor extractor(rule, matcher, locale_id, unilib_,
+  DatetimeExtractor extractor(rule, matcher, locale_id, &unilib_,
                               extractor_rules_,
                               type_and_locale_to_extractor_rule_);
   if (!extractor.Extract(&parse, result_span)) {

diff --git a/native/annotator/datetime/parser.h b/native/annotator/datetime/parser.h
index 2b8b615..8b58388 100644
--- a/native/annotator/datetime/parser.h
+++ b/native/annotator/datetime/parser.h

@@ -39,8 +39,8 @@
 class DatetimeParser {
  public:
   static std::unique_ptr<DatetimeParser> Instance(
-      const DatetimeModel* model, const UniLib& unilib,
-      const CalendarLib& calendarlib, ZlibDecompressor* decompressor);
+      const DatetimeModel* model, const UniLib* unilib,
+      const CalendarLib* calendarlib, ZlibDecompressor* decompressor);
 
   // Parses the dates in 'input' and fills result. Makes sure that the results
   // do not overlap.
@@ -60,9 +60,9 @@
              std::vector<DatetimeParseResultSpan>* results) const;
 
  protected:
-  DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
-                 const CalendarLib& calendarlib,
-                 ZlibDecompressor* decompressor);
+  explicit DatetimeParser(const DatetimeModel* model, const UniLib* unilib,
+                          const CalendarLib* calendarlib,
+                          ZlibDecompressor* decompressor);
 
   // Returns a list of locale ids for given locale spec string (comma-separated
   // locale names). Assigns the first parsed locale to reference_locale.

diff --git a/native/annotator/duration/duration_test.cc b/native/annotator/duration/duration_test.cc
new file mode 100644
index 0000000..a0985a2
--- /dev/null
+++ b/native/annotator/duration/duration_test.cc

@@ -0,0 +1,567 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/duration/duration.h"
+
+#include <string>
+#include <vector>
+
+#include "annotator/collections.h"
+#include "annotator/model_generated.h"
+#include "annotator/types-test-util.h"
+#include "annotator/types.h"
+#include "utils/test-utils.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::AllOf;
+using testing::ElementsAre;
+using testing::Field;
+using testing::IsEmpty;
+
+const DurationAnnotatorOptions* TestingDurationAnnotatorOptions() {
+  static const flatbuffers::DetachedBuffer* options_data = []() {
+    DurationAnnotatorOptionsT options;
+    options.enabled = true;
+
+    options.week_expressions.push_back("week");
+    options.week_expressions.push_back("weeks");
+
+    options.day_expressions.push_back("day");
+    options.day_expressions.push_back("days");
+
+    options.hour_expressions.push_back("hour");
+    options.hour_expressions.push_back("hours");
+
+    options.minute_expressions.push_back("minute");
+    options.minute_expressions.push_back("minutes");
+
+    options.second_expressions.push_back("second");
+    options.second_expressions.push_back("seconds");
+
+    options.filler_expressions.push_back("and");
+    options.filler_expressions.push_back("a");
+    options.filler_expressions.push_back("an");
+    options.filler_expressions.push_back("one");
+
+    options.half_expressions.push_back("half");
+
+    options.sub_token_separator_codepoints.push_back('-');
+
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(DurationAnnotatorOptions::Pack(builder, &options));
+    return new flatbuffers::DetachedBuffer(builder.Release());
+  }();
+
+  return flatbuffers::GetRoot<DurationAnnotatorOptions>(options_data->data());
+}
+
+std::unique_ptr<FeatureProcessor> BuildFeatureProcessor(const UniLib* unilib) {
+  static const flatbuffers::DetachedBuffer* options_data = []() {
+    FeatureProcessorOptionsT options;
+    options.context_size = 1;
+    options.max_selection_span = 1;
+    options.snap_label_span_boundaries_to_containing_tokens = false;
+    options.ignored_span_boundary_codepoints.push_back(',');
+
+    options.tokenization_codepoint_config.emplace_back(
+        new TokenizationCodepointRangeT());
+    auto& config = options.tokenization_codepoint_config.back();
+    config->start = 32;
+    config->end = 33;
+    config->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(FeatureProcessorOptions::Pack(builder, &options));
+    return new flatbuffers::DetachedBuffer(builder.Release());
+  }();
+
+  const FeatureProcessorOptions* feature_processor_options =
+      flatbuffers::GetRoot<FeatureProcessorOptions>(options_data->data());
+
+  return std::unique_ptr<FeatureProcessor>(
+      new FeatureProcessor(feature_processor_options, unilib));
+}
+
+class DurationAnnotatorTest : public ::testing::Test {
+ protected:
+  DurationAnnotatorTest()
+      : INIT_UNILIB_FOR_TESTING(unilib_),
+        feature_processor_(BuildFeatureProcessor(&unilib_)),
+        duration_annotator_(TestingDurationAnnotatorOptions(),
+                            feature_processor_.get(), &unilib_) {}
+
+  std::vector<Token> Tokenize(const UnicodeText& text) {
+    return feature_processor_->Tokenize(text);
+  }
+
+  UniLib unilib_;
+  std::unique_ptr<FeatureProcessor> feature_processor_;
+  DurationAnnotator duration_annotator_;
+};
+
+TEST_F(DurationAnnotatorTest, ClassifiesSimpleDuration) {
+  ClassificationResult classification;
+  EXPECT_TRUE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("Wake me up in 15 minutes ok?"), {14, 24},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification));
+
+  EXPECT_THAT(classification,
+              AllOf(Field(&ClassificationResult::collection, "duration"),
+                    Field(&ClassificationResult::duration_ms, 15 * 60 * 1000)));
+}
+
+TEST_F(DurationAnnotatorTest, ClassifiesWhenTokensDontAlignWithSelection) {
+  ClassificationResult classification;
+  EXPECT_TRUE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("Wake me up in15 minutesok?"), {13, 23},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification));
+
+  EXPECT_THAT(classification,
+              AllOf(Field(&ClassificationResult::collection, "duration"),
+                    Field(&ClassificationResult::duration_ms, 15 * 60 * 1000)));
+}
+
+TEST_F(DurationAnnotatorTest, DoNotClassifyWhenInputIsInvalid) {
+  ClassificationResult classification;
+  EXPECT_FALSE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("Weird space"), {5, 6},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification));
+}
+
+TEST_F(DurationAnnotatorTest, FindsSimpleDuration) {
+  const UnicodeText text = UTF8ToUnicodeText("Wake me up in 15 minutes ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(14, 24)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                15 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsDurationWithHalfExpression) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 3 and half minutes ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 34)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                3.5 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsComposedDuration) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Wake me up in 3 hours and 5 seconds ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(14, 35)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                3 * 60 * 60 * 1000 + 5 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, AllUnitsAreCovered) {
+  const UnicodeText text = UTF8ToUnicodeText(
+      "See you in a week and a day and an hour and a minute and a second");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(13, 65)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                7 * 24 * 60 * 60 * 1000 + 24 * 60 * 60 * 1000 +
+                                    60 * 60 * 1000 + 60 * 1000 + 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsHalfAnHour) {
+  const UnicodeText text = UTF8ToUnicodeText("Set a timer for half an hour");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 28)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                0.5 * 60 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsWhenHalfIsAfterGranularitySpecification) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 1 hour and a half");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 33)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                1.5 * 60 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsAnHourAndAHalf) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for an hour and a half");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(19, 34)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                1.5 * 60 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest,
+       FindsCorrectlyWhenSecondsComeSecondAndDontHaveNumber) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 10 minutes and a second ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 39)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                10 * 60 * 1000 + 1 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, DoesNotGreedilyTakeFillerWords) {
+  const UnicodeText text = UTF8ToUnicodeText(
+      "Set a timer for a a a 10 minutes and 2 seconds an and an ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(22, 46)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                10 * 60 * 1000 + 2 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, DoesNotCrashWhenJustHalfIsSaid) {
+  const UnicodeText text = UTF8ToUnicodeText("Set a timer for half ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  ASSERT_EQ(result.size(), 0);
+}
+
+TEST_F(DurationAnnotatorTest, StripsPunctuationFromTokens) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 10 ,minutes, ,and, ,2, seconds, ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 46)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                10 * 60 * 1000 + 2 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsCorrectlyWithCombinedQuantityUnitToken) {
+  const UnicodeText text = UTF8ToUnicodeText("Show 5-minute timer.");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(5, 13)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                5 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest,
+       DoesNotIntOverflowWithDurationThatHasMoreThanInt32Millis) {
+  ClassificationResult classification;
+  EXPECT_TRUE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("1400 hours"), {0, 10},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification));
+
+  EXPECT_THAT(classification,
+              AllOf(Field(&ClassificationResult::collection, "duration"),
+                    Field(&ClassificationResult::duration_ms,
+                          1400LL * 60LL * 60LL * 1000LL)));
+}
+
+TEST_F(DurationAnnotatorTest, FindsSimpleDurationIgnoringCase) {
+  const UnicodeText text = UTF8ToUnicodeText("Wake me up in 15 MiNuTeS ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // The mixed-case unit still matches: span (14, 24) is "15 MiNuTeS".
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(14, 24)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                15 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsDurationWithHalfExpressionIgnoringCase) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 3 and HaLf minutes ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // Mixed-case "HaLf" still matches: span (16, 34), 3.5 minutes in ms.
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 34)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                3.5 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest,
+       FindsDurationWithHalfExpressionIgnoringFillerWordCase) {
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 3 AnD half minutes ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // Mixed-case filler "AnD" still matches: span (16, 34), 3.5 minutes in ms.
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(16, 34)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                3.5 * 60 * 1000)))))));
+}
+
+TEST_F(DurationAnnotatorTest, CorrectlyAnnotatesSpanWithDanglingQuantity) {
+  const UnicodeText text = UTF8ToUnicodeText("20 minutes 10");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // Span (0, 13) covers the whole text, including the trailing "10".
+  // TODO(b/144752747) Include test for duration_ms.
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(0, 13)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(Field(&ClassificationResult::collection,
+                                              "duration")))))));
+}
+
+const DurationAnnotatorOptions* TestingJapaneseDurationAnnotatorOptions() {
+  static const flatbuffers::DetachedBuffer* options_data = []() {
+    DurationAnnotatorOptionsT options;
+    options.enabled = true;
+    // Japanese unit expressions, one group per time unit.
+    options.week_expressions.push_back("週間");
+
+    options.day_expressions.push_back("日間");
+
+    options.hour_expressions.push_back("時間");
+
+    options.minute_expressions.push_back("分");
+    options.minute_expressions.push_back("分間");
+
+    options.second_expressions.push_back("秒");
+    options.second_expressions.push_back("秒間");
+
+    options.half_expressions.push_back("半");
+    // See IgnoresDurationWithoutQuantity and IgnoresDanglingQuantity below.
+    options.require_quantity = true;
+    options.enable_dangling_quantity_interpretation = false;
+    // Pack once into a heap buffer that this function never frees.
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(DurationAnnotatorOptions::Pack(builder, &options));
+    return new flatbuffers::DetachedBuffer(builder.Release());
+  }();
+  // The returned table points into the static buffer built above.
+  return flatbuffers::GetRoot<DurationAnnotatorOptions>(options_data->data());
+}
+
+class JapaneseDurationAnnotatorTest : public ::testing::Test {
+ protected:
+  JapaneseDurationAnnotatorTest()
+      : INIT_UNILIB_FOR_TESTING(unilib_),
+        feature_processor_(BuildFeatureProcessor(&unilib_)),
+        duration_annotator_(TestingJapaneseDurationAnnotatorOptions(),
+                            feature_processor_.get(), &unilib_) {}
+  // Tokenizes `text` with the fixture's feature processor.
+  std::vector<Token> Tokenize(const UnicodeText& text) {
+    return feature_processor_->Tokenize(text);
+  }
+  // Declared in dependency order: later members are built from earlier ones.
+  UniLib unilib_;
+  std::unique_ptr<FeatureProcessor> feature_processor_;
+  DurationAnnotator duration_annotator_;
+};
+
+TEST_F(JapaneseDurationAnnotatorTest, FindsDuration) {
+  const UnicodeText text = UTF8ToUnicodeText("10 分 の アラーム");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // Span (0, 4) is "10 分"; the expected value is 10 minutes in ms.
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(0, 4)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                10 * 60 * 1000)))))));
+}
+
+TEST_F(JapaneseDurationAnnotatorTest, FindsDurationWithHalfExpression) {
+  const UnicodeText text = UTF8ToUnicodeText("2 分 半 の アラーム");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // Span (0, 5) is "2 分 半"; the expected value is 2.5 minutes in ms.
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(0, 5)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                2.5 * 60 * 1000)))))));
+}
+
+TEST_F(JapaneseDurationAnnotatorTest, IgnoresDurationWithoutQuantity) {
+  const UnicodeText text = UTF8ToUnicodeText("分 の アラーム");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // The text has a unit ("分") but no number, so nothing is annotated.
+  EXPECT_THAT(result, IsEmpty());
+}
+
+TEST_F(JapaneseDurationAnnotatorTest, IgnoresDanglingQuantity) {
+  const UnicodeText text = UTF8ToUnicodeText("2 分 10 の アラーム");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+  // Span (0, 3) is "2 分" only; the trailing "10" is left out (2 minutes).
+  EXPECT_THAT(
+      result,
+      ElementsAre(
+          AllOf(Field(&AnnotatedSpan::span, CodepointSpan(0, 3)),
+                Field(&AnnotatedSpan::classification,
+                      ElementsAre(AllOf(
+                          Field(&ClassificationResult::collection, "duration"),
+                          Field(&ClassificationResult::duration_ms,
+                                2 * 60 * 1000)))))));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/experimental/experimental-dummy.h b/native/annotator/experimental/experimental-dummy.h
new file mode 100644
index 0000000..389aae1
--- /dev/null
+++ b/native/annotator/experimental/experimental-dummy.h

@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_EXPERIMENTAL_EXPERIMENTAL_DUMMY_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_EXPERIMENTAL_EXPERIMENTAL_DUMMY_H_
+
+#include <string>
+#include <vector>
+
+#include "annotator/feature-processor.h"
+#include "annotator/types.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+
+namespace libtextclassifier3 {
+
+class ExperimentalAnnotator {
+ public:
+  // This is the dummy implementation of ExperimentalAnnotator, so it is
+  // always disabled.
+  static constexpr bool IsEnabled() { return false; }
+  // All constructor arguments are ignored by this dummy.
+  explicit ExperimentalAnnotator(const ExperimentalModel* model,
+                                 const FeatureProcessor& feature_processor,
+                                 const UniLib& unilib) {}
+  // Adds no candidates; always returns false.
+  bool Annotate(const UnicodeText& context,
+                std::vector<AnnotatedSpan>* candidates) const {
+    return false;
+  }
+  // Returns an AnnotatedSpan initialized from `click` and `{}`.
+  AnnotatedSpan SuggestSelection(const UnicodeText& context,
+                                 CodepointSpan click) const {
+    return {click, {}};
+  }
+  // Leaves `result` untouched; always returns false.
+  bool ClassifyText(const UnicodeText& context, CodepointSpan click,
+                    ClassificationResult* result) const {
+    return false;
+  }
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_EXPERIMENTAL_EXPERIMENTAL_DUMMY_H_

diff --git a/native/annotator/experimental/experimental.fbs b/native/annotator/experimental/experimental.fbs
new file mode 100755
index 0000000..6e15d04
--- /dev/null
+++ b/native/annotator/experimental/experimental.fbs

@@ -0,0 +1,20 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+namespace libtextclassifier3;
+table ExperimentalModel {  // Declares no fields.
+}
+

diff --git a/native/annotator/experimental/experimental.h b/native/annotator/experimental/experimental.h
new file mode 100644
index 0000000..8144996
--- /dev/null
+++ b/native/annotator/experimental/experimental.h

@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_EXPERIMENTAL_EXPERIMENTAL_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_EXPERIMENTAL_EXPERIMENTAL_H_
+
+#include "annotator/experimental/experimental-dummy.h"
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_EXPERIMENTAL_EXPERIMENTAL_H_

diff --git a/native/annotator/feature-processor.cc b/native/annotator/feature-processor.cc
index 1d3b8f5..8d08574 100644
--- a/native/annotator/feature-processor.cc
+++ b/native/annotator/feature-processor.cc

@@ -661,6 +661,10 @@
       ++num_total;
     }
   }
+  // Avoid division by zero.
+  if (num_total == 0) {
+    return 0.0;
+  }
   return static_cast<float>(num_supported) / static_cast<float>(num_total);
 }
 

diff --git a/native/annotator/feature-processor.h b/native/annotator/feature-processor.h
index 2245b66..78dbbce 100644
--- a/native/annotator/feature-processor.h
+++ b/native/annotator/feature-processor.h

@@ -91,9 +91,10 @@
   // identical.
   typedef std::map<CodepointSpan, std::vector<float>> EmbeddingCache;
 
-  FeatureProcessor(const FeatureProcessorOptions* options, const UniLib* unilib)
+  explicit FeatureProcessor(const FeatureProcessorOptions* options,
+                            const UniLib* unilib)
       : feature_extractor_(internal::BuildTokenFeatureExtractorOptions(options),
-                           *unilib),
+                           unilib),
         options_(options),
         tokenizer_(internal::BuildTokenizer(options, unilib)) {
     MakeLabelMaps();

diff --git a/native/annotator/flatbuffer-utils.cc b/native/annotator/flatbuffer-utils.cc
deleted file mode 100644
index d83d2bb..0000000
--- a/native/annotator/flatbuffer-utils.cc
+++ /dev/null

@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "annotator/flatbuffer-utils.h"
-
-#include <memory>
-
-#include "utils/base/logging.h"
-#include "utils/flatbuffers.h"
-#include "flatbuffers/reflection.h"
-
-namespace libtextclassifier3 {
-
-bool SwapFieldNamesForOffsetsInPath(ModelT* model) {
-  if (model->regex_model == nullptr || model->entity_data_schema.empty()) {
-    // Nothing to do.
-    return true;
-  }
-  const reflection::Schema* schema =
-      LoadAndVerifyFlatbuffer<reflection::Schema>(
-          model->entity_data_schema.data(), model->entity_data_schema.size());
-
-  for (std::unique_ptr<RegexModel_::PatternT>& pattern :
-       model->regex_model->patterns) {
-    for (std::unique_ptr<CapturingGroupT>& group : pattern->capturing_group) {
-      if (group->entity_field_path == nullptr) {
-        continue;
-      }
-
-      if (!SwapFieldNamesForOffsetsInPath(schema,
-                                          group->entity_field_path.get())) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-std::string SwapFieldNamesForOffsetsInPathInSerializedModel(
-    const std::string& model) {
-  std::unique_ptr<ModelT> unpacked_model = UnPackModel(model.c_str());
-  TC3_CHECK(unpacked_model != nullptr);
-  TC3_CHECK(SwapFieldNamesForOffsetsInPath(unpacked_model.get()));
-  flatbuffers::FlatBufferBuilder builder;
-  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
-  return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
-                     builder.GetSize());
-}
-
-}  // namespace libtextclassifier3

diff --git a/native/annotator/flatbuffer-utils.h b/native/annotator/flatbuffer-utils.h
deleted file mode 100644
index a7e5d64..0000000
--- a/native/annotator/flatbuffer-utils.h
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Utility functions for working with FlatBuffers in the annotator model.
-
-#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_FLATBUFFER_UTILS_H_
-#define LIBTEXTCLASSIFIER_ANNOTATOR_FLATBUFFER_UTILS_H_
-
-#include <string>
-
-#include "annotator/model_generated.h"
-
-namespace libtextclassifier3 {
-
-// Resolves field lookups by name to the concrete field offsets in the regex
-// rules of the model.
-bool SwapFieldNamesForOffsetsInPath(ModelT* model);
-
-// Same as above but for a serialized model.
-std::string SwapFieldNamesForOffsetsInPathInSerializedModel(
-    const std::string& model);
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_FLATBUFFER_UTILS_H_

diff --git a/native/annotator/grammar/dates/annotations/annotation-options.h b/native/annotator/grammar/dates/annotations/annotation-options.h
index 7edab3a..29e9939 100755
--- a/native/annotator/grammar/dates/annotations/annotation-options.h
+++ b/native/annotator/grammar/dates/annotations/annotation-options.h

@@ -59,16 +59,6 @@
   //     instance: "Monday" and "6pm".
   bool enable_date_range;
 
-  // If enabled, expand a date series. Must have date_range enabled to be used.
-  // The date range cannot exceed 30 days.
-  //   input: April 4-6, 6:30pm
-  //     If the flag is true, the extracted annotation will contaly 3 instance
-  //     which are April 4 at 6:30pm, April 5 at 6:30pm and April 6 at 6:30pm
-  //     all have the same begin and end annotation
-  //     If the flag is false, the extracted annotation contains one time range
-  //     instance and one date instance
-  bool expand_date_series;
-
   // Timezone in which the input text was written
   std::string reference_timezone;
   // Localization params.
@@ -87,9 +77,9 @@
   // e.g. '9:45' will be resolved to '9:45 AM' and '9:45 PM'.
   bool generate_alternative_interpretations_when_ambiguous;
 
-  // List the ignored tokens in the date string e.g. 12 March @12PM, here '@'
+  // List of ignored spans in the date string, e.g. 12 March @12PM; here '@'
   // can be ignored tokens.
-  std::vector<std::string> ignored_tokens;
+  std::vector<std::string> ignored_spans;
 
   // Default Constructor
   DateAnnotationOptions()
@@ -98,7 +88,6 @@
         include_preposition(false),
         base_timestamp_millis(0),
         enable_date_range(false),
-        expand_date_series(false),
         use_rule_priority_score(false),
         generate_alternative_interpretations_when_ambiguous(false) {}
 };

diff --git a/native/annotator/grammar/dates/annotations/annotation-util_test.cc b/native/annotator/grammar/dates/annotations/annotation-util_test.cc
new file mode 100644
index 0000000..6d25d64
--- /dev/null
+++ b/native/annotator/grammar/dates/annotations/annotation-util_test.cc

@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/grammar/dates/annotations/annotation-util.h"
+
+#include "annotator/grammar/dates/annotations/annotation.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(AnnotationUtilTest, VerifyIntFunctions) {
+  Annotation annotation;
+  // Each Add*Property call returns an index that is checked further below.
+  int index_key1 = AddIntProperty("key1", 1, &annotation);
+  int index_key2 = AddIntProperty("key2", 2, &annotation);
+
+  static const int kValuesKey3[] = {3, 4, 5};
+  int index_key3 =
+      AddRepeatedIntProperty("key3", kValuesKey3, /*size=*/3, &annotation);
+  // Single int values are read back by key.
+  EXPECT_EQ(2, GetIntProperty("key2", annotation));
+  EXPECT_EQ(1, GetIntProperty("key1", annotation));
+  // Known keys map to the returned indices; an unknown key maps to -1.
+  EXPECT_EQ(index_key1, GetPropertyIndex("key1", annotation));
+  EXPECT_EQ(index_key2, GetPropertyIndex("key2", annotation));
+  EXPECT_EQ(index_key3, GetPropertyIndex("key3", annotation));
+  EXPECT_EQ(-1, GetPropertyIndex("invalid_key", annotation));
+}
+
+TEST(AnnotationUtilTest, VerifyAnnotationDataFunctions) {
+  Annotation annotation;
+  // AnnotationData holding one bool property (true), stored under "key1".
+  AnnotationData true_annotation_data;
+  Property true_property;
+  true_property.bool_values.push_back(true);
+  true_annotation_data.properties.push_back(true_property);
+  int index_key1 =
+      AddAnnotationDataProperty("key1", true_annotation_data, &annotation);
+  // AnnotationData holding one bool property (false), stored under "key2".
+  AnnotationData false_annotation_data;
+  Property false_property;
+  false_property.bool_values.push_back(false);
+  false_annotation_data.properties.push_back(false_property);
+  int index_key2 =
+      AddAnnotationDataProperty("key2", false_annotation_data, &annotation);
+  // Both keys map to their returned indices; an unknown key maps to -1.
+  EXPECT_EQ(index_key1, GetPropertyIndex("key1", annotation));
+  EXPECT_EQ(index_key2, GetPropertyIndex("key2", annotation));
+  EXPECT_EQ(-1, GetPropertyIndex("invalid_key", annotation));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/grammar/dates/cfg-datetime-annotator.cc b/native/annotator/grammar/dates/cfg-datetime-annotator.cc
index 255db81..99d3be0 100644
--- a/native/annotator/grammar/dates/cfg-datetime-annotator.cc
+++ b/native/annotator/grammar/dates/cfg-datetime-annotator.cc

@@ -23,195 +23,9 @@
 #include "utils/tokenizer.h"
 #include "utils/utf8/unicodetext.h"
 
-static const int kAM = 0;
-static const int kPM = 1;
-
 namespace libtextclassifier3::dates {
 namespace {
 
-// Datetime annotation are stored as the repeated field of ints & the order is
-// preserved as follow -year[0], -month[1], -day_of_month[2], -day[3], -week[4],
-// -hour[5], -minute[6], -second[7] and day_of_week[8];
-static const std::map<const DatetimeComponent::ComponentType, int>&
-    kTypeToDatetimeIndex =
-        *new std::map<const DatetimeComponent::ComponentType, int>{
-            {DatetimeComponent::ComponentType::YEAR, 0},
-            {DatetimeComponent::ComponentType::MONTH, 1},
-            {DatetimeComponent::ComponentType::DAY_OF_MONTH, 2},
-            {DatetimeComponent::ComponentType::HOUR, 3},
-            {DatetimeComponent::ComponentType::MINUTE, 4},
-            {DatetimeComponent::ComponentType::SECOND, 5},
-            {DatetimeComponent::ComponentType::DAY_OF_WEEK, 7},
-        };
-
-// Datetime annotation are stored as the repeated field of ints & the order is
-// preserved as follow -is_future[0], -year[1], -month[2], -day[3], -week[4],
-// -hour[5], -minute[6], -second[7] and day_of_week[8];
-// After first nine fields mentioned above there are more fields related to day
-// of week which are used to interprete day of week. Those could be  zero or
-// multiple values see RelativeEnum for details.
-static const std::map<const DatetimeComponent::ComponentType, int>&
-    kTypeToRelativeIndex =
-        *new std::map<const DatetimeComponent::ComponentType, int>{
-            {DatetimeComponent::ComponentType::YEAR, 1},
-            {DatetimeComponent::ComponentType::MONTH, 2},
-            {DatetimeComponent::ComponentType::DAY_OF_MONTH, 3},
-            {DatetimeComponent::ComponentType::WEEK, 4},
-            {DatetimeComponent::ComponentType::HOUR, 5},
-            {DatetimeComponent::ComponentType::MINUTE, 6},
-            {DatetimeComponent::ComponentType::SECOND, 7},
-            {DatetimeComponent::ComponentType::DAY_OF_WEEK, 8},
-        };
-
-// kDateTimeSupplementary contains uncommon field like timespan, timezone. It's
-// integer array and the format is (bc_ad, timespan_code, timezone_code,
-// timezone_offset). Al four fields must be provided. If the field is not
-// extracted, the value is -1 in the array.
-static const std::map<const DatetimeComponent::ComponentType, int>&
-    kDateTimeSupplementaryindex =
-        *new std::map<const DatetimeComponent::ComponentType, int>{
-            {DatetimeComponent::ComponentType::MERIDIEM, 1},
-        };
-
-static DatetimeComponent::RelativeQualifier GetRelativeQualifier(
-    const Property& property) {
-  //|| property.int_values(0) < 0
-  if (property.name != kDateTimeRelative) {
-    return DatetimeComponent::RelativeQualifier::UNSPECIFIED;
-  }
-  // Special case: there are certain scenarios in which the relative qualifier
-  // is hard to determine e.g. given “Wednesday 4:00 am” it is hard to
-  // determine if the event is in the past or future hence it is okay to mark
-  // the relative qualifier as unspecified.
-  // Given “unspecified” relative qualifier, now it is not possible to say time
-  // in milliseconds which is unfortunately not optional so make it work with
-  // the existing api the solution is that whenever the relative-ness of the
-  // text is unspecified. The code will override it with “future”.
-  if (property.int_values[0] == -1) {
-    return DatetimeComponent::RelativeQualifier::FUTURE;
-  }
-  return property.int_values[0] > 0
-             ? DatetimeComponent::RelativeQualifier::FUTURE
-             : DatetimeComponent::RelativeQualifier::PAST;
-}
-
-static int GetRelativeCount(const Property& property) {
-  // Relative count fields are stored from index 9 and onword.
-  for (int i = 9; i < property.int_values.size(); i++) {
-    switch (property.int_values[i]) {
-      case RelativeParameter_::Interpretation_NEAREST_LAST:
-        return -1;
-      case RelativeParameter_::Interpretation_SECOND_LAST:
-        return -2;
-      case RelativeParameter_::Interpretation_SECOND_NEXT:
-        return 2;
-      case RelativeParameter_::Interpretation_PREVIOUS:
-        return -1;
-      case RelativeParameter_::Interpretation_COMING:
-      case RelativeParameter_::Interpretation_SOME:
-      case RelativeParameter_::Interpretation_NEAREST:
-      case RelativeParameter_::Interpretation_NEAREST_NEXT:
-        return 1;
-      case RelativeParameter_::Interpretation_CURRENT:
-        return 0;
-    }
-  }
-  return 0;
-}
-
-// Resolve the  year’s ambiguity.
-// If the year in the date has 4 digits i.e. DD/MM/YYYY then there is no
-// ambiguity, the year value is YYYY but certain format i.e. MM/DD/YY is
-// ambiguous e.g. in {April/23/15} year value can be 15 or 1915 or 2015.
-// Following heuristic is used to resolve the ambiguity.
-// - For YYYY there is nothing to resolve.
-// - For all YY years
-//    - Value less than 50 will be resolved to 20YY
-//    - Value greater or equal 50 will be resolved to 19YY
-static int InterpretYear(int parsed_year) {
-  if (parsed_year < 100) {
-    if (parsed_year < 50) {
-      return parsed_year + 2000;
-    }
-    return parsed_year + 1900;
-  }
-  return parsed_year;
-}
-
-static void FillAbsoluteDatetimeComponent(
-    const Property& property, DatetimeParsedData* datetime_parsed_data) {
-  for (auto const& entry : kTypeToDatetimeIndex) {
-    if (property.int_values[entry.second] > -1) {
-      int absolute_value = property.int_values[entry.second];
-      if (entry.first == DatetimeComponent::ComponentType::YEAR) {
-        absolute_value = InterpretYear(absolute_value);
-      }
-      datetime_parsed_data->SetAbsoluteValue(entry.first, absolute_value);
-    }
-  }
-}
-
-static void FillRelativeDatetimeComponent(
-    const Property& property, DatetimeParsedData* datetime_parsed_data) {
-  for (auto const& entry : kTypeToRelativeIndex) {
-    int relative_value = property.int_values[entry.second];
-    if (relative_value > -1) {
-      if (property.int_values.size() > 9) {
-        datetime_parsed_data->SetRelativeCount(entry.first,
-                                               GetRelativeCount(property));
-      } else {
-        datetime_parsed_data->SetRelativeCount(entry.first, relative_value);
-      }
-      datetime_parsed_data->SetRelativeValue(entry.first,
-                                             GetRelativeQualifier(property));
-    }
-  }
-}
-
-static void FillSupplementaryDatetimeComponent(
-    const Property& property, DatetimeParsedData* datetime_parsed_data) {
-  for (auto const& entry : kDateTimeSupplementaryindex) {
-    switch (property.int_values[entry.second]) {
-      case TimespanCode_NOON:
-        // NOON [2] -> PM
-        datetime_parsed_data->SetAbsoluteValue(entry.first, kPM);
-        break;
-      case TimespanCode_MIDNIGHT:
-        // MIDNIGHT [3] -> AM
-        datetime_parsed_data->SetAbsoluteValue(entry.first, kAM);
-        break;
-      case TimespanCode_TONIGHT:
-        // TONIGHT [11] -> PM
-        datetime_parsed_data->SetAbsoluteValue(entry.first, kPM);
-        break;
-      case TimespanCode_AM:
-      case TimespanCode_PM:
-        datetime_parsed_data->SetAbsoluteValue(
-            entry.first, property.int_values[entry.second]);
-        break;
-      case TimespanCode_TIMESPAN_CODE_NONE:
-        break;
-      default:
-        TC3_LOG(WARNING) << "Failed to extract time span code.";
-    }
-  }
-}
-
-static void FillDatetimeParsedData(const Property& property,
-                                   DatetimeParsedData* datetime_parsed_data) {
-  // Absolute Datetime.
-  if (property.name == kDateTime) {
-    FillAbsoluteDatetimeComponent(property, datetime_parsed_data);
-  }
-  // Relative Datetime.
-  if (property.name == kDateTimeRelative) {
-    FillRelativeDatetimeComponent(property, datetime_parsed_data);
-  }
-  if (property.name == kDateTimeSupplementary) {
-    FillSupplementaryDatetimeComponent(property, datetime_parsed_data);
-  }
-}
-
 static std::string GetReferenceLocale(const std::string& locales) {
   std::vector<StringPiece> split_locales = strings::Split(locales, ',');
   if (!split_locales.empty()) {
@@ -241,103 +55,17 @@
 }  // namespace
 
 CfgDatetimeAnnotator::CfgDatetimeAnnotator(
-    const UniLib& unilib, const GrammarTokenizerOptions* tokenizer_options,
-    const CalendarLib& calendar_lib, const DatetimeRules* datetime_rules,
+    const UniLib* unilib, const GrammarTokenizerOptions* tokenizer_options,
+    const CalendarLib* calendar_lib, const DatetimeRules* datetime_rules,
     const float annotator_target_classification_score,
     const float annotator_priority_score)
-    : calendar_lib_(calendar_lib),
-      tokenizer_(BuildTokenizer(&unilib, tokenizer_options)),
+    : calendar_lib_(*calendar_lib),
+      tokenizer_(BuildTokenizer(unilib, tokenizer_options)),
       parser_(unilib, datetime_rules),
       annotator_target_classification_score_(
           annotator_target_classification_score),
       annotator_priority_score_(annotator_priority_score) {}
 
-// Helper method to convert the Thing into DatetimeParseResult.
-// Thing constains the annotation instance i.e. type of the annotation and its
-// properties/values
-void CfgDatetimeAnnotator::FillDatetimeParseResults(
-    const AnnotationData& annotation_data, const DateAnnotationOptions& options,
-    std::vector<DatetimeParseResult>* results) const {
-  DatetimeParsedData datetime_parsed_data;
-  for (const auto& property : annotation_data.properties) {
-    // Property can contain further AnnotationData which indicate that input
-    // text contains multiple datetime instances & co-exist with each other
-    // e.g. 11 June 2019 to 15 June 2019 two dates but connected to each other
-    //      4-6 April contains 3 dates 4 April, 5 April, 6 April.
-    if (!property.annotation_data_values.empty()) {
-      for (const auto& nested_annotation_data :
-           property.annotation_data_values) {
-        FillDatetimeParseResults(nested_annotation_data, options, results);
-      }
-    } else {
-      FillDatetimeParsedData(property, &datetime_parsed_data);
-    }
-  }
-  // If we found any annotation for AnnotationData add it to the result.
-  if (!datetime_parsed_data.IsEmpty()) {
-    std::vector<DatetimeParsedData> interpretations;
-    if (options.generate_alternative_interpretations_when_ambiguous) {
-      FillInterpretations(datetime_parsed_data,
-                          calendar_lib_.GetGranularity(datetime_parsed_data),
-                          &interpretations);
-    } else {
-      interpretations.emplace_back(datetime_parsed_data);
-    }
-    for (const DatetimeParsedData& interpretation : interpretations) {
-      DatetimeParseResult datetime_parse_result;
-      interpretation.GetDatetimeComponents(
-          &datetime_parse_result.datetime_components);
-      InterpretParseData(interpretation, options, calendar_lib_,
-                         &(datetime_parse_result.time_ms_utc),
-                         &(datetime_parse_result.granularity));
-      std::sort(datetime_parse_result.datetime_components.begin(),
-                datetime_parse_result.datetime_components.end(),
-                [](DatetimeComponent a, DatetimeComponent b) {
-                  return a.component_type > b.component_type;
-                });
-      results->emplace_back(datetime_parse_result);
-    }
-  }
-}
-
-// Helper methods to convert the Annotation proto to collection of
-// DatetimeParseResultSpan.
-// DateTime Extractor extract the list of annotation from the grammar rules,
-// where each annotation is a datetime span in the input string. The method will
-// convert each annotation into DatetimeParseResultSpan.
-void CfgDatetimeAnnotator::FillDatetimeParseResultSpan(
-    const UnicodeText& unicode_text,
-    const std::vector<Annotation>& annotation_list,
-    const DateAnnotationOptions& options,
-    std::vector<DatetimeParseResultSpan>* results) const {
-  for (const Annotation& annotation : annotation_list) {
-    DatetimeParseResultSpan datetime_parse_result_span;
-    datetime_parse_result_span.span =
-        CodepointSpan{annotation.begin, annotation.end};
-    datetime_parse_result_span.target_classification_score =
-        annotator_target_classification_score_;
-    // CFG grammar has a confidence score for each extracted annotation which
-    // is an indication of how certain is the system about extracted annotation
-    // e.g.  given input: "22.33" the grammar may extract "hour.minute"  but
-    // because there is no other time component and can also be just a floating
-    // point number the confidence of this match should be less as compare to
-    // "22.33.23 GMT"
-    if (options.use_rule_priority_score) {
-      datetime_parse_result_span.priority_score =
-          annotation.annotator_priority_score;
-    } else {
-      datetime_parse_result_span.priority_score = annotator_priority_score_;
-    }
-
-    std::vector<DatetimeParseResult> datetime_parse_results;
-    FillDatetimeParseResults(annotation.data, options, &datetime_parse_results);
-    for (auto& datetime_parse_result : datetime_parse_results) {
-      datetime_parse_result_span.data.push_back(datetime_parse_result);
-    }
-    results->emplace_back(datetime_parse_result_span);
-  }
-}
-
 void CfgDatetimeAnnotator::Parse(
     const std::string& input, const DateAnnotationOptions& annotation_options,
     const std::vector<Locale>& locales,
@@ -346,15 +74,66 @@
         locales, results);
 }
 
+void CfgDatetimeAnnotator::ProcessDatetimeParseResult(
+    const DateAnnotationOptions& annotation_options,
+    const DatetimeParseResult& datetime_parse_result,
+    std::vector<DatetimeParseResult>* results) const {
+  DatetimeParsedData datetime_parsed_data;
+  datetime_parsed_data.AddDatetimeComponents(
+      datetime_parse_result.datetime_components);
+
+  std::vector<DatetimeParsedData> interpretations;
+  if (annotation_options.generate_alternative_interpretations_when_ambiguous) {
+    FillInterpretations(datetime_parsed_data,
+                        calendar_lib_.GetGranularity(datetime_parsed_data),
+                        &interpretations);
+  } else {
+    interpretations.emplace_back(datetime_parsed_data);
+  }
+  for (const DatetimeParsedData& interpretation : interpretations) {
+    results->emplace_back();
+    interpretation.GetDatetimeComponents(&results->back().datetime_components);
+    InterpretParseData(interpretation, annotation_options, calendar_lib_,
+                       &(results->back().time_ms_utc),
+                       &(results->back().granularity));
+    std::sort(results->back().datetime_components.begin(),
+              results->back().datetime_components.end(),
+              [](const DatetimeComponent& a, const DatetimeComponent& b) {
+                return a.component_type > b.component_type;
+              });
+  }
+}
+
 void CfgDatetimeAnnotator::Parse(
     const UnicodeText& input, const DateAnnotationOptions& annotation_options,
     const std::vector<Locale>& locales,
     std::vector<DatetimeParseResultSpan>* results) const {
-  FillDatetimeParseResultSpan(
-      input,
+  std::vector<DatetimeParseResultSpan> grammar_datetime_parse_result_spans =
       parser_.Parse(input.data(), tokenizer_.Tokenize(input), locales,
-                    annotation_options),
-      annotation_options, results);
+                    annotation_options);
+
+  for (const DatetimeParseResultSpan& grammar_datetime_parse_result_span :
+       grammar_datetime_parse_result_spans) {
+    DatetimeParseResultSpan datetime_parse_result_span;
+    datetime_parse_result_span.span.first =
+        grammar_datetime_parse_result_span.span.first;
+    datetime_parse_result_span.span.second =
+        grammar_datetime_parse_result_span.span.second;
+    datetime_parse_result_span.priority_score = annotator_priority_score_;
+    if (annotation_options.use_rule_priority_score) {
+      datetime_parse_result_span.priority_score =
+          grammar_datetime_parse_result_span.priority_score;
+    }
+    datetime_parse_result_span.target_classification_score =
+        annotator_target_classification_score_;
+    for (const DatetimeParseResult& grammar_datetime_parse_result :
+         grammar_datetime_parse_result_span.data) {
+      ProcessDatetimeParseResult(annotation_options,
+                                 grammar_datetime_parse_result,
+                                 &datetime_parse_result_span.data);
+    }
+    results->emplace_back(datetime_parse_result_span);
+  }
 }
 
 }  // namespace libtextclassifier3::dates

diff --git a/native/annotator/grammar/dates/cfg-datetime-annotator.h b/native/annotator/grammar/dates/cfg-datetime-annotator.h
index 3089cfc..73c9b7b 100644
--- a/native/annotator/grammar/dates/cfg-datetime-annotator.h
+++ b/native/annotator/grammar/dates/cfg-datetime-annotator.h

@@ -33,12 +33,11 @@
 // (List of annotation generated from Grammar rules) to DatetimeParseResultSpan.
 class CfgDatetimeAnnotator {
  public:
-  CfgDatetimeAnnotator(const UniLib& unilib,
-                       const GrammarTokenizerOptions* tokenizer_options,
-                       const CalendarLib& calendar_lib,
-                       const DatetimeRules* datetime_rules,
-                       const float annotator_target_classification_score,
-                       const float annotator_priority_score);
+  explicit CfgDatetimeAnnotator(
+      const UniLib* unilib, const GrammarTokenizerOptions* tokenizer_options,
+      const CalendarLib* calendar_lib, const DatetimeRules* datetime_rules,
+      const float annotator_target_classification_score,
+      const float annotator_priority_score);
 
   // CfgDatetimeAnnotator is neither copyable nor movable.
   CfgDatetimeAnnotator(const CfgDatetimeAnnotator&) = delete;
@@ -59,17 +58,11 @@
              std::vector<DatetimeParseResultSpan>* results) const;
 
  private:
-  void FillDatetimeParseResults(
-      const AnnotationData& annotation_data,
-      const DateAnnotationOptions& options,
+  void ProcessDatetimeParseResult(
+      const DateAnnotationOptions& annotation_options,
+      const DatetimeParseResult& datetime_parse_result,
       std::vector<DatetimeParseResult>* results) const;
 
-  void FillDatetimeParseResultSpan(
-      const UnicodeText& unicode_text,
-      const std::vector<Annotation>& annotation_list,
-      const DateAnnotationOptions& options,
-      std::vector<DatetimeParseResultSpan>* results) const;
-
   const CalendarLib& calendar_lib_;
   const Tokenizer tokenizer_;
   DateParser parser_;

diff --git a/native/annotator/grammar/dates/parser.cc b/native/annotator/grammar/dates/parser.cc
index 950b636..37e65fc 100644
--- a/native/annotator/grammar/dates/parser.cc
+++ b/native/annotator/grammar/dates/parser.cc

@@ -234,7 +234,7 @@
 
 // Copies the field from one DateMatch to another whose field is null. for
 // example: if the from is "May 1, 8pm", and the to is "9pm", "May 1" will be
-// copied to "to". Now we only copy fields for date range requirement.
+// copied to "to". Now we only copy fields for date range requirement.
 void CopyFieldsForDateMatch(const DateMatch& from, DateMatch* to) {
   if (from.time_span_match != nullptr && to->time_span_match == nullptr) {
     to->time_span_match = from.time_span_match;
@@ -511,7 +511,7 @@
 // Checks if two date matches are adjacent and mergeable.
 bool AreDateMatchesAdjacentAndMergeable(
     const UniLib& unilib, const std::vector<UnicodeText::const_iterator>& text,
-    const std::vector<std::string>& ignored_tokens, const DateMatch& prev,
+    const std::vector<std::string>& ignored_spans, const DateMatch& prev,
     const DateMatch& next) {
   // Check the context between the two matches.
   if (next.begin <= prev.end) {
@@ -531,8 +531,8 @@
   }
   const std::string span_text = span.ToUTF8String();
   bool matched = false;
-  for (const std::string& ignored_token : ignored_tokens) {
-    if (span_text == ignored_token) {
+  for (const std::string& ignored_span : ignored_spans) {
+    if (span_text == ignored_span) {
       matched = true;
       break;
     }
@@ -548,7 +548,7 @@
 // be merged
 void MergeDateRangeAndDate(const UniLib& unilib,
                            const std::vector<UnicodeText::const_iterator>& text,
-                           const std::vector<std::string>& ignored_tokens,
+                           const std::vector<std::string>& ignored_spans,
                            const std::vector<DateMatch>& dates,
                            std::vector<DateRangeMatch>* date_ranges) {
   // For each range, check the date before or after the it to see if they could
@@ -569,7 +569,7 @@
       // be merged with the date.
       if (date_range->end <= date.begin) {
         DateMatch merged_date = date;
-        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_tokens,
+        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_spans,
                                                date_range->to, date)) {
           MergeDateMatch(date_range->to, &merged_date, /*update_span=*/true);
           date_range->to = merged_date;
@@ -584,7 +584,7 @@
           if (next_date < dates.size()) {
             DateMatch next_match = dates[next_date];
             if (AreDateMatchesAdjacentAndMergeable(
-                    unilib, text, ignored_tokens, date_range->to, next_match)) {
+                    unilib, text, ignored_spans, date_range->to, next_match)) {
               MergeDateMatch(date_range->to, &next_match, /*update_span=*/true);
               date_range->to = next_match;
               date_range->end = date_range->to.end;
@@ -612,7 +612,7 @@
           merged_date.end = date.year_match->match_offset;
         }
         // Check and merge the range and the date before the range.
-        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_tokens,
+        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_spans,
                                                merged_date, date_range->from)) {
           MergeDateMatch(merged_date, &date_range->from, /*update_span=*/true);
           date_range->begin = date_range->from.begin;
@@ -623,7 +623,7 @@
             DateMatch prev_match = dates[next_date - 1];
             if (prev_match.end <= date_range->from.begin) {
               if (AreDateMatchesAdjacentAndMergeable(unilib, text,
-                                                     ignored_tokens, prev_match,
+                                                     ignored_spans, prev_match,
                                                      date_range->from)) {
                 MergeDateMatch(prev_match, &date_range->from,
                                /*update_span=*/true);
@@ -674,177 +674,50 @@
 }
 
 // Converts candidate dates and date ranges.
-void FillDateInstances(const UniLib& unilib,
-                       const std::vector<UnicodeText::const_iterator>& text,
-                       const DateAnnotationOptions& options,
-                       std::vector<DateMatch>* date_matches,
-                       std::vector<Annotation>* datetimes) {
+void FillDateInstances(
+    const UniLib& unilib, const std::vector<UnicodeText::const_iterator>& text,
+    const DateAnnotationOptions& options, std::vector<DateMatch>* date_matches,
+    std::vector<DatetimeParseResultSpan>* datetime_parse_result_spans) {
   int i = 0;
   for (int j = 1; j < date_matches->size(); j++) {
     if (options.merge_adjacent_components &&
-        AreDateMatchesAdjacentAndMergeable(unilib, text, options.ignored_tokens,
+        AreDateMatchesAdjacentAndMergeable(unilib, text, options.ignored_spans,
                                            date_matches->at(i),
                                            date_matches->at(j))) {
       MergeDateMatch(date_matches->at(i), &date_matches->at(j), true);
     } else {
       if (!IsBlacklistedDate(unilib, text, date_matches->at(i))) {
-        Annotation annotation;
-        FillDateInstance(date_matches->at(i), &annotation);
-        datetimes->push_back(annotation);
+        DatetimeParseResultSpan datetime_parse_result_span;
+        FillDateInstance(date_matches->at(i), &datetime_parse_result_span);
+        datetime_parse_result_spans->push_back(datetime_parse_result_span);
       }
     }
     i = j;
   }
   if (!IsBlacklistedDate(unilib, text, date_matches->at(i))) {
-    Annotation annotation;
-    FillDateInstance(date_matches->at(i), &annotation);
-    datetimes->push_back(annotation);
+    DatetimeParseResultSpan datetime_parse_result_span;
+    FillDateInstance(date_matches->at(i), &datetime_parse_result_span);
+    datetime_parse_result_spans->push_back(datetime_parse_result_span);
   }
 }
 
 void FillDateRangeInstances(
     const std::vector<DateRangeMatch>& date_range_matches,
-    std::vector<Annotation>* datetimes) {
+    std::vector<DatetimeParseResultSpan>* datetime_parse_result_spans) {
   for (const DateRangeMatch& date_range_match : date_range_matches) {
-    Annotation annotation;
-    FillDateRangeInstance(date_range_match, &annotation);
-    datetimes->push_back(annotation);
+    DatetimeParseResultSpan datetime_parse_result_span;
+    FillDateRangeInstance(date_range_match, &datetime_parse_result_span);
+    datetime_parse_result_spans->push_back(datetime_parse_result_span);
   }
 }
 
-bool ExpandDateRange(const DateRangeMatch& range,
-                     std::vector<DateMatch>* date_series) {
-  if (!range.from.HasDay() || !range.to.HasDay()) {
-    return false;
-  }
-  DateMatch date = range.from;
-  for (; IsPrecedent(date, range.to); IncrementOneDay(&date)) {
-    date.end = range.to.end;
-    date_series->push_back(date);
-  }
-  date = range.to;
-  date.begin = range.from.begin;
-  date_series->push_back(date);
-  return true;
-}
-
-// Maximum number of days allowed for a date range to be expanded
-static constexpr int kMaximumExpansion = 30;
-
-// Check if the date range spans more than kMaximumExpansion of days.
-// Assumes both from and to field has day and month
-bool IsDateRangeTooLong(DateRangeMatch date_range_match) {
-  int number_of_days = 1;
-  for (int month = date_range_match.from.month;
-       month < date_range_match.to.month; ++month) {
-    number_of_days += GetLastDayOfMonth(0, month);
-  }
-  number_of_days += date_range_match.to.day - date_range_match.from.day;
-  return number_of_days > kMaximumExpansion;
-}
-
-// Expands a date range and merges it with a time.
-// e.g. April 4-6, 2:00pm will be expanded into April 4 at 2pm, April 5 at 2pm
-// and April 6 at 2:00pm
-//  - Only supports a range of days with a time
-//  - Does not expand a date range without time
-void ExpandDateRangeAndMergeWithTime(
-    const UniLib& unilib, const std::vector<UnicodeText::const_iterator>& text,
-    const std::vector<std::string>& ignored_tokens,
-    std::vector<DateMatch>* times, std::vector<DateRangeMatch>* date_ranges) {
-  auto next_time = times->begin();
-  auto next_range = date_ranges->begin();
-  while (next_range != date_ranges->end() && next_time != times->end()) {
-    const DateRangeMatch& range = *next_range;
-    if (range.from.HasHour() || !IsPrecedent(range.from, range.to)) {
-      ++next_range;
-      continue;
-    }
-
-    while (next_time != times->end()) {
-      const DateMatch& time = *next_time;
-      if (!time.IsStandaloneTime()) {
-        ++next_time;
-        continue;
-      }
-
-      // The range is before the time
-      if (range.end <= time.begin) {
-        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_tokens,
-                                               range.to, time) &&
-            !IsDateRangeTooLong(range)) {
-          std::vector<DateMatch> expanded_dates;
-          ExpandDateRange(range, &expanded_dates);
-
-          // Merge the expaneded date and with time
-          std::vector<DateMatch> merged_times;
-          for (const auto& expanded_date : expanded_dates) {
-            DateMatch merged_time = time;
-            MergeDateMatch(expanded_date, &merged_time, true);
-            merged_times.push_back(merged_time);
-          }
-          // Insert the expanded time before next_time and move next_time point
-          // to previous time.
-          next_time = times->insert(next_time, merged_times.begin(),
-                                    merged_times.end());
-          next_time += merged_times.size();
-
-          // Remove merged time. now next_time point to the time after the
-          // merged time.
-          next_time = times->erase(next_time);
-          // Remove merged range, now next_range point to the range after the
-          // merged range.
-          next_range = date_ranges->erase(next_range);
-        } else {
-          // range is behind time, check next range.
-          ++next_range;
-        }
-        break;
-      } else if (range.end > time.end && range.begin > time.begin) {
-        // The range is after the time
-        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_tokens,
-                                               time, range.from) &&
-            !IsDateRangeTooLong(range)) {
-          std::vector<DateMatch> expanded_dates;
-          ExpandDateRange(range, &expanded_dates);
-
-          // Merge the expaneded dates with time
-          for (auto& expanded_date : expanded_dates) {
-            MergeDateMatch(time, &expanded_date, true);
-          }
-          // Insert expanded time before next_time and move next_time point to
-          // previous time.
-          next_time = times->insert(next_time, expanded_dates.begin(),
-                                    expanded_dates.end());
-          next_time += expanded_dates.size();
-
-          // Remove merged time. Now next_time point to the time after the
-          // merged time.
-          next_time = times->erase(next_time);
-          // Remove merged range. Now next_range point to the range after the
-          // merged range.
-          next_range = date_ranges->erase(next_range);
-          break;
-        } else {
-          // Since the range is after the time, we need to check the next time
-          // first
-          ++next_time;
-        }
-      } else {
-        // Range fully overlaps with time In this case, we move to the next time
-        ++next_time;
-      }
-    }
-  }
-}
-
-// Fills `DateTimes` proto from matched `DateMatch` and `DateRangeMatch`
+// Fills `DatetimeParseResultSpan` from `DateMatch` and `DateRangeMatch`
 // instances.
-std::vector<Annotation> GetOutputAsAnnotationList(
+std::vector<DatetimeParseResultSpan> GetOutputAsAnnotationList(
     const UniLib& unilib, const DateExtractor& extractor,
     const std::vector<UnicodeText::const_iterator>& text,
     const DateAnnotationOptions& options) {
-  std::vector<Annotation> date_annotations;
+  std::vector<DatetimeParseResultSpan> datetime_parse_result_spans;
   std::vector<DateMatch> date_matches =
       BuildDateMatches(text, extractor.output());
 
@@ -874,35 +747,23 @@
     }
 
     if (!date_matches.empty()) {
-      MergeDateRangeAndDate(unilib, text, options.ignored_tokens, date_matches,
+      MergeDateRangeAndDate(unilib, text, options.ignored_spans, date_matches,
                             &date_range_matches);
       RemoveOverlappedDateByRange(date_range_matches, &date_matches);
-
-      if (options.expand_date_series) {
-        ExpandDateRangeAndMergeWithTime(unilib, text, options.ignored_tokens,
-                                        &date_matches, &date_range_matches);
-      }
     }
-    FillDateRangeInstances(date_range_matches, &date_annotations);
+    FillDateRangeInstances(date_range_matches, &datetime_parse_result_spans);
   }
 
   if (!date_matches.empty()) {
-    FillDateInstances(unilib, text, options, &date_matches, &date_annotations);
-
-    int64 timestamp_ms = options.base_timestamp_millis;
-    if (timestamp_ms > 0) {
-      // The timestamp in options is milliseconds, the time_t is seconds from
-      // 00:00 Jan 1 1970 UTC.
-      time_t base_timestamp = timestamp_ms / 1000;
-      NormalizeDateTimes(base_timestamp, &date_annotations);
-    }
+    FillDateInstances(unilib, text, options, &date_matches,
+                      &datetime_parse_result_spans);
   }
-  return date_annotations;
+  return datetime_parse_result_spans;
 }
 
 }  // namespace
 
-std::vector<Annotation> DateParser::Parse(
+std::vector<DatetimeParseResultSpan> DateParser::Parse(
     StringPiece text, const std::vector<Token>& tokens,
     const std::vector<Locale>& locales,
     const DateAnnotationOptions& options) const {
@@ -923,9 +784,9 @@
   if (locale_rules.empty()) {
     return {};
   }
-  grammar::Matcher matcher(unilib_, datetime_rules_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, datetime_rules_->rules(), locale_rules,
                            &extractor);
-  lexer_.Process(tokens, /*matches=*/{}, &matcher);
+  lexer_.Process(text_unicode, tokens, /*annotations=*/nullptr, &matcher);
   return GetOutputAsAnnotationList(unilib_, extractor, codepoint_offsets,
                                    options);
 }

diff --git a/native/annotator/grammar/dates/parser.h b/native/annotator/grammar/dates/parser.h
index 28f89c3..be919df 100644
--- a/native/annotator/grammar/dates/parser.h
+++ b/native/annotator/grammar/dates/parser.h

@@ -35,18 +35,18 @@
 // constructs, validates, deduplicates and normalizes date time annotations.
 class DateParser {
  public:
-  DateParser(const UniLib& unilib, const DatetimeRules* datetime_rules)
-      : unilib_(unilib),
-        lexer_(unilib),
+  explicit DateParser(const UniLib* unilib, const DatetimeRules* datetime_rules)
+      : unilib_(*unilib),
+        lexer_(unilib, datetime_rules->rules()),
         datetime_rules_(datetime_rules),
         rules_locales_(ParseRulesLocales(datetime_rules->rules())) {}
 
   // Parses the dates in the input. Makes sure that the results do not
   // overlap.
-  std::vector<Annotation> Parse(StringPiece text,
-                                const std::vector<Token>& tokens,
-                                const std::vector<Locale>& locales,
-                                const DateAnnotationOptions& options) const;
+  std::vector<DatetimeParseResultSpan> Parse(
+      StringPiece text, const std::vector<Token>& tokens,
+      const std::vector<Locale>& locales,
+      const DateAnnotationOptions& options) const;
 
  private:
   const UniLib& unilib_;

diff --git a/native/annotator/grammar/dates/timezone-code.fbs b/native/annotator/grammar/dates/timezone-code.fbs
index ae74982..ff615ee 100755
--- a/native/annotator/grammar/dates/timezone-code.fbs
+++ b/native/annotator/grammar/dates/timezone-code.fbs

@@ -17,9 +17,7 @@
 namespace libtextclassifier3.dates;
 enum TimezoneCode : int {
   TIMEZONE_CODE_NONE = -1,
-
   ETC_UNKNOWN = 0,
-
   PST8PDT = 1,
   // Delegate.
 

diff --git a/native/annotator/grammar/dates/utils/date-match.cc b/native/annotator/grammar/dates/utils/date-match.cc
index 227b320..d9fca52 100644
--- a/native/annotator/grammar/dates/utils/date-match.cc
+++ b/native/annotator/grammar/dates/utils/date-match.cc

@@ -19,11 +19,59 @@
 #include <algorithm>
 
 #include "annotator/grammar/dates/utils/date-utils.h"
+#include "annotator/types.h"
 #include "utils/strings/append.h"
 
+static const int kAM = 0;
+static const int kPM = 1;
+
 namespace libtextclassifier3 {
 namespace dates {
 
+namespace {
+static int GetMeridiemValue(const TimespanCode& timespan_code) {
+  switch (timespan_code) {
+    case TimespanCode_AM:
+    case TimespanCode_MIDNIGHT:
+      // MIDNIGHT [3] -> AM
+      return kAM;
+    case TimespanCode_TONIGHT:
+      // TONIGHT [11] -> PM
+    case TimespanCode_NOON:
+      // NOON [2] -> PM
+    case TimespanCode_PM:
+      return kPM;
+    case TimespanCode_TIMESPAN_CODE_NONE:
+    default:
+      TC3_LOG(WARNING) << "Failed to extract time span code.";
+  }
+  return NO_VAL;
+}
+
+static int GetRelativeCount(const RelativeParameter* relative_parameter) {
+  for (const int interpretation :
+       *relative_parameter->day_of_week_interpretation()) {
+    switch (interpretation) {
+      case RelativeParameter_::Interpretation_NEAREST_LAST:
+      case RelativeParameter_::Interpretation_PREVIOUS:
+        return -1;
+      case RelativeParameter_::Interpretation_SECOND_LAST:
+        return -2;
+      case RelativeParameter_::Interpretation_SECOND_NEXT:
+        return 2;
+      case RelativeParameter_::Interpretation_COMING:
+      case RelativeParameter_::Interpretation_SOME:
+      case RelativeParameter_::Interpretation_NEAREST:
+      case RelativeParameter_::Interpretation_NEAREST_NEXT:
+        return 1;
+      case RelativeParameter_::Interpretation_CURRENT:
+        return 0;
+    }
+  }
+  return 0;
+}
+}  // namespace
+
 using strings::JoinStrings;
 using strings::SStringAppendF;
 
@@ -165,6 +213,162 @@
   }
 }
 
+DatetimeComponent::RelativeQualifier DateMatch::GetRelativeQualifier() const {
+  if (HasRelativeDate()) {  // Only a relative date gets PAST or FUTURE.
+    if (relative_match->existing & RelativeMatch::HAS_IS_FUTURE) {
+      if (!relative_match->is_future_date) {  // Flag present and false.
+        return DatetimeComponent::RelativeQualifier::PAST;
+      }
+    }
+    return DatetimeComponent::RelativeQualifier::FUTURE;  // Default otherwise.
+  }
+  return DatetimeComponent::RelativeQualifier::UNSPECIFIED;  // Not relative.
+}
+
+// Folds the relative qualifier into the sign of the relative counter: the
+// counter is negated when the qualifier is RelativeQualifier::PAST and is
+// returned unchanged for every other qualifier.
+int GetAdjustedRelativeCounter(
+    const DatetimeComponent::RelativeQualifier& relative_qualifier,
+    const int relative_counter) {
+  if (DatetimeComponent::RelativeQualifier::PAST == relative_qualifier) {
+    return -relative_counter;
+  }
+  return relative_counter;
+}
+
+Optional<DatetimeComponent> CreateDatetimeComponent(  // Builds one component.
+    const DatetimeComponent::ComponentType& component_type,
+    const DatetimeComponent::RelativeQualifier& relative_qualifier,
+    const int absolute_value, const int relative_value) {
+  if (absolute_value == NO_VAL && relative_value == NO_VAL) {
+    return Optional<DatetimeComponent>();  // Neither value is set: empty.
+  }
+  return Optional<DatetimeComponent>(DatetimeComponent(
+      component_type,
+      (relative_value != NO_VAL)  // Qualifier is kept only with a count.
+          ? relative_qualifier
+          : DatetimeComponent::RelativeQualifier::UNSPECIFIED,
+      (absolute_value != NO_VAL) ? absolute_value : 0,  // Missing -> 0.
+      (relative_value != NO_VAL)  // Signed by the qualifier; missing -> 0.
+          ? GetAdjustedRelativeCounter(relative_qualifier, relative_value)
+          : 0));
+}
+
+Optional<DatetimeComponent> CreateDayOfWeekComponent(
+    const RelativeMatch* relative_match,
+    const DatetimeComponent::RelativeQualifier& relative_qualifier,
+    const DayOfWeek& absolute_day_of_week) {  // Builds DAY_OF_WEEK component.
+  DatetimeComponent::RelativeQualifier updated_relative_qualifier =
+      relative_qualifier;
+  int absolute_value = absolute_day_of_week;
+  int relative_value = NO_VAL;  // Stays NO_VAL without a relative match.
+  if (relative_match) {
+    relative_value = relative_match->day_of_week;  // May be replaced below.
+    if (relative_match->existing & RelativeMatch::HAS_DAY_OF_WEEK) {
+      if (relative_match->IsStandaloneRelativeDayOfWeek() &&
+          absolute_day_of_week == DayOfWeek_DOW_NONE) {
+        absolute_value = relative_match->day_of_week;  // No absolute DOW set.
+      }
+      // Day of week combined with a week period: the relative count is 1.
+      if (relative_match->existing & RelativeMatch::HAS_WEEK) {
+        relative_value = 1;
+      } else {  // Otherwise take count and direction from the nonterminal.
+        const NonterminalValue* nonterminal =
+            relative_match->day_of_week_nonterminal;
+        TC3_CHECK(nonterminal != nullptr);
+        TC3_CHECK(nonterminal->relative_parameter());
+        const RelativeParameter* rp = nonterminal->relative_parameter();
+        if (rp->day_of_week_interpretation()) {
+          relative_value = GetRelativeCount(rp);
+          if (relative_value < 0) {
+            relative_value = -relative_value;  // Keep magnitude; sign -> PAST.
+            updated_relative_qualifier =
+                DatetimeComponent::RelativeQualifier::PAST;
+          } else if (relative_value > 0) {  // 0 keeps the given qualifier.
+            updated_relative_qualifier =
+                DatetimeComponent::RelativeQualifier::FUTURE;
+          }
+        }
+      }
+    }
+  }
+  return CreateDatetimeComponent(DatetimeComponent::ComponentType::DAY_OF_WEEK,
+                                 updated_relative_qualifier, absolute_value,
+                                 relative_value);
+}
+
+// Resolves the ambiguity of a parsed year.
+// A four-digit year (e.g. DD/MM/YYYY) is unambiguous and is used as is, but a
+// two-digit year (e.g. MM/DD/YY) is ambiguous: in {April/23/15} the year could
+// mean 15, 1915 or 2015.
+// The following heuristic resolves the ambiguity:
+// - NO_VAL and values of 100 or more are returned unchanged.
+// - Values below 100 are treated as two-digit years (YY):
+//    - A value less than 50 resolves to 20YY.
+//    - A value greater than or equal to 50 resolves to 19YY.
+static int InterpretYear(int parsed_year) {
+  if (parsed_year == NO_VAL) {
+    return parsed_year;
+  }
+  if (parsed_year < 100) {
+    if (parsed_year < 50) {
+      return parsed_year + 2000;
+    }
+    return parsed_year + 1900;
+  }
+  return parsed_year;
+}
+
+Optional<DatetimeComponent> DateMatch::GetDatetimeComponent(
+    const DatetimeComponent::ComponentType& component_type) const {
+  switch (component_type) {  // Absolute field plus relative count, per type.
+    case DatetimeComponent::ComponentType::YEAR:  // Via InterpretYear().
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), InterpretYear(year),
+          (relative_match != nullptr) ? relative_match->year : NO_VAL);
+    case DatetimeComponent::ComponentType::MONTH:
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), month,
+          (relative_match != nullptr) ? relative_match->month : NO_VAL);
+    case DatetimeComponent::ComponentType::DAY_OF_MONTH:
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), day,
+          (relative_match != nullptr) ? relative_match->day : NO_VAL);
+    case DatetimeComponent::ComponentType::HOUR:
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), hour,
+          (relative_match != nullptr) ? relative_match->hour : NO_VAL);
+    case DatetimeComponent::ComponentType::MINUTE:
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), minute,
+          (relative_match != nullptr) ? relative_match->minute : NO_VAL);
+    case DatetimeComponent::ComponentType::SECOND:
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), second,
+          (relative_match != nullptr) ? relative_match->second : NO_VAL);
+    case DatetimeComponent::ComponentType::DAY_OF_WEEK:  // Dedicated helper.
+      return CreateDayOfWeekComponent(relative_match, GetRelativeQualifier(),
+                                      day_of_week);
+    case DatetimeComponent::ComponentType::MERIDIEM:  // Absolute value only.
+      return CreateDatetimeComponent(component_type, GetRelativeQualifier(),
+                                     GetMeridiemValue(time_span_code), NO_VAL);
+    case DatetimeComponent::ComponentType::ZONE_OFFSET:
+      if (HasTimeZoneOffset()) {  // Absolute value only, qualifier unset.
+        return Optional<DatetimeComponent>(DatetimeComponent(
+            component_type, DatetimeComponent::RelativeQualifier::UNSPECIFIED,
+            time_zone_offset, /*arg_relative_count=*/0));
+      }
+      return Optional<DatetimeComponent>();  // No offset on this match.
+    case DatetimeComponent::ComponentType::WEEK:  // Relative value only.
+      return CreateDatetimeComponent(
+          component_type, GetRelativeQualifier(), NO_VAL,
+          HasRelativeDate() ? relative_match->week : NO_VAL);
+    default:  // Any other component type yields an empty result.
+      return Optional<DatetimeComponent>();
+  }
+}
+
 bool DateMatch::IsValid() const {
   if (!HasYear() && HasBcAd()) {
     return false;
@@ -194,6 +398,31 @@
   return (HasDateFields() || HasTimeFields() || HasRelativeDate());
 }
 
+void DateMatch::FillDatetimeComponents(  // Appends; never clears the output.
+    std::vector<DatetimeComponent>* datetime_component) const {
+  static const std::vector<DatetimeComponent::ComponentType>*  // Never freed.
+      kDatetimeComponents = new std::vector<DatetimeComponent::ComponentType>{
+          DatetimeComponent::ComponentType::ZONE_OFFSET,  // Emission order.
+          DatetimeComponent::ComponentType::MERIDIEM,
+          DatetimeComponent::ComponentType::SECOND,
+          DatetimeComponent::ComponentType::MINUTE,
+          DatetimeComponent::ComponentType::HOUR,
+          DatetimeComponent::ComponentType::DAY_OF_MONTH,
+          DatetimeComponent::ComponentType::DAY_OF_WEEK,
+          DatetimeComponent::ComponentType::WEEK,
+          DatetimeComponent::ComponentType::MONTH,
+          DatetimeComponent::ComponentType::YEAR};
+
+  for (const DatetimeComponent::ComponentType& component_type :
+       *kDatetimeComponents) {
+    Optional<DatetimeComponent> date_time =
+        GetDatetimeComponent(component_type);
+    if (date_time.has_value()) {  // Skip types this match has no value for.
+      datetime_component->emplace_back(date_time.value());
+    }
+  }
+}
+
 std::string DateRangeMatch::DebugString() const {
   std::string res;
   // The method is only called for debugging purposes.

diff --git a/native/annotator/grammar/dates/utils/date-match.h b/native/annotator/grammar/dates/utils/date-match.h
index 472e3cf..285e9b3 100644
--- a/native/annotator/grammar/dates/utils/date-match.h
+++ b/native/annotator/grammar/dates/utils/date-match.h

@@ -498,6 +498,17 @@
   }
 
   bool IsValid() const;
+
+  // Overall relative qualifier of the DateMatch, e.g. "2 years ago" is 'PAST'
+  // and "next week" is 'FUTURE'.
+  DatetimeComponent::RelativeQualifier GetRelativeQualifier() const;
+
+  // Returns the 'DatetimeComponent' of the given 'ComponentType', if any.
+  Optional<DatetimeComponent> GetDatetimeComponent(
+      const DatetimeComponent::ComponentType& component_type) const;
+
+  void FillDatetimeComponents(  // Appends every available component.
+      std::vector<DatetimeComponent>* datetime_component) const;
 };
 
 // Represent a matched date range which includes the from and to matched date.

diff --git a/native/annotator/grammar/dates/utils/date-match_test.cc b/native/annotator/grammar/dates/utils/date-match_test.cc
new file mode 100644
index 0000000..f10f32a
--- /dev/null
+++ b/native/annotator/grammar/dates/utils/date-match_test.cc

@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/grammar/dates/utils/date-match.h"
+
+#include <stdint.h>
+
+#include <string>
+
+#include "annotator/grammar/dates/dates_generated.h"
+#include "annotator/grammar/dates/timezone-code_generated.h"
+#include "annotator/grammar/dates/utils/date-utils.h"
+#include "utils/strings/append.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace dates {
+namespace {
+
+class DateMatchTest : public ::testing::Test {  // DateMatch setter helpers.
+ protected:
+  enum {
+    X = NO_VAL,  // Shorthand for NO_VAL in test inputs.
+  };
+
+  static DayOfWeek DOW_X() { return DayOfWeek_DOW_NONE; }
+  static DayOfWeek SUN() { return DayOfWeek_SUNDAY; }
+
+  static BCAD BCAD_X() { return BCAD_BCAD_NONE; }
+  static BCAD BC() { return BCAD_BC; }
+
+  DateMatch& SetDate(DateMatch* date, int year, int8 month, int8 day,
+                     DayOfWeek day_of_week = DOW_X(), BCAD bc_ad = BCAD_X()) {
+    date->year = year;  // Overwrites all five date fields, then returns *date.
+    date->month = month;
+    date->day = day;
+    date->day_of_week = day_of_week;
+    date->bc_ad = bc_ad;
+    return *date;
+  }
+
+  DateMatch& SetTimeValue(DateMatch* date, int8 hour, int8 minute = X,
+                          int8 second = X, double fraction_second = X) {
+    date->hour = hour;  // Omitted arguments default to X.
+    date->minute = minute;
+    date->second = second;
+    date->fraction_second = fraction_second;
+    return *date;
+  }
+
+  DateMatch& SetTimeSpan(DateMatch* date, TimespanCode time_span_code) {
+    date->time_span_code = time_span_code;
+    return *date;
+  }
+
+  DateMatch& SetTimeZone(DateMatch* date, TimezoneCode time_zone_code,
+                         int16 time_zone_offset = INT16_MIN) {
+    date->time_zone_code = time_zone_code;  // Offset defaults to INT16_MIN.
+    date->time_zone_offset = time_zone_offset;
+    return *date;
+  }
+
+  bool SameDate(const DateMatch& a, const DateMatch& b) {  // Ignores time.
+    return (a.day == b.day && a.month == b.month && a.year == b.year &&
+            a.day_of_week == b.day_of_week);
+  }
+
+  DateMatch& SetDayOfWeek(DateMatch* date, DayOfWeek dow) {
+    date->day_of_week = dow;
+    return *date;
+  }
+};
+
+TEST_F(DateMatchTest, BitFieldWidth) {
+  // The DayOfWeek range must fit DateMatch::day_of_week (:8).
+  EXPECT_GE(DayOfWeek_MIN, INT8_MIN);
+  EXPECT_LE(DayOfWeek_MAX, INT8_MAX);
+
+  // The BCAD range must fit DateMatch::bc_ad (:8).
+  EXPECT_GE(BCAD_MIN, INT8_MIN);
+  EXPECT_LE(BCAD_MAX, INT8_MAX);
+
+  // The TimespanCode range must fit DateMatch::time_span_code (:16).
+  EXPECT_GE(TimespanCode_MIN, INT16_MIN);
+  EXPECT_LE(TimespanCode_MAX, INT16_MAX);
+}
+
+TEST_F(DateMatchTest, IsValid) {  // Accepted and rejected field sets.
+  // Valid: dates.
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26);  // Year, month and day.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, X);  // Year and month.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, X, X);  // Year only.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26);  // Month and day.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, X);  // Month only.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, X, 26);  // Day only.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26, SUN());  // Full date plus weekday.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26, SUN());  // Month, day and weekday.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, X, 26, SUN());  // Day and weekday.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26, DOW_X(), BC());  // Full date with BC.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  // Valid: times.
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30, 59, 0.99);  // Down to the fractional second.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30, 59);  // Down to the second.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30);  // Hour and minute.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12);  // Hour only.
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  // Valid: mixed.
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26);
+    SetTimeValue(&d, 12, 30, 59, 0.99);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26);
+    SetTimeValue(&d, 12, 30, 59);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, X, X, SUN());  // Weekday only, combined with a time.
+    SetTimeValue(&d, 12, 30);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: dates.
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26, DOW_X(), BC());  // BC without a year.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, X, 26);  // Year and day, no month.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, X, X, SUN());  // Year and weekday only.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, X, SUN());  // Month and weekday, no day.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: times.
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, X, 59);  // Second without minute.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, X, X, 0.99);  // Fraction without minute or second.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30, X, 0.99);  // Fraction without second.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, X, 30);  // Minute without hour.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: mixed.
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, X);
+    SetTimeValue(&d, 12);  // A time, but the date has no day.
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: empty.
+  {
+    DateMatch d;
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+}
+
+std::string DebugStrings(const std::vector<DateMatch>& instances) {
+  std::string res;  // One "[index] == <DebugString>" line per instance.
+  for (int i = 0; i < static_cast<int>(instances.size()); ++i) {
+    ::libtextclassifier3::strings::SStringAppendF(  // "%d" takes an int index.
+        &res, 0, "[%d] == %s\n", i, instances[i].DebugString().c_str());
+  }
+  return res;
+}
+
+TEST_F(DateMatchTest, IsRefinement) {
+  {
+    DateMatch a;  // a is b plus a month.
+    SetDate(&a, 2014, 2, X);
+    DateMatch b;
+    SetDate(&b, 2014, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus a day.
+    SetDate(&a, 2014, 2, 24);
+    DateMatch b;
+    SetDate(&b, 2014, 2, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus a year.
+    SetDate(&a, 2014, 2, 24);
+    DateMatch b;
+    SetDate(&b, X, 2, 24);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus an hour.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus a minute.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, 0, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus a second.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, 0, 0);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, 0, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus a time span code.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    SetTimeSpan(&a, TimespanCode_AM);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b plus a time zone code.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    SetTimeZone(&a, TimezoneCode_PST8PDT);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a is b with a priority raised by 10.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    a.priority += 10;
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // a and b are set identically.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // Rejected: b has a minute that a lacks.
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, X, 2, 24);
+    SetTimeValue(&b, 9, 0, X);
+    EXPECT_FALSE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // Rejected: b has a year that a lacks.
+    SetDate(&a, X, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    EXPECT_FALSE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;  // Rejected: b has a time span code that a lacks.
+    SetTimeValue(&a, 9, 0, 0);
+    DateMatch b;
+    SetTimeValue(&b, 9, X, X);
+    SetTimeSpan(&b, TimespanCode_AM);
+    EXPECT_FALSE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+}
+
+TEST_F(DateMatchTest, FillDateInstance_AnnotatorPriorityScore) {
+  DateMatch date_match;
+  SetDate(&date_match, 2014, 2, X);
+  date_match.annotator_priority_score = 0.5;  // Must reach the result span.
+  DatetimeParseResultSpan datetime_parse_result_span;
+  FillDateInstance(date_match, &datetime_parse_result_span);  // Under test.
+  EXPECT_FLOAT_EQ(datetime_parse_result_span.priority_score, 0.5)
+      << DebugStrings({date_match});
+}
+
+TEST_F(DateMatchTest, MergeDateMatch_AnnotatorPriorityScore) {
+  DateMatch a;  // Date-only match with the lower score.
+  SetDate(&a, 2014, 2, 4);
+  a.annotator_priority_score = 0.5;
+
+  DateMatch b;  // Time-only match with the higher score.
+  SetTimeValue(&b, 10, 45, 23);
+  b.annotator_priority_score = 1.0;
+
+  MergeDateMatch(b, &a, false);  // a is the match that gets modified.
+  EXPECT_FLOAT_EQ(a.annotator_priority_score, 1.0);  // Expect b's score.
+}
+
+}  // namespace
+}  // namespace dates
+}  // namespace libtextclassifier3

diff --git a/native/annotator/grammar/dates/utils/date-utils.cc b/native/annotator/grammar/dates/utils/date-utils.cc
index 02f4873..ea8015d 100644
--- a/native/annotator/grammar/dates/utils/date-utils.cc
+++ b/native/annotator/grammar/dates/utils/date-utils.cc

@@ -22,6 +22,8 @@
 #include "annotator/grammar/dates/annotations/annotation-util.h"
 #include "annotator/grammar/dates/dates_generated.h"
 #include "annotator/grammar/dates/utils/annotation-keys.h"
+#include "annotator/grammar/dates/utils/date-match.h"
+#include "annotator/types.h"
 #include "utils/base/macros.h"
 
 namespace libtextclassifier3 {
@@ -244,517 +246,29 @@
   return false;
 }
 
-void IncrementOneDay(DateMatch* date) {
-  if (date->HasDayOfWeek()) {
-    IncrementDayOfWeek(&date->day_of_week);
-  }
-  if (date->HasYear() && date->HasMonth()) {
-    if (date->day < GetLastDayOfMonth(date->year, date->month)) {
-      date->day++;
-      return;
-    } else if (date->month < MONSPERYEAR) {
-      date->month++;
-      date->day = 1;
-      return;
-    } else {
-      date->year++;
-      date->month = 1;
-      date->day = 1;
-      return;
-    }
-  } else if (!date->HasYear() && date->HasMonth()) {
-    if (date->day < GetLastDayOfMonth(0, date->month)) {
-      date->day++;
-      return;
-    } else if (date->month < MONSPERYEAR) {
-      date->month++;
-      date->day = 1;
-      return;
-    }
-  } else {
-    date->day++;
-    return;
-  }
+void FillDateInstance(const DateMatch& date,
+                      DatetimeParseResultSpan* instance) {
+  instance->span.first = date.begin;  // Copy the match's begin/end offsets.
+  instance->span.second = date.end;
+  instance->priority_score = date.GetAnnotatorPriorityScore();
+  DatetimeParseResult datetime_parse_result;  // Single result for this match.
+  date.FillDatetimeComponents(&datetime_parse_result.datetime_components);
+  instance->data.emplace_back(datetime_parse_result);  // Appended by copy.
 }
 
-void FillDateInstance(const DateMatch& date, Annotation* instance) {
-  instance->begin = date.begin;
-  instance->end = date.end;
-  instance->annotator_priority_score = date.annotator_priority_score;
-  AnnotationData* thing = &instance->data;
-  thing->type = kDateTimeType;
+void FillDateRangeInstance(const DateRangeMatch& range,
+                           DatetimeParseResultSpan* instance) {
+  instance->span.first = range.begin;
+  instance->span.second = range.end;
+  instance->priority_score = range.GetAnnotatorPriorityScore();
 
-  // Add most common date time fields. Refer kDateTime to see the format.
-  auto has_value = [](int n) { return n >= 0; };
-  int sec_frac = -1;
-  if (date.HasFractionSecond()) {
-    sec_frac = static_cast<int>(date.fraction_second * 1000);
-  }
-  int datetime[] = {date.year,   date.month,  date.day, date.hour,
-                    date.minute, date.second, sec_frac, date.day_of_week};
-  if (std::any_of(datetime, datetime + TC3_ARRAYSIZE(datetime), has_value)) {
-    AddRepeatedIntProperty(kDateTime, datetime, TC3_ARRAYSIZE(datetime),
-                           instance);
-  }
+  // Filling from DatetimeParseResult.
+  instance->data.emplace_back();
+  range.from.FillDatetimeComponents(&instance->data.back().datetime_components);
 
-  // Refer comments of kDateTimeSupplementary to see the format.
-  int datetime_sup[] = {date.bc_ad, date.time_span_code, date.time_zone_code,
-                        date.time_zone_offset};
-  if (std::any_of(datetime_sup, datetime_sup + TC3_ARRAYSIZE(datetime_sup),
-                  has_value)) {
-    AddRepeatedIntProperty(kDateTimeSupplementary, datetime_sup,
-                           TC3_ARRAYSIZE(datetime_sup), instance);
-  }
-
-  if (date.HasRelativeDate()) {
-    const RelativeMatch* r_match = date.relative_match;
-    // Refer comments of kDateTimeRelative to see the format.
-    int is_future = -1;
-    if (r_match->existing & RelativeMatch::HAS_IS_FUTURE) {
-      is_future = r_match->is_future_date;
-    }
-    int rdate[] = {is_future,       r_match->year,   r_match->month,
-                   r_match->day,    r_match->week,   r_match->hour,
-                   r_match->minute, r_match->second, r_match->day_of_week};
-    int idx = AddRepeatedIntProperty(kDateTimeRelative, rdate,
-                                     TC3_ARRAYSIZE(rdate), instance);
-
-    if (r_match->existing & RelativeMatch::HAS_DAY_OF_WEEK) {
-      if (r_match->IsStandaloneRelativeDayOfWeek() &&
-          date.day_of_week == DayOfWeek_DOW_NONE) {
-        Property* prop = FindOrCreateDefaultDateTime(&instance->data);
-        prop->int_values[7] = r_match->day_of_week;
-      }
-      // Check if the relative date has day of week with week period.
-      // "Tuesday 6 weeks ago".
-      if (r_match->existing & RelativeMatch::HAS_WEEK) {
-        instance->data.properties[idx].int_values.push_back(
-            RelativeParameter_::Interpretation_SOME);
-      } else {
-        const NonterminalValue* nonterminal = r_match->day_of_week_nonterminal;
-        TC3_CHECK(nonterminal != nullptr);
-        TC3_CHECK(nonterminal->relative_parameter());
-        const RelativeParameter* rp = nonterminal->relative_parameter();
-        if (rp->day_of_week_interpretation()) {
-          for (const int interpretation : *rp->day_of_week_interpretation()) {
-            instance->data.properties[idx].int_values.push_back(interpretation);
-          }
-        }
-      }
-    }
-  }
-}
-
-void FillDateRangeInstance(const DateRangeMatch& range, Annotation* instance) {
-  instance->begin = range.begin;
-  instance->end = range.end;
-  instance->data.type = kDateTimeRangeType;
-
-  Annotation from_date;
-  FillDateInstance(range.from, &from_date);
-  AddAnnotationDataProperty(kDateTimeRangeFrom, from_date.data, instance);
-
-  Annotation to_date;
-  FillDateInstance(range.to, &to_date);
-  AddAnnotationDataProperty(kDateTimeRangeTo, to_date.data, instance);
-}
-
-namespace {
-int NormalizeField(int base, int zero, int* valp, int carry_in) {
-  int carry_out = 0;
-  int val = *valp;
-  if (zero != 0 && val < 0) {
-    val += base;
-    carry_out -= 1;
-  }
-  val -= zero;
-  carry_out += val / base;
-  int rem = val % base;
-  if (carry_in != 0) {
-    carry_out += carry_in / base;
-    rem += carry_in % base;
-    if (rem < 0) {
-      carry_out -= 1;
-      rem += base;
-    } else if (rem >= base) {
-      carry_out += 1;
-      rem -= base;
-    }
-  }
-  if (rem < 0) {
-    carry_out -= 1;
-    rem += base;
-  }
-  *valp = rem + zero;
-  return carry_out;
-}
-
-int DaysPerYear(int year) {
-  if (IsLeapYear(year)) {
-    return DAYSPERLYEAR;
-  }
-  return DAYSPERNYEAR;
-}
-
-const int8 kDaysPer100Years[401] = {
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-int DaysPer100Years(int eyear) { return 36524 + kDaysPer100Years[eyear]; }
-
-const int8 kDaysPer4Years[401] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-int DaysPer4Years(int eyear) { return 1460 + kDaysPer4Years[eyear]; }
-
-#define DAYORDYEARMAX (25252734927766553LL)
-#define DAYORDYEARMIN (-25252734927764584LL)
-
-// Normalize year, month, day, hour, minute and second to valid value. For
-// example:  1hour 15minute 62second is normalized as 1hour 16 minute 2second.
-bool NormalizeDateFields(int* year, int* month, int* day, int* hour,
-                         int* minute, int* second) {
-  int min_carry = NormalizeField(SECSPERMIN, 0, second, 0);
-  int hour_carry = NormalizeField(MINSPERHOUR, 0, minute, min_carry);
-  int day_carry = NormalizeField(HOURSPERDAY, 0, hour, hour_carry);
-  int year_carry = NormalizeField(MONSPERYEAR, 1, month, 0);
-  bool normalized = min_carry || hour_carry || day_carry || year_carry;
-
-  // Normalize the number of days within a 400-year (146097-day) period.
-  if (int c4_carry = NormalizeField(146097, 1, day, day_carry)) {
-    year_carry += c4_carry * 400;
-    normalized = true;
-  }
-
-  // Extract a [0:399] year calendrically equivalent to (year + year_carry)
-  // from that sum in order to simplify year/day normalization and to defer
-  // the possibility of int64 overflow until the final stage.
-  int eyear = *year % 400;
-  if (year_carry != 0) {
-    eyear += year_carry;
-    eyear %= 400;
-  }
-  if (eyear < 0) eyear += 400;
-  year_carry -= eyear;
-
-  int orig_day = *day;
-  if (*day > DAYSPERNYEAR) {
-    eyear += (*month > 2 ? 1 : 0);
-    if (*day > 146097 - DAYSPERNYEAR) {
-      // We often hit the 400th year when stepping a civil time backwards,
-      // so special case it to avoid counting up by 100/4/1 year chunks.
-      *day = DaysPerYear(eyear += 400 - 1) - (146097 - *day);
-    } else {
-      // Handle days in chunks of 100/4/1 years.
-      for (int ydays = DaysPer100Years(eyear); *day > ydays;
-           *day -= ydays, ydays = DaysPer100Years(eyear)) {
-        if ((eyear += 100) > 400) {
-          eyear -= 400;
-          year_carry += 400;
-        }
-      }
-      for (int ydays = DaysPer4Years(eyear); *day > ydays;
-           *day -= ydays, ydays = DaysPer4Years(eyear)) {
-        if ((eyear += 4) > 400) {
-          eyear -= 400;
-          year_carry += 400;
-        }
-      }
-      for (int ydays = DaysPerYear(eyear); *day > ydays;
-           *day -= ydays, ydays = DaysPerYear(eyear)) {
-        eyear += 1;
-      }
-    }
-    eyear -= (*month > 2 ? 1 : 0);
-  }
-  // Handle days within one year.
-  bool leap_year = IsLeapYear(eyear);
-  for (int mdays = kDaysPerMonth[leap_year][*month]; *day > mdays;
-       *day -= mdays, mdays = kDaysPerMonth[leap_year][*month]) {
-    if (++*month > MONSPERYEAR) {
-      *month = 1;
-      leap_year = IsLeapYear(++eyear);
-    }
-  }
-  if (*day != orig_day) normalized = true;
-
-  // Add the updated eyear back into (year + year_carry).
-  year_carry += eyear;
-  // Overflow.
-  if (*year > DAYORDYEARMAX - year_carry) {
-    return false;
-  } else if (*year < DAYORDYEARMIN - year_carry) {
-    return false;
-  }
-  *year += year_carry;
-  return true;
-}
-
-// Compute the day difference between the day of week in relative date and wday.
-// If the relative date is in future, return positive days. otherwise return the
-// negative future. For example:
-// if day of week in relative date is Mon this week and wday is Wed this week,
-// then return -2.
-// if day of week in relative date is Wed this week and wday is Mon this week,
-// then return 2.
-int32 RelativeDOWToDays(const Property& rd, const int wday) {
-  int days = -1;
-  int multiplier = 1;
-  for (int i = 9; i < rd.int_values.size(); ++i) {
-    int inter = rd.int_values.at(i);
-    int dow = rd.int_values.at(8) - 1;
-    int interval = 0;
-    int cur_multiplier = 1;
-    if (inter == RelativeParameter_::Interpretation_NEAREST_LAST ||
-        inter == RelativeParameter_::Interpretation_PREVIOUS) {
-      // Represent the DOW in the last week.
-      cur_multiplier = -1;
-      if (dow <= wday) {
-        interval = 7 + (wday - dow);
-      } else {
-        interval = 7 - (dow - wday);
-      }
-    } else if (inter == RelativeParameter_::Interpretation_SECOND_LAST) {
-      // Represent the DOW in the week before last week.
-      cur_multiplier = -1;
-      if (dow <= wday) {
-        interval = 14 + (wday - dow);
-      } else {
-        interval = 14 - (dow - wday);
-      }
-    } else if (inter == RelativeParameter_::Interpretation_NEAREST_NEXT ||
-               inter == RelativeParameter_::Interpretation_COMING) {
-      // Represent the DOW in the next week.
-      cur_multiplier = 1;
-      if (dow <= wday) {
-        interval = 7 - (wday - dow);
-      } else {
-        interval = 7 + (dow - wday);
-      }
-      // Represent the DOW in the week of next week.
-    } else if (inter == RelativeParameter_::Interpretation_SECOND_NEXT) {
-      cur_multiplier = 1;
-      if (dow <= wday) {
-        interval = 14 - (wday - dow);
-      } else {
-        interval = 14 + (dow - wday);
-      }
-      // Represent the DOW in the same week regardless of it's past of future.
-    } else if (inter == RelativeParameter_::Interpretation_CURRENT ||
-               inter == RelativeParameter_::Interpretation_NEAREST ||
-               inter == RelativeParameter_::Interpretation_SOME) {
-      interval = abs(wday - dow);
-      cur_multiplier = dow < wday ? -1 : 1;
-    }
-    if (days == -1 || interval < days) {
-      days = interval;
-      multiplier = cur_multiplier;
-    }
-  }
-  return days * multiplier;
-}
-
-// Compute the absolute date and time based on timestamp and relative date and
-// fill the fields year, month, day, hour, minute and second.
-bool RelativeDateToAbsoluteDate(struct tm ts, AnnotationData* date) {
-  int idx = GetPropertyIndex(kDateTimeRelative, *date);
-  if (idx < 0) {
-    return false;
-  }
-  Property* datetime = FindOrCreateDefaultDateTime(date);
-  Property* relative = &date->properties[idx];
-  int year = ts.tm_year + 1900;  // The year in struct tm is since 1900
-  int month = ts.tm_mon + 1;     // Convert to [1, 12]
-  int day = ts.tm_mday;
-  int hour = ts.tm_hour;
-  int minute = ts.tm_min;
-  int second = ts.tm_sec;
-  // If the instance has time, it doesn't make sense to update time based on
-  // relative time. so we simply clear the time in relative date.
-  // For example: 2 days 1 hours ago at 10:00am, the 1 hours will be removed.
-  if (datetime->int_values[3] > 0) {
-    relative->int_values[5] = -1;
-    relative->int_values[6] = -1;
-    relative->int_values[7] = -1;
-  }
-
-  // Get the relative year, month, day, hour, minute and second.
-  if (relative->int_values[8] > 0) {
-    day += RelativeDOWToDays(*relative, ts.tm_wday);
-  } else {
-    int multipler = (relative->int_values[0] > 0) ? 1 : -1;
-    if (relative->int_values[1] > 0) {
-      year += relative->int_values[1] * multipler;
-    }
-    if (relative->int_values[2] > 0) {
-      month += relative->int_values[2] * multipler;
-    }
-    if (relative->int_values[3] > 0) {
-      day += relative->int_values[3] * multipler;
-    }
-    if (relative->int_values[5] > 0) {
-      hour += relative->int_values[5] * multipler;
-    }
-    if (relative->int_values[6] > 0) {
-      minute += relative->int_values[6] * multipler;
-    }
-    if (relative->int_values[7] > 0) {
-      second += relative->int_values[7] * multipler;
-    }
-  }
-
-  if (!NormalizeDateFields(&year, &month, &day, &hour, &minute, &second)) {
-    TC3_VLOG(1) << "Can not normalize date " << year << "-" << month << "-"
-                << day << " " << hour << ":" << minute << ":" << second;
-    return false;
-  }
-
-  // Update year, month, day, hour, minute and second of date instance. We only
-  // update the time unit if the relative date has it. For example:
-  // if the relative date is "1 hour ago", then we don't set minite and second
-  // in data intance, but we set hour and the time unit which is larger than
-  // hour like day, month and year.
-  // if the relative date is "1 year ago", we only update year in date instance
-  // and ignore others.
-  bool set = false;
-  if (relative->int_values[7] >= 0) {
-    set = true;
-    datetime->int_values[5] = second;
-  }
-  if (set || relative->int_values[6] >= 0) {
-    set = true;
-    datetime->int_values[4] = minute;
-  }
-  if (set || relative->int_values[5] >= 0) {
-    set = true;
-    datetime->int_values[3] = hour;
-  }
-  if (set || relative->int_values[3] >= 0 || relative->int_values[8] >= 0) {
-    set = true;
-    datetime->int_values[2] = day;
-  }
-  if (set || relative->int_values[2] >= 0) {
-    set = true;
-    datetime->int_values[1] = month;
-  }
-  if (set || relative->int_values[1] >= 0) {
-    set = true;
-    datetime->int_values[0] = year;
-  }
-  return true;
-}
-
-// If the year is less than 100 and has no bc/ad, it should be normalized.
-static constexpr int kMinYearForNormalization = 100;
-
-// Normalize date instance.
-void NormalizeDateInstance(time_t timestamp, AnnotationData* inst) {
-  struct tm ts;
-  localtime_r(&timestamp, &ts);
-
-  int idx = GetPropertyIndex(kDateTime, *inst);
-  if (idx >= 0) {
-    Property* datetime = &inst->properties[idx];
-    int bc_ad = -1;
-    idx = GetPropertyIndex(kDateTimeSupplementary, *inst);
-    if (idx >= 0) {
-      bc_ad = inst->properties[idx].int_values[0];
-    }
-
-    int year = datetime->int_values[0];
-    if (bc_ad < 0 && year > 0 && year < kMinYearForNormalization) {
-      if (2000 + year <= ts.tm_year + 1900) {
-        datetime->int_values[0] = 2000 + year;
-      } else {
-        datetime->int_values[0] = 1900 + year;
-      }
-    }
-    // Day-of-week never only appear in date instance, it must be in both
-    // relative date and non-relative date. If the date instance already has day
-    // like "Monday, March 19", it doesn't make sense to convert the dow to
-    // absolute date again.
-    if (datetime->int_values[7] > 0 && datetime->int_values[2] > 0) {
-      return;
-    }
-  }
-  RelativeDateToAbsoluteDate(ts, inst);
-}
-
-// Convert normalized date instance to unix time.
-time_t DateInstanceToUnixTimeInternal(time_t timestamp,
-                                      const AnnotationData& inst) {
-  int idx = GetPropertyIndex(kDateTime, inst);
-  if (idx < 0) {
-    return -1;
-  }
-  const Property& prop = inst.properties[idx];
-
-  struct tm ts;
-  localtime_r(&timestamp, &ts);
-
-  if (prop.int_values[0] > 0) {
-    ts.tm_year = prop.int_values[0] - 1900;
-  }
-  if (prop.int_values[1] > 0) {
-    ts.tm_mon = prop.int_values[1] - 1;
-  }
-  if (prop.int_values[2] > 0) {
-    ts.tm_mday = prop.int_values[2];
-  }
-  if (prop.int_values[3] > 0) {
-    ts.tm_hour = prop.int_values[3];
-  }
-  if (prop.int_values[4] > 0) {
-    ts.tm_min = prop.int_values[4];
-  }
-  if (prop.int_values[5] > 0) {
-    ts.tm_sec = prop.int_values[5];
-  }
-  ts.tm_wday = -1;
-  ts.tm_yday = -1;
-  return mktime(&ts);
-}
-}  // namespace
-
-void NormalizeDateTimes(time_t timestamp, std::vector<Annotation>* dates) {
-  for (int i = 0; i < dates->size(); ++i) {
-    if ((*dates)[i].data.type == kDateTimeType) {
-      NormalizeDateInstance(timestamp, &(*dates)[i].data);
-    }
-  }
+  // Filling to DatetimeParseResult.
+  instance->data.emplace_back();
+  range.to.FillDatetimeComponents(&instance->data.back().datetime_components);
 }
 
 namespace {

diff --git a/native/annotator/grammar/dates/utils/date-utils.h b/native/annotator/grammar/dates/utils/date-utils.h
index de459ea..2fcda92 100644
--- a/native/annotator/grammar/dates/utils/date-utils.h
+++ b/native/annotator/grammar/dates/utils/date-utils.h

@@ -45,28 +45,23 @@
 bool IsRefinement(const DateMatch& a, const DateMatch& b);
 bool IsRefinement(const DateRangeMatch& a, const DateRangeMatch& b);
 
-// Increment the date by one day. Will consider special cases such as leap year.
-// However, the input date must be valid otherwise will return a valid date that
-// is not following the input date
-void IncrementOneDay(DateMatch* date);
-
 // Returns true iff "a" occurs strictly before "b"
 bool IsPrecedent(const DateMatch& a, const DateMatch& b);
 
-// Fill DateInstance proto based on DateMatch object which is created from
+// Fill DatetimeParseResult based on DateMatch object which is created from
 // matched rule. The matched string is extracted from tokenizer which provides
 // an interface to access the clean text based on the matched range.
-void FillDateInstance(const DateMatch& date, Annotation* instance);
+void FillDateInstance(const DateMatch& date, DatetimeParseResult* instance);
 
-// Fill DateRangeInstance proto based on DateRangeMatch object which is created
+// Fill DatetimeParseResultSpan based on DateMatch object which is created from
+// matched rule. The matched string is extracted from tokenizer which provides
+// an interface to access the clean text based on the matched range.
+void FillDateInstance(const DateMatch& date, DatetimeParseResultSpan* instance);
+
+// Fill DatetimeParseResultSpan based on DateRangeMatch object which is created
 // from matched rule.
-void FillDateRangeInstance(const DateRangeMatch& range, Annotation* instance);
-
-// Normalize DateTimes based on timestamp.
-// Currently it does two things:
-//   -- Convert relative date to absolute date
-//   -- Normalize year if year is two digit
-void NormalizeDateTimes(time_t timestamp, std::vector<Annotation>* dates);
+void FillDateRangeInstance(const DateRangeMatch& range,
+                           DatetimeParseResultSpan* instance);
 
 // Merge the fields in DateMatch prev to next if there is no overlapped field.
 // If update_span is true, the span of next is also updated.
@@ -81,7 +76,6 @@
 // It's impossible to get DateMatch which only has DOW and not in relative
 // match according to current rules.
 bool IsDateMatchMergeable(const DateMatch& prev, const DateMatch& next);
-
 }  // namespace dates
 }  // namespace libtextclassifier3
 

diff --git a/native/annotator/grammar/grammar-annotator.cc b/native/annotator/grammar/grammar-annotator.cc
index 6d04b96..baa3fac 100644
--- a/native/annotator/grammar/grammar-annotator.cc
+++ b/native/annotator/grammar/grammar-annotator.cc

@@ -47,11 +47,11 @@
 
 class GrammarAnnotatorCallbackDelegate : public grammar::CallbackDelegate {
  public:
-  GrammarAnnotatorCallbackDelegate(
+  explicit GrammarAnnotatorCallbackDelegate(
       const UniLib* unilib, const GrammarModel* model,
       const ReflectiveFlatbufferBuilder* entity_data_builder,
       const ModeFlag mode)
-      : unilib_(unilib),
+      : unilib_(*unilib),
         model_(model),
         entity_data_builder_(entity_data_builder),
         mode_(mode) {}
@@ -64,24 +64,16 @@
         HandleRuleMatch(match, /*rule_id=*/value);
         return;
       }
-      case GrammarAnnotator::Callback::kCapturingMatch: {
-        HandleCapturingMatch(match, /*match_id=*/value, matcher);
-        return;
-      }
-      case GrammarAnnotator::Callback::kAssertionMatch: {
-        HandleAssertion(match, /*negative=*/(value != 0), matcher);
-        return;
-      }
       default:
-        TC3_LOG(ERROR) << "Unhandled match type: " << type;
+        grammar::CallbackDelegate::MatchFound(match, type, value, matcher);
     }
   }
 
   // Deduplicate and populate annotations from grammar matches.
   bool GetAnnotations(const std::vector<UnicodeText::const_iterator>& text,
                       std::vector<AnnotatedSpan>* annotations) const {
-    for (const grammar::RuleMatch& candidate :
-         grammar::DeduplicateMatches(candidates_)) {
+    for (const grammar::Derivation& candidate :
+         grammar::DeduplicateDerivations(candidates_)) {
       // Check that assertions are fulfilled.
       if (!grammar::VerifyAssertions(candidate.match)) {
         continue;
@@ -95,10 +87,10 @@
 
   bool GetTextSelection(const std::vector<UnicodeText::const_iterator>& text,
                         const CodepointSpan& selection, AnnotatedSpan* result) {
-    std::vector<grammar::RuleMatch> selection_candidates;
+    std::vector<grammar::Derivation> selection_candidates;
     // Deduplicate and verify matches.
     auto maybe_interpretation = GetBestValidInterpretation(
-        grammar::DeduplicateMatches(GetOverlappingRuleMatches(
+        grammar::DeduplicateDerivations(GetOverlappingRuleMatches(
             selection, candidates_, /*only_exact_overlap=*/false)));
     if (!maybe_interpretation.has_value()) {
       return false;
@@ -116,7 +108,7 @@
                          ClassificationResult* classification) const {
     // Deduplicate and verify matches.
     auto maybe_interpretation = GetBestValidInterpretation(
-        grammar::DeduplicateMatches(GetOverlappingRuleMatches(
+        grammar::DeduplicateDerivations(GetOverlappingRuleMatches(
             selection, candidates_, /*only_exact_overlap=*/true)));
     if (!maybe_interpretation.has_value()) {
       return false;
@@ -135,7 +127,7 @@
   void HandleRuleMatch(const grammar::Match* match, const int64 rule_id) {
     if ((model_->rule_classification_result()->Get(rule_id)->enabled_modes() &
          mode_) != 0) {
-      candidates_.push_back(grammar::RuleMatch{match, rule_id});
+      candidates_.push_back(grammar::Derivation{match, rule_id});
     }
   }
 
@@ -150,8 +142,13 @@
 
     // Set information from capturing matches.
     CodepointSpan span{kInvalidIndex, kInvalidIndex};
-    const std::unordered_map<uint16, const grammar::CapturingMatch*>
-        capturing_matches = GatherCapturingMatches(match);
+    // Gather active capturing matches.
+    std::unordered_map<uint16, const grammar::Match*> capturing_matches;
+    for (const grammar::MappingMatch* match :
+         grammar::SelectAllOfType<grammar::MappingMatch>(
+             match, grammar::Match::kMappingMatch)) {
+      capturing_matches[match->id] = match;
+    }
 
     // Compute span boundaries.
     for (int i = 0; i < classification->capturing_group()->size(); i++) {
@@ -175,12 +172,12 @@
   }
 
   // Filters out results that do not overlap with a reference span.
-  std::vector<grammar::RuleMatch> GetOverlappingRuleMatches(
+  std::vector<grammar::Derivation> GetOverlappingRuleMatches(
       const CodepointSpan& selection,
-      const std::vector<grammar::RuleMatch>& candidates,
+      const std::vector<grammar::Derivation>& candidates,
       const bool only_exact_overlap) const {
-    std::vector<grammar::RuleMatch> result;
-    for (const grammar::RuleMatch& candidate : candidates) {
+    std::vector<grammar::Derivation> result;
+    for (const grammar::Derivation& candidate : candidates) {
       // Discard matches that do not match the selection.
       // Simple check.
       if (!SpansOverlap(selection, candidate.match->codepoint_span)) {
@@ -205,11 +202,11 @@
   Optional<std::pair<const GrammarModel_::RuleClassificationResult*,
                      const grammar::Match*>>
   GetBestValidInterpretation(
-      const std::vector<grammar::RuleMatch>& candidates) const {
+      const std::vector<grammar::Derivation>& candidates) const {
     const GrammarModel_::RuleClassificationResult* best_interpretation =
         nullptr;
     const grammar::Match* best_match = nullptr;
-    for (const grammar::RuleMatch& candidate : candidates) {
+    for (const grammar::Derivation& candidate : candidates) {
       if (!grammar::VerifyAssertions(candidate.match)) {
         continue;
       }
@@ -238,7 +235,7 @@
   // result.
   bool AddAnnotatedSpanFromMatch(
       const std::vector<UnicodeText::const_iterator>& text,
-      const grammar::RuleMatch& candidate,
+      const grammar::Derivation& candidate,
       std::vector<AnnotatedSpan>* result) const {
     if (candidate.rule_id < 0 ||
         candidate.rule_id >= model_->rule_classification_result()->size()) {
@@ -293,8 +290,13 @@
 
     // Populate entity data from the capturing matches.
     if (interpretation->capturing_group() != nullptr) {
-      const std::unordered_map<uint16, const grammar::CapturingMatch*>
-          capturing_matches = GatherCapturingMatches(match);
+      // Gather active capturing matches.
+      std::unordered_map<uint16, const grammar::Match*> capturing_matches;
+      for (const grammar::MappingMatch* match :
+           grammar::SelectAllOfType<grammar::MappingMatch>(
+               match, grammar::Match::kMappingMatch)) {
+        capturing_matches[match->id] = match;
+      }
       for (int i = 0; i < interpretation->capturing_group()->size(); i++) {
         auto it = capturing_matches.find(i);
         if (it == capturing_matches.end()) {
@@ -312,7 +314,7 @@
 
         // Set entity field from captured text.
         if (group->entity_field_path() != nullptr) {
-          const grammar::CapturingMatch* capturing_match = it->second;
+          const grammar::Match* capturing_match = it->second;
           StringPiece group_text = StringPiece(
               text[capturing_match->codepoint_span.first].utf8_data(),
               text[capturing_match->codepoint_span.second].utf8_data() -
@@ -338,22 +340,22 @@
     return true;
   }
 
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const GrammarModel* model_;
   const ReflectiveFlatbufferBuilder* entity_data_builder_;
   const ModeFlag mode_;
 
   // All annotation/selection/classification rule match candidates.
   // Grammar rule matches are recorded, deduplicated and then instantiated.
-  std::vector<grammar::RuleMatch> candidates_;
+  std::vector<grammar::Derivation> candidates_;
 };
 
 GrammarAnnotator::GrammarAnnotator(
     const UniLib* unilib, const GrammarModel* model,
     const ReflectiveFlatbufferBuilder* entity_data_builder)
-    : unilib_(unilib),
+    : unilib_(*unilib),
       model_(model),
-      lexer_(*unilib),
+      lexer_(unilib, model->rules()),
       tokenizer_(BuildTokenizer(unilib, model->tokenizer_options())),
       entity_data_builder_(entity_data_builder),
       rules_locales_(grammar::ParseRulesLocales(model->rules())) {}
@@ -376,11 +378,12 @@
 
   // Run the grammar.
   GrammarAnnotatorCallbackDelegate callback_handler(
-      unilib_, model_, entity_data_builder_,
+      &unilib_, model_, entity_data_builder_,
       /*mode=*/ModeFlag_ANNOTATION);
-  grammar::Matcher matcher(*unilib_, model_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, model_->rules(), locale_rules,
                            &callback_handler);
-  lexer_.Process(tokenizer_.Tokenize(text), /*matches=*/{}, &matcher);
+  lexer_.Process(text, tokenizer_.Tokenize(text), /*annotations=*/nullptr,
+                 &matcher);
 
   // Populate results.
   return callback_handler.GetAnnotations(UnicodeCodepointOffsets(text), result);
@@ -406,11 +409,12 @@
 
   // Run the grammar.
   GrammarAnnotatorCallbackDelegate callback_handler(
-      unilib_, model_, entity_data_builder_,
+      &unilib_, model_, entity_data_builder_,
       /*mode=*/ModeFlag_SELECTION);
-  grammar::Matcher matcher(*unilib_, model_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, model_->rules(), locale_rules,
                            &callback_handler);
-  lexer_.Process(tokenizer_.Tokenize(text), /*matches=*/{}, &matcher);
+  lexer_.Process(text, tokenizer_.Tokenize(text), /*annotations=*/nullptr,
+                 &matcher);
 
   // Populate the result.
   return callback_handler.GetTextSelection(UnicodeCodepointOffsets(text),
@@ -437,16 +441,16 @@
 
   // Run the grammar.
   GrammarAnnotatorCallbackDelegate callback_handler(
-      unilib_, model_, entity_data_builder_,
+      &unilib_, model_, entity_data_builder_,
       /*mode=*/ModeFlag_CLASSIFICATION);
-  grammar::Matcher matcher(*unilib_, model_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, model_->rules(), locale_rules,
                            &callback_handler);
 
   const std::vector<Token> tokens = tokenizer_.Tokenize(text);
   if (model_->context_left_num_tokens() == -1 &&
       model_->context_right_num_tokens() == -1) {
     // Use all tokens.
-    lexer_.Process(tokens, /*matches=*/{}, &matcher);
+    lexer_.Process(text, tokens, /*annotations=*/{}, &matcher);
   } else {
     TokenSpan context_span = CodepointSpanToTokenSpan(
         tokens, selection, /*snap_boundaries_to_containing_tokens=*/true);
@@ -463,8 +467,8 @@
                                  context_span.second +
                                      model_->context_right_num_tokens()));
     }
-    lexer_.Process(begin, end,
-                   /*matches=*/{}, &matcher);
+    lexer_.Process(text, begin, end,
+                   /*annotations=*/nullptr, &matcher);
   }
 
   // Populate result.

diff --git a/native/annotator/grammar/grammar-annotator.h b/native/annotator/grammar/grammar-annotator.h
index 08504de..365bb44 100644
--- a/native/annotator/grammar/grammar-annotator.h
+++ b/native/annotator/grammar/grammar-annotator.h

@@ -35,12 +35,11 @@
  public:
   enum class Callback : grammar::CallbackId {
     kRuleMatch = 1,
-    kCapturingMatch = 2,
-    kAssertionMatch = 3,
   };
 
-  GrammarAnnotator(const UniLib* unilib, const GrammarModel* model,
-                   const ReflectiveFlatbufferBuilder* entity_data_builder);
+  explicit GrammarAnnotator(
+      const UniLib* unilib, const GrammarModel* model,
+      const ReflectiveFlatbufferBuilder* entity_data_builder);
 
   // Annotates a given text.
   // Returns true if the text was successfully annotated.
@@ -60,7 +59,7 @@
                         AnnotatedSpan* result) const;
 
  private:
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const GrammarModel* model_;
   const grammar::Lexer lexer_;
   const Tokenizer tokenizer_;

diff --git a/native/annotator/grammar/utils.cc b/native/annotator/grammar/utils.cc
index 9c6af90..8b9363d 100644
--- a/native/annotator/grammar/utils.cc
+++ b/native/annotator/grammar/utils.cc

@@ -16,10 +16,19 @@
 
 #include "annotator/grammar/utils.h"
 
+#include "utils/grammar/utils/rules.h"
+
 namespace libtextclassifier3 {
+namespace {
+
+using ::libtextclassifier3::GrammarModel_::RuleClassificationResultT;
+
+}  // namespace
 
 Tokenizer BuildTokenizer(const UniLib* unilib,
                          const GrammarTokenizerOptions* options) {
+  TC3_CHECK(options != nullptr);
+
   std::vector<const TokenizationCodepointRange*> codepoint_config;
   if (options->tokenization_codepoint_config() != nullptr) {
     codepoint_config.insert(codepoint_config.end(),
@@ -42,4 +51,16 @@
                    /*icu_preserve_whitespace_tokens=*/false);
 }
 
+int AddRuleClassificationResult(const std::string& collection,
+                                const ModeFlag& enabled_modes,
+                                GrammarModelT* model) {
+  const int result_id = model->rule_classification_result.size();
+  model->rule_classification_result.emplace_back(new RuleClassificationResultT);
+  RuleClassificationResultT* result =
+      model->rule_classification_result.back().get();
+  result->collection_name = collection;
+  result->enabled_modes = enabled_modes;
+  return result_id;
+}
+
 }  // namespace libtextclassifier3

diff --git a/native/annotator/grammar/utils.h b/native/annotator/grammar/utils.h
index a86a4b2..4d870fd 100644
--- a/native/annotator/grammar/utils.h
+++ b/native/annotator/grammar/utils.h

@@ -29,6 +29,14 @@
 Tokenizer BuildTokenizer(const UniLib* unilib,
                          const GrammarTokenizerOptions* options);
 
+// Adds a rule classification result to the |model|.
+// collection: the classification entity detected.
+// enabled_modes: the target to apply the given rule.
+// Returns the ID associated with the created classification rule.
+int AddRuleClassificationResult(const std::string& collection,
+                                const ModeFlag& enabled_modes,
+                                GrammarModelT* model);
+
 }  // namespace libtextclassifier3
 
 #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_GRAMMAR_UTILS_H_

diff --git a/native/annotator/knowledge/knowledge-engine-dummy.h b/native/annotator/knowledge/knowledge-engine-dummy.h
index 417500e..e9f688a 100644
--- a/native/annotator/knowledge/knowledge-engine-dummy.h
+++ b/native/annotator/knowledge/knowledge-engine-dummy.h

@@ -21,6 +21,7 @@
 
 #include "annotator/model_generated.h"
 #include "annotator/types.h"
+#include "utils/base/status.h"
 #include "utils/optional.h"
 #include "utils/utf8/unilib.h"
 
@@ -38,16 +39,27 @@
   bool ClassifyText(const std::string& text, CodepointSpan selection_indices,
                     AnnotationUsecase annotation_usecase,
                     const Optional<LocationContext>& location_context,
+                    const Permissions& permissions,
                     ClassificationResult* classification_result) const {
     return false;
   }
 
   bool Chunk(const std::string& text, AnnotationUsecase annotation_usecase,
              const Optional<LocationContext>& location_context,
+             const Permissions& permissions,
              std::vector<AnnotatedSpan>* result) const {
     return true;
   }
 
+  Status ChunkMultipleSpans(
+      const std::vector<std::string>& text_fragments,
+      AnnotationUsecase annotation_usecase,
+      const Optional<LocationContext>& location_context,
+      const Permissions& permissions,
+      std::vector<std::vector<AnnotatedSpan>>* results) const {
+    return Status::OK;
+  }
+
   bool LookUpEntity(const std::string& id,
                     std::string* serialized_knowledge_result) const {
     return false;

diff --git a/native/annotator/model-executor.h b/native/annotator/model-executor.h
index bcc318b..5d6c4a7 100644
--- a/native/annotator/model-executor.h
+++ b/native/annotator/model-executor.h

@@ -56,8 +56,8 @@
   explicit ModelExecutor(std::unique_ptr<const tflite::FlatBufferModel> model)
       : TfLiteModelExecutor(std::move(model)) {}
 
-  static const int kInputIndexFeatures = 0;
-  static const int kOutputIndexLogits = 0;
+  static constexpr int kInputIndexFeatures = 0;
+  static constexpr int kOutputIndexLogits = 0;
 };
 
 // Executor for embedding sparse features into a dense vector.

diff --git a/native/annotator/model.fbs b/native/annotator/model.fbs
index 85c5ec7..bdb7a17 100755
--- a/native/annotator/model.fbs
+++ b/native/annotator/model.fbs

@@ -15,6 +15,7 @@
 //
 
 include "annotator/entity-data.fbs";
+include "annotator/experimental/experimental.fbs";
 include "annotator/grammar/dates/dates.fbs";
 include "utils/codepoint-range.fbs";
 include "utils/flatbuffers.fbs";
@@ -395,16 +396,7 @@
   // If the flag is false, The extracted annotation contains two date
   // instance: "Monday" and "6pm".
   enable_date_range:bool = true;
-
-  // If enabled, expand a date series. Must have date_range enabled to be
-  // used. The date range cannot exceed 30 days.
-  // input: April 4-6, 6:30pm
-  // If the flag is true, the extracted annotation will contain 3 instance
-  // which are April 4 at 6:30pm, April 5 at 6:30pm and April 6 at 6:30pm
-  // all have the same begin and end annotation
-  // If the flag is false, the extracted annotation contains one time
-  // range instance and one date instance
-  expand_date_series:bool = true;
+  reserved_6:int16 (deprecated);
 
   // If enabled, the rule priority score is used to set the priority score of
   // the annotation.
@@ -417,7 +409,7 @@
   // e.g. '9:45' will be resolved to '9:45 AM' and '9:45 PM'.
   generate_alternative_interpretations_when_ambiguous:bool;
 
-  // List of tokens which grammar will ignore during the match e.g. if
+  // List of spans which grammar will ignore during the match e.g. if
   // “@” is in the allowed span list and input is “12 March @ 12PM” then “@”
   // will be ignored and 12 March @ 12PM will be translate to
   // {Day:12 Month: March Hour: 12 MERIDIAN: PM}.
@@ -426,7 +418,7 @@
   // <Digit_Day> <Month> @ <Time>
   // Though this is doable in the grammar but requires multiple rules, this
   // list enables the rule to represent multiple rules.
-  ignored_tokens:[string];
+  ignored_spans:[string];
 }
 
 namespace libtextclassifier3;
@@ -518,6 +510,12 @@
   separators:[int];
 }
 
+namespace libtextclassifier3.ModelTriggeringOptions_;
+table CollectionToPriorityEntry {
+  key:string (key, shared);
+  value:float;
+}
+
 // Options controlling the output of the Tensorflow Lite models.
 namespace libtextclassifier3;
 table ModelTriggeringOptions {
@@ -541,9 +539,12 @@
 
   // Priority score assigned to knowledge engine annotations.
   knowledge_priority_score:float = 0;
+  reserved_7:int16 (deprecated);
 
-  // If true, will prioritize the longest annotation during conflict resolution.
-  prioritize_longest_annotation:bool = false;
+  // Apply a factor to the priority score for entities that are added to this
+  // map. Key: collection type e.g. "address", "phone"..., Value: float number.
+  // NOTE: The entries here need to be sorted since we use LookupByKey.
+  collection_to_priority:[ModelTriggeringOptions_.CollectionToPriorityEntry];
 }
 
 // Options controlling the output of the classifier.
@@ -577,6 +578,18 @@
   pruned_row_bucket_id:int;
 }
 
+namespace libtextclassifier3.Model_;
+table ConflictResolutionOptions {
+  // If true, will prioritize the longest annotation during conflict
+  // resolution.
+  prioritize_longest_annotation:bool = false;
+
+  // If true, the annotator will perform conflict resolution between the
+  // different sub-annotators also in the RAW mode. If false, no conflict
+  // resolution will be performed in RAW mode.
+  do_conflict_resolution_in_raw_mode:bool = true;
+}
+
 namespace libtextclassifier3;
 table Model {
   // Comma-separated list of locales supported by the model as BCP 47 tags.
@@ -644,6 +657,8 @@
   money_parsing_options:MoneyParsingOptions;
   translate_annotator_options:TranslateAnnotatorOptions;
   grammar_model:GrammarModel;
+  conflict_resolution_options:Model_.ConflictResolutionOptions;
+  experimental_model:ExperimentalModel;
 }
 
 // Method for selecting the center token.
@@ -823,7 +838,7 @@
 
 namespace libtextclassifier3;
 table NumberAnnotatorOptions {
-  // If true, number annotations will be produced.
+  // If true, number and percentage annotations will be produced.
   enabled:bool = false;
 
   // Score to assign to the annotated numbers and percentages in the annotator.
@@ -832,32 +847,34 @@
   // Number priority score used for conflict resolution with the other models.
   priority_score:float = 0;
 
-  // The modes in which to enable number annotations.
+  // The modes in which to enable number and percentage annotations.
   enabled_modes:ModeFlag = ALL;
 
   // The annotation usecases for which to produce number annotations.
   // This is a flag field for values of AnnotationUsecase.
   enabled_annotation_usecases:uint = 4294967295;
 
-  // A list of codepoints that can form a prefix of a valid number.
+  // [Deprecated] A list of codepoints that can form a prefix of a valid number.
   allowed_prefix_codepoints:[int];
 
-  // A list of codepoints that can form a suffix of a valid number.
+  // [Deprecated] A list of codepoints that can form a suffix of a valid number.
   allowed_suffix_codepoints:[int];
 
-  // List of codepoints that will be stripped from beginning of predicted spans.
+  // [Deprecated] List of codepoints that will be stripped from beginning of
+  // predicted spans.
   ignored_prefix_span_boundary_codepoints:[int];
 
-  // List of codepoints that will be stripped from end of predicted spans.
+  // [Deprecated] List of codepoints that will be stripped from end of predicted
+  // spans.
   ignored_suffix_span_boundary_codepoints:[int];
 
-  // If true, percent annotations will be produced.
+  // [Deprecated] If true, percent annotations will be produced.
   enable_percentage:bool = false;
 
   // Zero separated and ordered list of suffixes that mark a percent.
   percentage_pieces_string:string (shared);
 
-  // List of suffixes offsets in the percent_pieces_string string.
+  // [Deprecated] List of suffixes offsets in the percent_pieces_string string.
   percentage_pieces_offsets:[int];
 
   // Priority score for the percentage annotation.
@@ -870,6 +887,10 @@
   // The maximum number of digits an annotated number can have. Requirement:
   // the value should be less or equal to 20.
   max_number_of_digits:int = 20;
+
+  // The annotation usecases for which to produce percentage annotations.
+  // This is a flag field for values of AnnotationUsecase.
+  percentage_annotation_usecases:uint = 2;
 }
 
 // DurationAnnotator is so far tailored for English and Japanese only.

diff --git a/native/annotator/number/number.cc b/native/annotator/number/number.cc
index fe986ae..3be6ad8 100644
--- a/native/annotator/number/number.cc
+++ b/native/annotator/number/number.cc

@@ -23,6 +23,7 @@
 #include "annotator/collections.h"
 #include "annotator/types.h"
 #include "utils/base/logging.h"
+#include "utils/strings/split.h"
 #include "utils/utf8/unicodetext.h"
 
 namespace libtextclassifier3 {
@@ -149,9 +150,8 @@
       UTF8ToUnicodeText(tokens[suffix_start_index].value, /*do_copy=*/false)
           .begin();
 
-  if (GetPercentSuffixLength(UTF8ToUnicodeText(tokens[suffix_start_index].value,
-                                               /*do_copy=*/false),
-                             0) > 0 &&
+  if (percent_suffixes_.find(tokens[suffix_start_index].value) !=
+          percent_suffixes_.end() &&
       TokensAreValidEnding(tokens, suffix_start_index + 1)) {
     return true;
   }
@@ -175,6 +175,25 @@
   return false;
 }
 
+// Searches for a percent suffix starting at token suffix_token_start_index,
+// skipping over whitespace tokens. Returns the end codepoint of the suffix
+// token when it is followed by a valid ending, and -1 otherwise.
+int NumberAnnotator::FindPercentSuffixEndCodepoint(
+    const std::vector<Token>& tokens,
+    const int suffix_token_start_index) const {
+  for (int index = suffix_token_start_index; index < tokens.size(); ++index) {
+    const Token& candidate = tokens[index];
+    const bool is_percent_suffix =
+        percent_suffixes_.find(candidate.value) != percent_suffixes_.end();
+    if (is_percent_suffix && TokensAreValidEnding(tokens, index + 1)) {
+      return candidate.end;
+    }
+    // Only whitespace may separate the number from its percent suffix.
+    if (!candidate.is_whitespace) {
+      return -1;
+    }
+  }
+
+  // Ran out of tokens without finding a suffix.
+  return -1;
+}
+
 bool NumberAnnotator::TryParseNumber(const UnicodeText& token_text,
                                      const bool is_negative,
                                      int64* parsed_int_value,
@@ -198,8 +217,7 @@
 bool NumberAnnotator::FindAll(const UnicodeText& context,
                               AnnotationUsecase annotation_usecase,
                               std::vector<AnnotatedSpan>* result) const {
-  if (!options_->enabled() || ((1 << annotation_usecase) &
-                               options_->enabled_annotation_usecases()) == 0) {
+  if (!options_->enabled()) {
     return true;
   }
 
@@ -230,80 +248,67 @@
     }
 
     const bool has_decimal = !(parsed_int_value == parsed_double_value);
+    const int new_start_codepoint = is_negative ? token.start - 1 : token.start;
 
-    ClassificationResult classification{Collections::Number(),
-                                        options_->score()};
-    classification.numeric_value = parsed_int_value;
-    classification.numeric_double_value = parsed_double_value;
-    classification.priority_score =
-        has_decimal ? options_->float_number_priority_score()
-                    : options_->priority_score();
+    if (((1 << annotation_usecase) & options_->enabled_annotation_usecases()) !=
+        0) {
+      result->push_back(CreateAnnotatedSpan(
+          new_start_codepoint, token.end, parsed_int_value, parsed_double_value,
+          Collections::Number(), options_->score(),
+          /*priority_score=*/
+          has_decimal ? options_->float_number_priority_score()
+                      : options_->priority_score()));
+    }
 
-    AnnotatedSpan annotated_span;
-    annotated_span.span = {is_negative ? token.start - 1 : token.start,
-                           token.end};
-    annotated_span.classification.push_back(classification);
-    result->push_back(annotated_span);
-  }
-
-  if (options_->enable_percentage()) {
-    FindPercentages(context, result);
+    const int percent_end_codepoint =
+        FindPercentSuffixEndCodepoint(tokens, i + 1);
+    if (percent_end_codepoint != -1 &&
+        ((1 << annotation_usecase) &
+         options_->percentage_annotation_usecases()) != 0) {
+      result->push_back(CreateAnnotatedSpan(
+          new_start_codepoint, percent_end_codepoint, parsed_int_value,
+          parsed_double_value, Collections::Percentage(), options_->score(),
+          options_->percentage_priority_score()));
+    }
   }
 
   return true;
 }
 
-std::vector<uint32> NumberAnnotator::FlatbuffersIntVectorToStdVector(
-    const flatbuffers::Vector<int32_t>* ints) {
-  if (ints == nullptr) {
-    return {};
-  }
-  return {ints->begin(), ints->end()};
+// Builds an AnnotatedSpan with span {start, end} that carries a single
+// ClassificationResult made from the given collection, score, numeric values
+// and priority score.
+// NOTE(review): int_value is a plain int, while TryParseNumber reports the
+// parsed integer through an int64*; presumably large values get narrowed when
+// they are passed in here -- TODO confirm and widen the parameter if so.
+AnnotatedSpan NumberAnnotator::CreateAnnotatedSpan(
+    const int start, const int end, const int int_value,
+    const double double_value, const std::string collection, const float score,
+    const float priority_score) const {
+  ClassificationResult classification{collection, score};
+  classification.numeric_value = int_value;
+  classification.numeric_double_value = double_value;
+  classification.priority_score = priority_score;
+
+  AnnotatedSpan annotated_span;
+  annotated_span.span = {start, end};
+  annotated_span.classification.push_back(classification);
+  return annotated_span;
 }
 
-int NumberAnnotator::GetPercentSuffixLength(const UnicodeText& context,
-                                            int index_codepoints) const {
-  if (index_codepoints >= context.size_codepoints()) {
-    return -1;
+std::unordered_set<std::string>
+NumberAnnotator::FromFlatbufferStringToUnordredSet(
+    const flatbuffers::String* flatbuffer_percent_strings) {
+  std::unordered_set<std::string> strings_set;
+  if (flatbuffer_percent_strings == nullptr) {
+    return strings_set;
   }
-  auto context_it = context.begin();
-  std::advance(context_it, index_codepoints);
-  const StringPiece suffix_context(
-      context_it.utf8_data(),
-      std::distance(context_it.utf8_data(), context.end().utf8_data()));
-  StringSet::Match match;
-  percentage_suffixes_trie_.LongestPrefixMatch(suffix_context, &match);
 
-  if (match.match_length == -1) {
-    return match.match_length;
-  } else {
-    return UTF8ToUnicodeText(context_it.utf8_data(), match.match_length,
-                             /*do_copy=*/false)
-        .size_codepoints();
+  const std::string percent_strings = flatbuffer_percent_strings->str();
+  for (StringPiece suffix : strings::Split(percent_strings, '\0')) {
+    std::string percent_suffix = suffix.ToString();
+    percent_suffix.erase(
+        std::remove_if(percent_suffix.begin(), percent_suffix.end(),
+                       [](unsigned char x) { return std::isspace(x); }),
+        percent_suffix.end());
+    strings_set.insert(percent_suffix);
   }
-}
 
-void NumberAnnotator::FindPercentages(
-    const UnicodeText& context, std::vector<AnnotatedSpan>* result) const {
-  const int initial_result_size = result->size();
-  for (int i = 0; i < initial_result_size; ++i) {
-    AnnotatedSpan annotated_span = (*result)[i];
-    if (annotated_span.classification.empty() ||
-        annotated_span.classification[0].collection != Collections::Number()) {
-      continue;
-    }
-
-    const int match_length =
-        GetPercentSuffixLength(context, annotated_span.span.second);
-    if (match_length > 0) {
-      annotated_span.span = {annotated_span.span.first,
-                             annotated_span.span.second + match_length};
-      annotated_span.classification[0].collection = Collections::Percentage();
-      annotated_span.classification[0].priority_score =
-          options_->percentage_priority_score();
-      result->push_back(annotated_span);
-    }
-  }
+  return strings_set;
 }
 
 }  // namespace libtextclassifier3

diff --git a/native/annotator/number/number.h b/native/annotator/number/number.h
index 6022063..d83bea0 100644
--- a/native/annotator/number/number.h
+++ b/native/annotator/number/number.h

@@ -46,17 +46,8 @@
                              /*internal_tokenizer_codepoint_ranges=*/{},
                              /*split_on_script_change=*/false,
                              /*icu_preserve_whitespace_tokens=*/true)),
-        percentage_pieces_string_(
-            (options->percentage_pieces_string() == nullptr)
-                ? StringPiece()
-                : StringPiece(options->percentage_pieces_string()->data(),
-                              options->percentage_pieces_string()->size())),
-        percentage_pieces_offsets_(FlatbuffersIntVectorToStdVector(
-            options->percentage_pieces_offsets())),
-        percentage_suffixes_trie_(
-            SortedStringsTable(/*num_pieces=*/percentage_pieces_offsets_.size(),
-                               /*offsets=*/percentage_pieces_offsets_.data(),
-                               /*pieces=*/percentage_pieces_string_)),
+        percent_suffixes_(FromFlatbufferStringToUnordredSet(
+            options_->percentage_pieces_string())),
         max_number_of_digits_(options->max_number_of_digits()) {}
 
   // Classifies given text, and if it is a number, it passes the result in
@@ -71,12 +62,10 @@
                std::vector<AnnotatedSpan>* result) const;
 
  private:
-  static std::vector<uint32> FlatbuffersIntVectorToStdVector(
-      const flatbuffers::Vector<int32_t>* ints);
-
-  // Get the length of the percent suffix at the specified index in the context.
-  int GetPercentSuffixLength(const UnicodeText& context,
-                             int index_codepoints) const;
+  // Converts a Flatbuffer string containing zero-separated percent suffixes
+  // to an unordered set.
+  static std::unordered_set<std::string> FromFlatbufferStringToUnordredSet(
+      const flatbuffers::String* flatbuffer_percent_strings);
 
   // Checks if the annotated numbers from the context represent percentages.
   // If yes, replaces the collection type and the annotation boundary in the
@@ -87,38 +76,46 @@
   // Checks if the tokens from in the interval [start_index-2, start_index] are
   // valid characters that can preced a number context.
   bool TokensAreValidStart(const std::vector<Token>& tokens,
-                           const int start_index) const;
+                           int start_index) const;
 
   // Checks if the tokens in the interval (..., prefix_end_index] are a valid
   // number prefix.
   bool TokensAreValidNumberPrefix(const std::vector<Token>& tokens,
-                                  const int prefix_end_index) const;
+                                  int prefix_end_index) const;
 
   // Checks if the tokens from in the interval [ending_index, ending_index+2]
   // are valid characters that can follow a number context.
   bool TokensAreValidEnding(const std::vector<Token>& tokens,
-                            const int ending_index) const;
+                            int ending_index) const;
 
   // Checks if the tokens in the interval [suffix_start_index, ...) are a valid
   // number suffix.
   bool TokensAreValidNumberSuffix(const std::vector<Token>& tokens,
-                                  const int suffix_start_index) const;
+                                  int suffix_start_index) const;
+
+  // Checks if the tokens in the interval [suffix_token_start_index, ...) form a
+  // valid percent suffix. Returns its end codepoint if so, and -1 otherwise.
+  int FindPercentSuffixEndCodepoint(const std::vector<Token>& tokens,
+                                    int suffix_token_start_index) const;
 
   // Checks if the given text represents a number (either int or double).
-  bool TryParseNumber(const UnicodeText& token_text, const bool is_negative,
+  bool TryParseNumber(const UnicodeText& token_text, bool is_negative,
                       int64* parsed_int_value,
                       double* parsed_double_value) const;
 
   // Checks if a word contains only CJT characters.
   bool IsCJTterm(UnicodeText::const_iterator token_begin_it,
-                 const int token_length) const;
+                 int token_length) const;
+
+  AnnotatedSpan CreateAnnotatedSpan(int start, int end, int int_value,
+                                    double double_value,
+                                    const std::string collection, float score,
+                                    float priority_score) const;
 
   const NumberAnnotatorOptions* options_;
   const UniLib* unilib_;
   const Tokenizer tokenizer_;
-  const StringPiece percentage_pieces_string_;
-  const std::vector<uint32> percentage_pieces_offsets_;
-  const SortedStringsTable percentage_suffixes_trie_;
+  const std::unordered_set<std::string> percent_suffixes_;
   const int max_number_of_digits_;
 };
 

diff --git a/native/annotator/person_name/person_name_model.fbs b/native/annotator/person_name/person_name_model.fbs
index 091ad31..b15543f 100755
--- a/native/annotator/person_name/person_name_model.fbs
+++ b/native/annotator/person_name/person_name_model.fbs

@@ -26,7 +26,7 @@
   person_name:string (shared);
 }
 
-// Next ID: 5
+// Next ID: 6
 namespace libtextclassifier3;
 table PersonNameModel {
   // Decides if the person name annotator is enabled.
@@ -44,6 +44,14 @@
   // the heuristic to find the longest person name match.
   // required
   end_of_person_name_indicators:[int];
+
+  // Decides if only capitalized names should be annotated. In general, a
+  // capitalized name starts with an uppercase character and continues with
+  // lower case characters. In order to capture names such as O'Conell and
+  // McFee, this heuristic considers names as capitalized if they start with an
+  // upper case character and have at least one lower case character.
+  // required
+  annotate_capitalized_names_only:bool;
 }
 
 root_type libtextclassifier3.PersonNameModel;

diff --git a/native/annotator/quantization_test.cc b/native/annotator/quantization_test.cc
new file mode 100644
index 0000000..b995096
--- /dev/null
+++ b/native/annotator/quantization_test.cc

@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/quantization.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::ElementsAreArray;
+using testing::FloatEq;
+using testing::Matcher;
+
+namespace libtextclassifier3 {
+namespace {
+
+// Builds a matcher that compares a float vector element by element against
+// `values`, using FloatEq for each element instead of exact equality.
+Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
+  std::vector<Matcher<float>> element_matchers;
+  element_matchers.reserve(values.size());
+  for (const float expected : values) {
+    element_matchers.push_back(FloatEq(expected));
+  }
+  return ElementsAreArray(element_matchers);
+}
+
+// Dequantizes 8-bit embeddings for two different buckets and checks the
+// values that end up in the destination buffer.
+TEST(QuantizationTest, DequantizeAdd8bit) {
+  // One scale per bucket; each bucket stores four 8-bit quantized values.
+  std::vector<float> scales{{0.1, 9.0, -7.0}};
+  std::vector<uint8> embeddings{{/*0: */ 0x00, 0xFF, 0x09, 0x00,
+                                 /*1: */ 0xFF, 0x09, 0x00, 0xFF,
+                                 /*2: */ 0x09, 0x00, 0xFF, 0x09}};
+
+  const int quantization_bits = 8;
+  const int bytes_per_embedding = 4;
+  const int num_sparse_features = 7;
+
+  // For every byte b the test expects (b - 128) * scale / num_sparse_features.
+  {
+    const int bucket_id = 0;
+    const std::vector<float> expected{
+        // clang-format off
+        {1.0 / 7 * 0.1 * (0x00 - 128),
+         1.0 / 7 * 0.1 * (0xFF - 128),
+         1.0 / 7 * 0.1 * (0x09 - 128),
+         1.0 / 7 * 0.1 * (0x00 - 128)}
+        // clang-format on
+    };
+
+    std::vector<float> dequantized(4, 0.0);
+    DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                  num_sparse_features, quantization_bits, bucket_id,
+                  dequantized.data(), dequantized.size());
+
+    EXPECT_THAT(dequantized, ElementsAreFloat(expected));
+  }
+
+  {
+    const int bucket_id = 1;
+    const std::vector<float> expected{
+        // clang-format off
+        {1.0 / 7 * 9.0 * (0xFF - 128),
+         1.0 / 7 * 9.0 * (0x09 - 128),
+         1.0 / 7 * 9.0 * (0x00 - 128),
+         1.0 / 7 * 9.0 * (0xFF - 128)}
+        // clang-format on
+    };
+
+    std::vector<float> dequantized(4, 0.0);
+    DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                  num_sparse_features, quantization_bits, bucket_id,
+                  dequantized.data(), dequantized.size());
+
+    EXPECT_THAT(dequantized, ElementsAreFloat(expected));
+  }
+}
+
+// Dequantizes a 1-bit embedding whose bytes are all zero.
+TEST(QuantizationTest, DequantizeAdd1bitZeros) {
+  const int bytes_per_embedding = 4;
+  const int num_buckets = 3;
+  const int num_sparse_features = 7;
+  const int quantization_bits = 1;
+  const int bucket_id = 1;
+
+  // Unit scales and all-zero embedding bytes.
+  std::vector<float> scales(num_buckets, 1.0);
+  std::vector<uint8> embeddings(bytes_per_embedding * num_buckets, 0);
+
+  std::vector<float> dequantized(32);
+  DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                num_sparse_features, quantization_bits, bucket_id,
+                dequantized.data(), dequantized.size());
+
+  // Each of the 32 outputs is expected to be (0 - 1) / num_sparse_features.
+  const std::vector<float> expected(32, 1.0 / num_sparse_features * (0 - 1));
+  EXPECT_THAT(dequantized, ElementsAreFloat(expected));
+}
+
+// Dequantizes a 1-bit embedding whose bytes are all 0xFF.
+TEST(QuantizationTest, DequantizeAdd1bitOnes) {
+  const int bytes_per_embedding = 4;
+  const int num_buckets = 3;
+  const int num_sparse_features = 7;
+  const int quantization_bits = 1;
+  const int bucket_id = 1;
+
+  // Unit scales and embedding bytes with every bit set.
+  std::vector<float> scales(num_buckets, 1.0);
+  std::vector<uint8> embeddings(bytes_per_embedding * num_buckets, 0xFF);
+
+  std::vector<float> dequantized(32);
+  DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                num_sparse_features, quantization_bits, bucket_id,
+                dequantized.data(), dequantized.size());
+
+  // Each of the 32 outputs is expected to be (1 - 1) / num_sparse_features.
+  const std::vector<float> expected(32, 1.0 / num_sparse_features * (1 - 1));
+  EXPECT_THAT(dequantized, ElementsAreFloat(expected));
+}
+
+// Dequantizes a 3-bit embedding and checks the first ten output values.
+TEST(QuantizationTest, DequantizeAdd3bit) {
+  const int bytes_per_embedding = 4;
+  const int num_buckets = 3;
+  const int num_sparse_features = 7;
+  const int quantization_bits = 3;
+  const int bucket_id = 1;
+
+  std::vector<float> scales(num_buckets, 1.0);
+  scales[bucket_id] = 9.0;
+  std::vector<uint8> embeddings(bytes_per_embedding * num_buckets, 0);
+  // For bucket_id=1, the first three bytes pack the 3-bit values 1..7 for
+  // indices 0..6; the remaining bits are zero, so indices 7..9 hold 0.
+  embeddings[4] = (1 << 7) | (1 << 6) | (1 << 4) | 1;
+  embeddings[5] = (1 << 6) | (1 << 4) | (1 << 3);
+  embeddings[6] = (1 << 4) | (1 << 3) | (1 << 2) | (1 << 1) | 1;
+
+  std::vector<float> dequantized(10);
+  DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                num_sparse_features, quantization_bits, bucket_id,
+                dequantized.data(), dequantized.size());
+
+  // Expected quantized value per index; each one is re-centered by 4, scaled
+  // by the bucket's scale and divided by the number of sparse features.
+  const std::vector<int> quantized_values = {1, 2, 3, 4, 5, 6, 7, 0, 0, 0};
+  std::vector<float> expected;
+  for (const int quantized_value : quantized_values) {
+    expected.push_back(1.0 / num_sparse_features * (quantized_value - 4) *
+                       scales[bucket_id]);
+  }
+  EXPECT_THAT(dequantized, ElementsAreFloat(expected));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/strip-unpaired-brackets_test.cc b/native/annotator/strip-unpaired-brackets_test.cc
new file mode 100644
index 0000000..32585ce
--- /dev/null
+++ b/native/annotator/strip-unpaired-brackets_test.cc

@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/strip-unpaired-brackets.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Test fixture that owns the UniLib instance passed to StripUnpairedBrackets.
+class StripUnpairedBracketsTest : public ::testing::Test {
+ protected:
+  // INIT_UNILIB_FOR_TESTING is defined outside this file; presumably it
+  // expands to the member initializer UniLib needs in tests -- TODO confirm.
+  StripUnpairedBracketsTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+// Each case passes a text and a {start, end} span to StripUnpairedBrackets and
+// checks the span that comes back.
+TEST_F(StripUnpairedBracketsTest, StripUnpairedBrackets) {
+  // If the brackets match, nothing gets stripped.
+  EXPECT_EQ(StripUnpairedBrackets("call me (123) 456 today", {8, 17}, unilib_),
+            std::make_pair(8, 17));
+  EXPECT_EQ(StripUnpairedBrackets("call me (123 456) today", {8, 17}, unilib_),
+            std::make_pair(8, 17));
+
+  // If the brackets don't match, they get stripped: a bracket at the start of
+  // the span moves the start forward, one at the end moves the end back.
+  EXPECT_EQ(StripUnpairedBrackets("call me (123 456 today", {8, 16}, unilib_),
+            std::make_pair(9, 16));
+  EXPECT_EQ(StripUnpairedBrackets("call me )123 456 today", {8, 16}, unilib_),
+            std::make_pair(9, 16));
+  EXPECT_EQ(StripUnpairedBrackets("call me 123 456) today", {8, 16}, unilib_),
+            std::make_pair(8, 15));
+  EXPECT_EQ(StripUnpairedBrackets("call me 123 456( today", {8, 16}, unilib_),
+            std::make_pair(8, 15));
+
+  // Strips brackets correctly from length-1 selections that consist of
+  // a bracket only; the result is an empty span.
+  EXPECT_EQ(StripUnpairedBrackets("call me at ) today", {11, 12}, unilib_),
+            std::make_pair(12, 12));
+  EXPECT_EQ(StripUnpairedBrackets("call me at ( today", {11, 12}, unilib_),
+            std::make_pair(12, 12));
+
+  // Handles empty and invalid spans gracefully: they come back unchanged.
+  EXPECT_EQ(StripUnpairedBrackets("call me at  today", {11, 11}, unilib_),
+            std::make_pair(11, 11));
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {0, 0}, unilib_),
+            std::make_pair(0, 0));
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {11, 11}, unilib_),
+            std::make_pair(11, 11));
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {-1, -1}, unilib_),
+            std::make_pair(-1, -1));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/test_data/test_grammar_model.fb b/native/annotator/test_data/test_grammar_model.fb
deleted file mode 100644
index d6affd3..0000000
--- a/native/annotator/test_data/test_grammar_model.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/test_data/test_model.fb b/native/annotator/test_data/test_model.fb
deleted file mode 100644
index 6462e9c..0000000
--- a/native/annotator/test_data/test_model.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/test_data/test_person_name_model.fb b/native/annotator/test_data/test_person_name_model.fb
deleted file mode 100644
index 4752a23..0000000
--- a/native/annotator/test_data/test_person_name_model.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/test_data/wrong_embeddings.fb b/native/annotator/test_data/wrong_embeddings.fb
deleted file mode 100644
index a9815ea..0000000
--- a/native/annotator/test_data/wrong_embeddings.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/types-test-util.h b/native/annotator/types-test-util.h
new file mode 100644
index 0000000..1d018a1
--- /dev/null
+++ b/native/annotator/types-test-util.h

@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
+
+#include <ostream>
+
+#include "annotator/types.h"
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+// Defines an inline std::ostream operator<< for TYPE_NAME. The value is first
+// written to a logging::LoggingStringStream and the resulting message is then
+// forwarded to the std::ostream. Presumably this lets gtest print these types
+// in failure messages -- TODO confirm.
+#define TC3_DECLARE_PRINT_OPERATOR(TYPE_NAME)               \
+  inline std::ostream& operator<<(std::ostream& stream,     \
+                                  const TYPE_NAME& value) { \
+    logging::LoggingStringStream tmp_stream;                \
+    tmp_stream << value;                                    \
+    return stream << tmp_stream.message;                    \
+  }
+
+TC3_DECLARE_PRINT_OPERATOR(AnnotatedSpan)
+TC3_DECLARE_PRINT_OPERATOR(ClassificationResult)
+TC3_DECLARE_PRINT_OPERATOR(DatetimeParsedData)
+TC3_DECLARE_PRINT_OPERATOR(DatetimeParseResultSpan)
+TC3_DECLARE_PRINT_OPERATOR(Token)
+
+#undef TC3_DECLARE_PRINT_OPERATOR
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_

diff --git a/native/annotator/types.cc b/native/annotator/types.cc
index ed301a0..be542d3 100644
--- a/native/annotator/types.cc
+++ b/native/annotator/types.cc

@@ -16,6 +16,10 @@
 
 #include "annotator/types.h"
 
+#include <vector>
+
+#include "utils/optional.h"
+
 namespace libtextclassifier3 {
 
 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
@@ -117,7 +121,10 @@
 logging::LoggingStringStream& operator<<(logging::LoggingStringStream& stream,
                                          const DatetimeParseResultSpan& value) {
   stream << "DatetimeParseResultSpan({" << value.span.first << ", "
-         << value.span.second << "}, {";
+         << value.span.second << "}, "
+         << "/*target_classification_score=*/ "
+         << value.target_classification_score << "/*priority_score=*/"
+         << value.priority_score << " {";
   for (const DatetimeParseResult& data : value.data) {
     stream << "{/*time_ms_utc=*/ " << data.time_ms_utc << " /* "
            << FormatMillis(data.time_ms_utc) << " */, /*granularity=*/ "
@@ -228,6 +235,14 @@
   GetOrCreateDatetimeComponent(field_type).relative_count = relative_count;
 }
 
+// Adds every component in `datetime_components` to this parsed data, keyed by
+// its component type. A component whose type is already stored is skipped,
+// because emplace() does not replace an existing entry.
+void DatetimeParsedData::AddDatetimeComponents(
+    const std::vector<DatetimeComponent>& datetime_components) {
+  for (const auto& component : datetime_components) {
+    date_time_components_.emplace(component.component_type, component);
+  }
+}
+
 bool DatetimeParsedData::HasFieldType(
     const DatetimeComponent::ComponentType& field_type) const {
   if (date_time_components_.find(field_type) == date_time_components_.end()) {
@@ -293,11 +308,26 @@
   }
 }
 
-DatetimeGranularity DatetimeParsedData::GetFinestGranularity() const {
+// Returns the stored component of the given type. If none exists yet, a
+// component with an UNSPECIFIED relative qualifier and both numeric arguments
+// set to 0 is inserted first.
+DatetimeComponent& DatetimeParsedData::GetOrCreateDatetimeComponent(
+    const DatetimeComponent::ComponentType& component_type) {
+  const DatetimeComponent placeholder(
+      component_type, DatetimeComponent::RelativeQualifier::UNSPECIFIED, 0, 0);
+  // insert() keeps an existing entry and returns an iterator to it, so the
+  // placeholder is only stored when the type was missing.
+  const auto insertion =
+      date_time_components_.insert({component_type, placeholder});
+  return insertion.first->second;
+}
+
+namespace {
+DatetimeGranularity GetFinestGranularityFromComponentTypes(
+    const std::vector<DatetimeComponent::ComponentType>&
+        datetime_component_types) {
   DatetimeGranularity granularity = DatetimeGranularity::GRANULARITY_UNKNOWN;
-  for (auto it = date_time_components_.begin();
-       it != date_time_components_.end(); it++) {
-    switch (it->first) {
+  for (const auto& component_type : datetime_component_types) {
+    switch (component_type) {
       case DatetimeComponent::ComponentType::YEAR:
         if (granularity < DatetimeGranularity::GRANULARITY_YEAR) {
           granularity = DatetimeGranularity::GRANULARITY_YEAR;
@@ -350,18 +380,40 @@
   }
   return granularity;
 }
+}  // namespace
 
-DatetimeComponent& DatetimeParsedData::GetOrCreateDatetimeComponent(
+// Collects the component types stored in this parsed data and delegates to
+// GetFinestGranularityFromComponentTypes to determine the granularity.
+DatetimeGranularity DatetimeParsedData::GetFinestGranularity() const {
+  std::vector<DatetimeComponent::ComponentType> stored_types;
+  for (const auto& type_and_component : date_time_components_) {
+    stored_types.push_back(type_and_component.first);
+  }
+  return GetFinestGranularityFromComponentTypes(stored_types);
+}
+
+Optional<DatetimeComponent> GetDatetimeComponent(
+    const std::vector<DatetimeComponent>& datetime_components,
     const DatetimeComponent::ComponentType& component_type) {
-  auto result =
-      date_time_components_
-          .insert(
-              {component_type,
-               DatetimeComponent(
-                   component_type,
-                   DatetimeComponent::RelativeQualifier::UNSPECIFIED, 0, 0)})
-          .first;
-  return result->second;
+  for (auto datetime_component : datetime_components) {
+    if (datetime_component.component_type == component_type) {
+      return Optional<DatetimeComponent>(datetime_component);
+    }
+  }
+  return Optional<DatetimeComponent>();
+}
+
+// Collects the component type of every given DatetimeComponent and delegates
+// to GetFinestGranularityFromComponentTypes to determine the granularity.
+DatetimeGranularity GetFinestGranularity(
+    const std::vector<DatetimeComponent>& datetime_component) {
+  std::vector<DatetimeComponent::ComponentType> types;
+  for (const DatetimeComponent& component : datetime_component) {
+    types.push_back(component.component_type);
+  }
+  return GetFinestGranularityFromComponentTypes(types);
 }
 
 }  // namespace libtextclassifier3

diff --git a/native/annotator/types.h b/native/annotator/types.h
index ff9c87f..665d4b6 100644
--- a/native/annotator/types.h
+++ b/native/annotator/types.h

@@ -267,6 +267,16 @@
         relative_count(arg_relative_count) {}
 };
 
+// Utility method that returns the finest granularity of the given
+// DatetimeComponents.
+DatetimeGranularity GetFinestGranularity(
+    const std::vector<DatetimeComponent>& datetime_component);
+
+// Returns the 'DatetimeComponent' in the collection with the given type.
+Optional<DatetimeComponent> GetDatetimeComponent(
+    const std::vector<DatetimeComponent>& datetime_components,
+    const DatetimeComponent::ComponentType& component_type);
+
 struct DatetimeParseResult {
   // The absolute time in milliseconds since the epoch in UTC.
   int64 time_ms_utc;
@@ -491,6 +501,20 @@
   }
 };
 
+struct Permissions {
+  // Whether the user location may be used to provide better annotations.
+  bool has_location_permission = true;
+  // Whether annotators may use personal data to provide personalized
+  // annotations.
+  bool has_personalization_permission = true;
+
+  bool operator==(const Permissions& other) const {
+    return !(has_location_permission != other.has_location_permission ||
+             has_personalization_permission !=
+                 other.has_personalization_permission);
+  }
+};
+
 struct AnnotationOptions : public BaseOptions, public DatetimeOptions {
   // List of entity types that should be used for annotation.
   std::unordered_set<std::string> entity_types;
@@ -498,9 +522,13 @@
   // If true, serialized_entity_data in the results is populated."
   bool is_serialized_entity_data_enabled = false;
 
+  // Defines the permissions for the annotators.
+  Permissions permissions;
+
   bool operator==(const AnnotationOptions& other) const {
     return this->is_serialized_entity_data_enabled ==
                other.is_serialized_entity_data_enabled &&
+           this->permissions == other.permissions &&
            this->entity_types == other.entity_types &&
            BaseOptions::operator==(other) && DatetimeOptions::operator==(other);
   }
@@ -521,7 +549,7 @@
 
 // Represents a result of Annotate call.
 struct AnnotatedSpan {
-  enum class Source { OTHER, KNOWLEDGE, DURATION, DATETIME };
+  enum class Source { OTHER, KNOWLEDGE, DURATION, DATETIME, PERSON_NAME };
 
   // Unicode codepoint indices in the input string.
   CodepointSpan span = {kInvalidIndex, kInvalidIndex};
@@ -537,6 +565,21 @@
   AnnotatedSpan(CodepointSpan arg_span,
                 std::vector<ClassificationResult> arg_classification)
       : span(arg_span), classification(std::move(arg_classification)) {}
+
+  AnnotatedSpan(CodepointSpan arg_span,
+                std::vector<ClassificationResult> arg_classification,
+                Source arg_source)
+      : span(arg_span),
+        classification(std::move(arg_classification)),
+        source(arg_source) {}
+};
+
+struct InputFragment {
+  std::string text;
+
+  // If present, overrides the reference time and timezone of the
+  // AnnotationOptions when annotating this specific string fragment.
+  Optional<DatetimeOptions> datetime_options;
 };
 
 // Pretty-printing function for AnnotatedSpan.
@@ -583,6 +626,10 @@
       const DatetimeComponent::ComponentType& field_type,
       const DatetimeComponent::RelativeQualifier& relative_value);
 
+  // Add collection of 'DatetimeComponent' to 'DatetimeParsedData'.
+  void AddDatetimeComponents(
+      const std::vector<DatetimeComponent>& datetime_components);
+
   // Function to set the relative count of DateTimeComponent, if the field is
   // not present the function will create the field and set the count.
   void SetRelativeCount(const DatetimeComponent::ComponentType& field_type,

diff --git a/native/annotator/zlib-utils_test.cc b/native/annotator/zlib-utils_test.cc
new file mode 100644
index 0000000..df33ea1
--- /dev/null
+++ b/native/annotator/zlib-utils_test.cc

@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/zlib-utils.h"
+
+#include <memory>
+
+#include "annotator/model_generated.h"
+#include "utils/zlib/zlib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+
+TEST(AnnotatorZlibUtilsTest, CompressModel) {
+  ModelT model;
+  model.regex_model.reset(new RegexModelT);
+  model.regex_model->patterns.emplace_back(new RegexModel_::PatternT);
+  model.regex_model->patterns.back()->pattern = "this is a test pattern";
+  model.regex_model->patterns.emplace_back(new RegexModel_::PatternT);
+  model.regex_model->patterns.back()->pattern = "this is a second test pattern";
+
+  model.datetime_model.reset(new DatetimeModelT);
+  model.datetime_model->patterns.emplace_back(new DatetimeModelPatternT);
+  model.datetime_model->patterns.back()->regexes.emplace_back(
+      new DatetimeModelPattern_::RegexT);
+  model.datetime_model->patterns.back()->regexes.back()->pattern =
+      "an example datetime pattern";
+  model.datetime_model->extractors.emplace_back(new DatetimeModelExtractorT);
+  model.datetime_model->extractors.back()->pattern =
+      "an example datetime extractor";
+
+  model.intent_options.reset(new IntentFactoryModelT);
+  model.intent_options->generator.emplace_back(
+      new IntentFactoryModel_::IntentGeneratorT);
+  const std::string intent_generator1 = "lua generator 1";
+  model.intent_options->generator.back()->lua_template_generator =
+      std::vector<uint8_t>(intent_generator1.begin(), intent_generator1.end());
+  model.intent_options->generator.emplace_back(
+      new IntentFactoryModel_::IntentGeneratorT);
+  const std::string intent_generator2 = "lua generator 2";
+  model.intent_options->generator.back()->lua_template_generator =
+      std::vector<uint8_t>(intent_generator2.begin(), intent_generator2.end());
+
+  // NOTE: The resource strings contain some repetition so that the compressed
+  // version is smaller than the uncompressed one, because the compression code
+  // looks at that as well.
+  model.resources.reset(new ResourcePoolT);
+  model.resources->resource_entry.emplace_back(new ResourceEntryT);
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr1.1";
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr1.2";
+  model.resources->resource_entry.emplace_back(new ResourceEntryT);
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr2.1";
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr2.2";
+
+  // Compress the model.
+  EXPECT_TRUE(CompressModel(&model));
+
+  // Sanity check that the uncompressed fields are now empty.
+  EXPECT_TRUE(model.regex_model->patterns[0]->pattern.empty());
+  EXPECT_TRUE(model.regex_model->patterns[1]->pattern.empty());
+  EXPECT_TRUE(model.datetime_model->patterns[0]->regexes[0]->pattern.empty());
+  EXPECT_TRUE(model.datetime_model->extractors[0]->pattern.empty());
+  EXPECT_TRUE(
+      model.intent_options->generator[0]->lua_template_generator.empty());
+  EXPECT_TRUE(
+      model.intent_options->generator[1]->lua_template_generator.empty());
+  EXPECT_TRUE(model.resources->resource_entry[0]->resource[0]->content.empty());
+  EXPECT_TRUE(model.resources->resource_entry[0]->resource[1]->content.empty());
+  EXPECT_TRUE(model.resources->resource_entry[1]->resource[0]->content.empty());
+  EXPECT_TRUE(model.resources->resource_entry[1]->resource[1]->content.empty());
+
+  // Pack and load the model.
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(Model::Pack(builder, &model));
+  const Model* compressed_model =
+      GetModel(reinterpret_cast<const char*>(builder.GetBufferPointer()));
+  ASSERT_TRUE(compressed_model != nullptr);
+
+  // Decompress the fields again and check that they match the original.
+  std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+  ASSERT_TRUE(decompressor != nullptr);
+  std::string uncompressed_pattern;
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      compressed_model->regex_model()->patterns()->Get(0)->compressed_pattern(),
+      &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "this is a test pattern");
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      compressed_model->regex_model()->patterns()->Get(1)->compressed_pattern(),
+      &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "this is a second test pattern");
+  EXPECT_TRUE(decompressor->MaybeDecompress(compressed_model->datetime_model()
+                                                ->patterns()
+                                                ->Get(0)
+                                                ->regexes()
+                                                ->Get(0)
+                                                ->compressed_pattern(),
+                                            &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "an example datetime pattern");
+  EXPECT_TRUE(decompressor->MaybeDecompress(compressed_model->datetime_model()
+                                                ->extractors()
+                                                ->Get(0)
+                                                ->compressed_pattern(),
+                                            &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "an example datetime extractor");
+
+  EXPECT_TRUE(DecompressModel(&model));
+  EXPECT_EQ(model.regex_model->patterns[0]->pattern, "this is a test pattern");
+  EXPECT_EQ(model.regex_model->patterns[1]->pattern,
+            "this is a second test pattern");
+  EXPECT_EQ(model.datetime_model->patterns[0]->regexes[0]->pattern,
+            "an example datetime pattern");
+  EXPECT_EQ(model.datetime_model->extractors[0]->pattern,
+            "an example datetime extractor");
+  EXPECT_EQ(
+      model.intent_options->generator[0]->lua_template_generator,
+      std::vector<uint8_t>(intent_generator1.begin(), intent_generator1.end()));
+  EXPECT_EQ(
+      model.intent_options->generator[1]->lua_template_generator,
+      std::vector<uint8_t>(intent_generator2.begin(), intent_generator2.end()));
+  EXPECT_EQ(model.resources->resource_entry[0]->resource[0]->content,
+            "rrrrrrrrrrrrr1.1");
+  EXPECT_EQ(model.resources->resource_entry[0]->resource[1]->content,
+            "rrrrrrrrrrrrr1.2");
+  EXPECT_EQ(model.resources->resource_entry[1]->resource[0]->content,
+            "rrrrrrrrrrrrr2.1");
+  EXPECT_EQ(model.resources->resource_entry[1]->resource[1]->content,
+            "rrrrrrrrrrrrr2.2");
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/lang_id/common/math/fastexp.h b/native/lang_id/common/math/fastexp.h
index 05b654a..761e9ac 100644
--- a/native/lang_id/common/math/fastexp.h
+++ b/native/lang_id/common/math/fastexp.h

@@ -33,9 +33,9 @@
 
 class FastMathClass {
  private:
-  static const int kBits = 7;
-  static const int kMask1 = (1 << kBits) - 1;
-  static const int kMask2 = 0xFF << kBits;
+  static constexpr int kBits = 7;
+  static constexpr int kMask1 = (1 << kBits) - 1;
+  static constexpr int kMask2 = 0xFF << kBits;
   static constexpr float kLogBase2OfE = 1.44269504088896340736f;
 
   struct Table {

diff --git a/native/lang_id/script/approx-script-data.cc b/native/lang_id/script/approx-script-data.cc
index e11d7b7..233653f 100755
--- a/native/lang_id/script/approx-script-data.cc
+++ b/native/lang_id/script/approx-script-data.cc

@@ -27,7 +27,7 @@
 namespace mobile {
 namespace approx_script_internal {
 
-const int kNumRanges = 367;
+const int kNumRanges = 376;
 
 const uint32 kRangeFirst[] = {
   65,  // Range #0: [65, 90, Latin]
@@ -48,355 +48,364 @@
   1008,  // Range #15: [1008, 1023, Greek]
   1024,  // Range #16: [1024, 1156, Cyrillic]
   1159,  // Range #17: [1159, 1327, Cyrillic]
-  1329,  // Range #18: [1329, 1416, Armenian]
-  1418,  // Range #19: [1418, 1423, Armenian]
-  1425,  // Range #20: [1425, 1479, Hebrew]
-  1488,  // Range #21: [1488, 1524, Hebrew]
-  1536,  // Range #22: [1536, 1540, Arabic]
-  1542,  // Range #23: [1542, 1547, Arabic]
-  1549,  // Range #24: [1549, 1562, Arabic]
-  1564,  // Range #25: [1564, 1566, Arabic]
-  1568,  // Range #26: [1568, 1599, Arabic]
-  1601,  // Range #27: [1601, 1610, Arabic]
-  1622,  // Range #28: [1622, 1647, Arabic]
-  1649,  // Range #29: [1649, 1756, Arabic]
-  1758,  // Range #30: [1758, 1791, Arabic]
-  1792,  // Range #31: [1792, 1871, Syriac]
-  1872,  // Range #32: [1872, 1919, Arabic]
-  1920,  // Range #33: [1920, 1969, Thaana]
-  1984,  // Range #34: [1984, 2047, Nko]
-  2048,  // Range #35: [2048, 2110, Samaritan]
-  2112,  // Range #36: [2112, 2142, Mandaic]
-  2144,  // Range #37: [2144, 2154, Syriac]
-  2208,  // Range #38: [2208, 2237, Arabic]
-  2259,  // Range #39: [2259, 2273, Arabic]
-  2275,  // Range #40: [2275, 2303, Arabic]
-  2304,  // Range #41: [2304, 2384, Devanagari]
-  2389,  // Range #42: [2389, 2403, Devanagari]
-  2406,  // Range #43: [2406, 2431, Devanagari]
-  2432,  // Range #44: [2432, 2510, Bengali]
-  2519,  // Range #45: [2519, 2558, Bengali]
-  2561,  // Range #46: [2561, 2641, Gurmukhi]
-  2649,  // Range #47: [2649, 2654, Gurmukhi]
-  2662,  // Range #48: [2662, 2678, Gurmukhi]
-  2689,  // Range #49: [2689, 2768, Gujarati]
-  2784,  // Range #50: [2784, 2801, Gujarati]
-  2809,  // Range #51: [2809, 2815, Gujarati]
-  2817,  // Range #52: [2817, 2893, Oriya]
-  2902,  // Range #53: [2902, 2935, Oriya]
-  2946,  // Range #54: [2946, 3024, Tamil]
-  3031,  // Range #55: [3031, 3031, Tamil]
-  3046,  // Range #56: [3046, 3066, Tamil]
-  3072,  // Range #57: [3072, 3149, Telugu]
-  3157,  // Range #58: [3157, 3162, Telugu]
-  3168,  // Range #59: [3168, 3183, Telugu]
-  3191,  // Range #60: [3191, 3199, Telugu]
-  3200,  // Range #61: [3200, 3277, Kannada]
-  3285,  // Range #62: [3285, 3286, Kannada]
-  3294,  // Range #63: [3294, 3314, Kannada]
-  3328,  // Range #64: [3328, 3455, Malayalam]
-  3458,  // Range #65: [3458, 3551, Sinhala]
-  3558,  // Range #66: [3558, 3572, Sinhala]
-  3585,  // Range #67: [3585, 3642, Thai]
-  3648,  // Range #68: [3648, 3675, Thai]
-  3713,  // Range #69: [3713, 3807, Lao]
-  3840,  // Range #70: [3840, 4052, Tibetan]
-  4057,  // Range #71: [4057, 4058, Tibetan]
-  4096,  // Range #72: [4096, 4255, Myanmar]
-  4256,  // Range #73: [4256, 4295, Georgian]
-  4301,  // Range #74: [4301, 4346, Georgian]
-  4348,  // Range #75: [4348, 4351, Georgian]
-  4352,  // Range #76: [4352, 4607, Hangul]
-  4608,  // Range #77: [4608, 5017, Ethiopic]
-  5024,  // Range #78: [5024, 5117, Cherokee]
-  5120,  // Range #79: [5120, 5759, Canadian_Aboriginal]
-  5760,  // Range #80: [5760, 5788, Ogham]
-  5792,  // Range #81: [5792, 5866, Runic]
-  5870,  // Range #82: [5870, 5880, Runic]
-  5888,  // Range #83: [5888, 5908, Tagalog]
-  5920,  // Range #84: [5920, 5940, Hanunoo]
-  5952,  // Range #85: [5952, 5971, Buhid]
-  5984,  // Range #86: [5984, 6003, Tagbanwa]
-  6016,  // Range #87: [6016, 6121, Khmer]
-  6128,  // Range #88: [6128, 6137, Khmer]
-  6144,  // Range #89: [6144, 6145, Mongolian]
-  6148,  // Range #90: [6148, 6148, Mongolian]
-  6150,  // Range #91: [6150, 6169, Mongolian]
-  6176,  // Range #92: [6176, 6264, Mongolian]
-  6272,  // Range #93: [6272, 6314, Mongolian]
-  6320,  // Range #94: [6320, 6389, Canadian_Aboriginal]
-  6400,  // Range #95: [6400, 6479, Limbu]
-  6480,  // Range #96: [6480, 6516, Tai_Le]
-  6528,  // Range #97: [6528, 6601, New_Tai_Lue]
-  6608,  // Range #98: [6608, 6623, New_Tai_Lue]
-  6624,  // Range #99: [6624, 6655, Khmer]
-  6656,  // Range #100: [6656, 6687, Buginese]
-  6688,  // Range #101: [6688, 6793, Tai_Tham]
-  6800,  // Range #102: [6800, 6809, Tai_Tham]
-  6816,  // Range #103: [6816, 6829, Tai_Tham]
-  6912,  // Range #104: [6912, 7036, Balinese]
-  7040,  // Range #105: [7040, 7103, Sundanese]
-  7104,  // Range #106: [7104, 7155, Batak]
-  7164,  // Range #107: [7164, 7167, Batak]
-  7168,  // Range #108: [7168, 7247, Lepcha]
-  7248,  // Range #109: [7248, 7295, Ol_Chiki]
-  7296,  // Range #110: [7296, 7304, Cyrillic]
-  7312,  // Range #111: [7312, 7359, Georgian]
-  7360,  // Range #112: [7360, 7367, Sundanese]
-  7424,  // Range #113: [7424, 7461, Latin]
-  7462,  // Range #114: [7462, 7466, Greek]
-  7467,  // Range #115: [7467, 7467, Cyrillic]
-  7468,  // Range #116: [7468, 7516, Latin]
-  7517,  // Range #117: [7517, 7521, Greek]
-  7522,  // Range #118: [7522, 7525, Latin]
-  7526,  // Range #119: [7526, 7530, Greek]
-  7531,  // Range #120: [7531, 7543, Latin]
-  7544,  // Range #121: [7544, 7544, Cyrillic]
-  7545,  // Range #122: [7545, 7614, Latin]
-  7615,  // Range #123: [7615, 7615, Greek]
-  7680,  // Range #124: [7680, 7935, Latin]
-  7936,  // Range #125: [7936, 8190, Greek]
-  8305,  // Range #126: [8305, 8305, Latin]
-  8319,  // Range #127: [8319, 8319, Latin]
-  8336,  // Range #128: [8336, 8348, Latin]
-  8486,  // Range #129: [8486, 8486, Greek]
-  8490,  // Range #130: [8490, 8491, Latin]
-  8498,  // Range #131: [8498, 8498, Latin]
-  8526,  // Range #132: [8526, 8526, Latin]
-  8544,  // Range #133: [8544, 8584, Latin]
-  10240,  // Range #134: [10240, 10495, Braille]
-  11264,  // Range #135: [11264, 11358, Glagolitic]
-  11360,  // Range #136: [11360, 11391, Latin]
-  11392,  // Range #137: [11392, 11507, Coptic]
-  11513,  // Range #138: [11513, 11519, Coptic]
-  11520,  // Range #139: [11520, 11559, Georgian]
-  11565,  // Range #140: [11565, 11565, Georgian]
-  11568,  // Range #141: [11568, 11623, Tifinagh]
-  11631,  // Range #142: [11631, 11632, Tifinagh]
-  11647,  // Range #143: [11647, 11647, Tifinagh]
-  11648,  // Range #144: [11648, 11670, Ethiopic]
-  11680,  // Range #145: [11680, 11742, Ethiopic]
-  11744,  // Range #146: [11744, 11775, Cyrillic]
-  11904,  // Range #147: [11904, 12019, Han]
-  12032,  // Range #148: [12032, 12245, Han]
-  12293,  // Range #149: [12293, 12293, Han]
-  12295,  // Range #150: [12295, 12295, Han]
-  12321,  // Range #151: [12321, 12329, Han]
-  12334,  // Range #152: [12334, 12335, Hangul]
-  12344,  // Range #153: [12344, 12347, Han]
-  12353,  // Range #154: [12353, 12438, Hiragana]
-  12445,  // Range #155: [12445, 12447, Hiragana]
-  12449,  // Range #156: [12449, 12538, Katakana]
-  12541,  // Range #157: [12541, 12543, Katakana]
-  12549,  // Range #158: [12549, 12591, Bopomofo]
-  12593,  // Range #159: [12593, 12686, Hangul]
-  12704,  // Range #160: [12704, 12730, Bopomofo]
-  12784,  // Range #161: [12784, 12799, Katakana]
-  12800,  // Range #162: [12800, 12830, Hangul]
-  12896,  // Range #163: [12896, 12926, Hangul]
-  13008,  // Range #164: [13008, 13054, Katakana]
-  13056,  // Range #165: [13056, 13143, Katakana]
-  13312,  // Range #166: [13312, 19893, Han]
-  19968,  // Range #167: [19968, 40943, Han]
-  40960,  // Range #168: [40960, 42182, Yi]
-  42192,  // Range #169: [42192, 42239, Lisu]
-  42240,  // Range #170: [42240, 42539, Vai]
-  42560,  // Range #171: [42560, 42655, Cyrillic]
-  42656,  // Range #172: [42656, 42743, Bamum]
-  42786,  // Range #173: [42786, 42887, Latin]
-  42891,  // Range #174: [42891, 42950, Latin]
-  42999,  // Range #175: [42999, 43007, Latin]
-  43008,  // Range #176: [43008, 43051, Syloti_Nagri]
-  43072,  // Range #177: [43072, 43127, Phags_Pa]
-  43136,  // Range #178: [43136, 43205, Saurashtra]
-  43214,  // Range #179: [43214, 43225, Saurashtra]
-  43232,  // Range #180: [43232, 43263, Devanagari]
-  43264,  // Range #181: [43264, 43309, Kayah_Li]
-  43311,  // Range #182: [43311, 43311, Kayah_Li]
-  43312,  // Range #183: [43312, 43347, Rejang]
-  43359,  // Range #184: [43359, 43359, Rejang]
-  43360,  // Range #185: [43360, 43388, Hangul]
-  43392,  // Range #186: [43392, 43469, Javanese]
-  43472,  // Range #187: [43472, 43487, Javanese]
-  43488,  // Range #188: [43488, 43518, Myanmar]
-  43520,  // Range #189: [43520, 43574, Cham]
-  43584,  // Range #190: [43584, 43615, Cham]
-  43616,  // Range #191: [43616, 43647, Myanmar]
-  43648,  // Range #192: [43648, 43714, Tai_Viet]
-  43739,  // Range #193: [43739, 43743, Tai_Viet]
-  43744,  // Range #194: [43744, 43766, Meetei_Mayek]
-  43777,  // Range #195: [43777, 43798, Ethiopic]
-  43808,  // Range #196: [43808, 43822, Ethiopic]
-  43824,  // Range #197: [43824, 43866, Latin]
-  43868,  // Range #198: [43868, 43876, Latin]
-  43877,  // Range #199: [43877, 43877, Greek]
-  43878,  // Range #200: [43878, 43879, Latin]
-  43888,  // Range #201: [43888, 43967, Cherokee]
-  43968,  // Range #202: [43968, 44025, Meetei_Mayek]
-  44032,  // Range #203: [44032, 55203, Hangul]
-  55216,  // Range #204: [55216, 55291, Hangul]
-  63744,  // Range #205: [63744, 64217, Han]
-  64256,  // Range #206: [64256, 64262, Latin]
-  64275,  // Range #207: [64275, 64279, Armenian]
-  64285,  // Range #208: [64285, 64335, Hebrew]
-  64336,  // Range #209: [64336, 64449, Arabic]
-  64467,  // Range #210: [64467, 64829, Arabic]
-  64848,  // Range #211: [64848, 64967, Arabic]
-  65008,  // Range #212: [65008, 65021, Arabic]
-  65070,  // Range #213: [65070, 65071, Cyrillic]
-  65136,  // Range #214: [65136, 65276, Arabic]
-  65313,  // Range #215: [65313, 65338, Latin]
-  65345,  // Range #216: [65345, 65370, Latin]
-  65382,  // Range #217: [65382, 65391, Katakana]
-  65393,  // Range #218: [65393, 65437, Katakana]
-  65440,  // Range #219: [65440, 65500, Hangul]
-  65536,  // Range #220: [65536, 65629, Linear_B]
-  65664,  // Range #221: [65664, 65786, Linear_B]
-  65856,  // Range #222: [65856, 65934, Greek]
-  65952,  // Range #223: [65952, 65952, Greek]
-  66176,  // Range #224: [66176, 66204, Lycian]
-  66208,  // Range #225: [66208, 66256, Carian]
-  66304,  // Range #226: [66304, 66339, Old_Italic]
-  66349,  // Range #227: [66349, 66351, Old_Italic]
-  66352,  // Range #228: [66352, 66378, Gothic]
-  66384,  // Range #229: [66384, 66426, Old_Permic]
-  66432,  // Range #230: [66432, 66463, Ugaritic]
-  66464,  // Range #231: [66464, 66517, Old_Persian]
-  66560,  // Range #232: [66560, 66639, Deseret]
-  66640,  // Range #233: [66640, 66687, Shavian]
-  66688,  // Range #234: [66688, 66729, Osmanya]
-  66736,  // Range #235: [66736, 66811, Osage]
-  66816,  // Range #236: [66816, 66855, Elbasan]
-  66864,  // Range #237: [66864, 66915, Caucasian_Albanian]
-  66927,  // Range #238: [66927, 66927, Caucasian_Albanian]
-  67072,  // Range #239: [67072, 67382, Linear_A]
-  67392,  // Range #240: [67392, 67413, Linear_A]
-  67424,  // Range #241: [67424, 67431, Linear_A]
-  67584,  // Range #242: [67584, 67647, Cypriot]
-  67648,  // Range #243: [67648, 67679, Imperial_Aramaic]
-  67680,  // Range #244: [67680, 67711, Palmyrene]
-  67712,  // Range #245: [67712, 67742, Nabataean]
-  67751,  // Range #246: [67751, 67759, Nabataean]
-  67808,  // Range #247: [67808, 67829, Hatran]
-  67835,  // Range #248: [67835, 67839, Hatran]
-  67840,  // Range #249: [67840, 67871, Phoenician]
-  67872,  // Range #250: [67872, 67897, Lydian]
-  67903,  // Range #251: [67903, 67903, Lydian]
-  67968,  // Range #252: [67968, 67999, Meroitic_Hieroglyphs]
-  68000,  // Range #253: [68000, 68095, Meroitic_Cursive]
-  68096,  // Range #254: [68096, 68102, Kharoshthi]
-  68108,  // Range #255: [68108, 68168, Kharoshthi]
-  68176,  // Range #256: [68176, 68184, Kharoshthi]
-  68192,  // Range #257: [68192, 68223, Old_South_Arabian]
-  68224,  // Range #258: [68224, 68255, Old_North_Arabian]
-  68288,  // Range #259: [68288, 68342, Manichaean]
-  68352,  // Range #260: [68352, 68415, Avestan]
-  68416,  // Range #261: [68416, 68447, Inscriptional_Parthian]
-  68448,  // Range #262: [68448, 68466, Inscriptional_Pahlavi]
-  68472,  // Range #263: [68472, 68479, Inscriptional_Pahlavi]
-  68480,  // Range #264: [68480, 68497, Psalter_Pahlavi]
-  68505,  // Range #265: [68505, 68508, Psalter_Pahlavi]
-  68521,  // Range #266: [68521, 68527, Psalter_Pahlavi]
-  68608,  // Range #267: [68608, 68680, Old_Turkic]
-  68736,  // Range #268: [68736, 68786, Old_Hungarian]
-  68800,  // Range #269: [68800, 68850, Old_Hungarian]
-  68858,  // Range #270: [68858, 68863, Old_Hungarian]
-  68864,  // Range #271: [68864, 68903, Hanifi_Rohingya]
-  68912,  // Range #272: [68912, 68921, Hanifi_Rohingya]
-  69216,  // Range #273: [69216, 69246, Arabic]
+  1329,  // Range #18: [1329, 1423, Armenian]
+  1425,  // Range #19: [1425, 1479, Hebrew]
+  1488,  // Range #20: [1488, 1524, Hebrew]
+  1536,  // Range #21: [1536, 1540, Arabic]
+  1542,  // Range #22: [1542, 1547, Arabic]
+  1549,  // Range #23: [1549, 1562, Arabic]
+  1564,  // Range #24: [1564, 1566, Arabic]
+  1568,  // Range #25: [1568, 1599, Arabic]
+  1601,  // Range #26: [1601, 1610, Arabic]
+  1622,  // Range #27: [1622, 1647, Arabic]
+  1649,  // Range #28: [1649, 1756, Arabic]
+  1758,  // Range #29: [1758, 1791, Arabic]
+  1792,  // Range #30: [1792, 1871, Syriac]
+  1872,  // Range #31: [1872, 1919, Arabic]
+  1920,  // Range #32: [1920, 1969, Thaana]
+  1984,  // Range #33: [1984, 2047, Nko]
+  2048,  // Range #34: [2048, 2110, Samaritan]
+  2112,  // Range #35: [2112, 2142, Mandaic]
+  2144,  // Range #36: [2144, 2154, Syriac]
+  2208,  // Range #37: [2208, 2247, Arabic]
+  2259,  // Range #38: [2259, 2273, Arabic]
+  2275,  // Range #39: [2275, 2303, Arabic]
+  2304,  // Range #40: [2304, 2384, Devanagari]
+  2389,  // Range #41: [2389, 2403, Devanagari]
+  2406,  // Range #42: [2406, 2431, Devanagari]
+  2432,  // Range #43: [2432, 2510, Bengali]
+  2519,  // Range #44: [2519, 2558, Bengali]
+  2561,  // Range #45: [2561, 2641, Gurmukhi]
+  2649,  // Range #46: [2649, 2654, Gurmukhi]
+  2662,  // Range #47: [2662, 2678, Gurmukhi]
+  2689,  // Range #48: [2689, 2768, Gujarati]
+  2784,  // Range #49: [2784, 2801, Gujarati]
+  2809,  // Range #50: [2809, 2815, Gujarati]
+  2817,  // Range #51: [2817, 2893, Oriya]
+  2901,  // Range #52: [2901, 2935, Oriya]
+  2946,  // Range #53: [2946, 3024, Tamil]
+  3031,  // Range #54: [3031, 3031, Tamil]
+  3046,  // Range #55: [3046, 3066, Tamil]
+  3072,  // Range #56: [3072, 3149, Telugu]
+  3157,  // Range #57: [3157, 3162, Telugu]
+  3168,  // Range #58: [3168, 3183, Telugu]
+  3191,  // Range #59: [3191, 3199, Telugu]
+  3200,  // Range #60: [3200, 3277, Kannada]
+  3285,  // Range #61: [3285, 3286, Kannada]
+  3294,  // Range #62: [3294, 3314, Kannada]
+  3328,  // Range #63: [3328, 3455, Malayalam]
+  3457,  // Range #64: [3457, 3551, Sinhala]
+  3558,  // Range #65: [3558, 3572, Sinhala]
+  3585,  // Range #66: [3585, 3642, Thai]
+  3648,  // Range #67: [3648, 3675, Thai]
+  3713,  // Range #68: [3713, 3807, Lao]
+  3840,  // Range #69: [3840, 4052, Tibetan]
+  4057,  // Range #70: [4057, 4058, Tibetan]
+  4096,  // Range #71: [4096, 4255, Myanmar]
+  4256,  // Range #72: [4256, 4295, Georgian]
+  4301,  // Range #73: [4301, 4346, Georgian]
+  4348,  // Range #74: [4348, 4351, Georgian]
+  4352,  // Range #75: [4352, 4607, Hangul]
+  4608,  // Range #76: [4608, 5017, Ethiopic]
+  5024,  // Range #77: [5024, 5117, Cherokee]
+  5120,  // Range #78: [5120, 5759, Canadian_Aboriginal]
+  5760,  // Range #79: [5760, 5788, Ogham]
+  5792,  // Range #80: [5792, 5866, Runic]
+  5870,  // Range #81: [5870, 5880, Runic]
+  5888,  // Range #82: [5888, 5908, Tagalog]
+  5920,  // Range #83: [5920, 5940, Hanunoo]
+  5952,  // Range #84: [5952, 5971, Buhid]
+  5984,  // Range #85: [5984, 6003, Tagbanwa]
+  6016,  // Range #86: [6016, 6121, Khmer]
+  6128,  // Range #87: [6128, 6137, Khmer]
+  6144,  // Range #88: [6144, 6145, Mongolian]
+  6148,  // Range #89: [6148, 6148, Mongolian]
+  6150,  // Range #90: [6150, 6169, Mongolian]
+  6176,  // Range #91: [6176, 6264, Mongolian]
+  6272,  // Range #92: [6272, 6314, Mongolian]
+  6320,  // Range #93: [6320, 6389, Canadian_Aboriginal]
+  6400,  // Range #94: [6400, 6479, Limbu]
+  6480,  // Range #95: [6480, 6516, Tai_Le]
+  6528,  // Range #96: [6528, 6601, New_Tai_Lue]
+  6608,  // Range #97: [6608, 6623, New_Tai_Lue]
+  6624,  // Range #98: [6624, 6655, Khmer]
+  6656,  // Range #99: [6656, 6687, Buginese]
+  6688,  // Range #100: [6688, 6793, Tai_Tham]
+  6800,  // Range #101: [6800, 6809, Tai_Tham]
+  6816,  // Range #102: [6816, 6829, Tai_Tham]
+  6912,  // Range #103: [6912, 7036, Balinese]
+  7040,  // Range #104: [7040, 7103, Sundanese]
+  7104,  // Range #105: [7104, 7155, Batak]
+  7164,  // Range #106: [7164, 7167, Batak]
+  7168,  // Range #107: [7168, 7247, Lepcha]
+  7248,  // Range #108: [7248, 7295, Ol_Chiki]
+  7296,  // Range #109: [7296, 7304, Cyrillic]
+  7312,  // Range #110: [7312, 7359, Georgian]
+  7360,  // Range #111: [7360, 7367, Sundanese]
+  7424,  // Range #112: [7424, 7461, Latin]
+  7462,  // Range #113: [7462, 7466, Greek]
+  7467,  // Range #114: [7467, 7467, Cyrillic]
+  7468,  // Range #115: [7468, 7516, Latin]
+  7517,  // Range #116: [7517, 7521, Greek]
+  7522,  // Range #117: [7522, 7525, Latin]
+  7526,  // Range #118: [7526, 7530, Greek]
+  7531,  // Range #119: [7531, 7543, Latin]
+  7544,  // Range #120: [7544, 7544, Cyrillic]
+  7545,  // Range #121: [7545, 7614, Latin]
+  7615,  // Range #122: [7615, 7615, Greek]
+  7680,  // Range #123: [7680, 7935, Latin]
+  7936,  // Range #124: [7936, 8190, Greek]
+  8305,  // Range #125: [8305, 8305, Latin]
+  8319,  // Range #126: [8319, 8319, Latin]
+  8336,  // Range #127: [8336, 8348, Latin]
+  8486,  // Range #128: [8486, 8486, Greek]
+  8490,  // Range #129: [8490, 8491, Latin]
+  8498,  // Range #130: [8498, 8498, Latin]
+  8526,  // Range #131: [8526, 8526, Latin]
+  8544,  // Range #132: [8544, 8584, Latin]
+  10240,  // Range #133: [10240, 10495, Braille]
+  11264,  // Range #134: [11264, 11358, Glagolitic]
+  11360,  // Range #135: [11360, 11391, Latin]
+  11392,  // Range #136: [11392, 11507, Coptic]
+  11513,  // Range #137: [11513, 11519, Coptic]
+  11520,  // Range #138: [11520, 11559, Georgian]
+  11565,  // Range #139: [11565, 11565, Georgian]
+  11568,  // Range #140: [11568, 11623, Tifinagh]
+  11631,  // Range #141: [11631, 11632, Tifinagh]
+  11647,  // Range #142: [11647, 11647, Tifinagh]
+  11648,  // Range #143: [11648, 11670, Ethiopic]
+  11680,  // Range #144: [11680, 11742, Ethiopic]
+  11744,  // Range #145: [11744, 11775, Cyrillic]
+  11904,  // Range #146: [11904, 12019, Han]
+  12032,  // Range #147: [12032, 12245, Han]
+  12293,  // Range #148: [12293, 12293, Han]
+  12295,  // Range #149: [12295, 12295, Han]
+  12321,  // Range #150: [12321, 12329, Han]
+  12334,  // Range #151: [12334, 12335, Hangul]
+  12344,  // Range #152: [12344, 12347, Han]
+  12353,  // Range #153: [12353, 12438, Hiragana]
+  12445,  // Range #154: [12445, 12447, Hiragana]
+  12449,  // Range #155: [12449, 12538, Katakana]
+  12541,  // Range #156: [12541, 12543, Katakana]
+  12549,  // Range #157: [12549, 12591, Bopomofo]
+  12593,  // Range #158: [12593, 12686, Hangul]
+  12704,  // Range #159: [12704, 12735, Bopomofo]
+  12784,  // Range #160: [12784, 12799, Katakana]
+  12800,  // Range #161: [12800, 12830, Hangul]
+  12896,  // Range #162: [12896, 12926, Hangul]
+  13008,  // Range #163: [13008, 13054, Katakana]
+  13056,  // Range #164: [13056, 13143, Katakana]
+  13312,  // Range #165: [13312, 19903, Han]
+  19968,  // Range #166: [19968, 40956, Han]
+  40960,  // Range #167: [40960, 42182, Yi]
+  42192,  // Range #168: [42192, 42239, Lisu]
+  42240,  // Range #169: [42240, 42539, Vai]
+  42560,  // Range #170: [42560, 42655, Cyrillic]
+  42656,  // Range #171: [42656, 42743, Bamum]
+  42786,  // Range #172: [42786, 42887, Latin]
+  42891,  // Range #173: [42891, 42954, Latin]
+  42997,  // Range #174: [42997, 43007, Latin]
+  43008,  // Range #175: [43008, 43052, Syloti_Nagri]
+  43072,  // Range #176: [43072, 43127, Phags_Pa]
+  43136,  // Range #177: [43136, 43205, Saurashtra]
+  43214,  // Range #178: [43214, 43225, Saurashtra]
+  43232,  // Range #179: [43232, 43263, Devanagari]
+  43264,  // Range #180: [43264, 43309, Kayah_Li]
+  43311,  // Range #181: [43311, 43311, Kayah_Li]
+  43312,  // Range #182: [43312, 43347, Rejang]
+  43359,  // Range #183: [43359, 43359, Rejang]
+  43360,  // Range #184: [43360, 43388, Hangul]
+  43392,  // Range #185: [43392, 43469, Javanese]
+  43472,  // Range #186: [43472, 43487, Javanese]
+  43488,  // Range #187: [43488, 43518, Myanmar]
+  43520,  // Range #188: [43520, 43574, Cham]
+  43584,  // Range #189: [43584, 43615, Cham]
+  43616,  // Range #190: [43616, 43647, Myanmar]
+  43648,  // Range #191: [43648, 43714, Tai_Viet]
+  43739,  // Range #192: [43739, 43743, Tai_Viet]
+  43744,  // Range #193: [43744, 43766, Meetei_Mayek]
+  43777,  // Range #194: [43777, 43798, Ethiopic]
+  43808,  // Range #195: [43808, 43822, Ethiopic]
+  43824,  // Range #196: [43824, 43866, Latin]
+  43868,  // Range #197: [43868, 43876, Latin]
+  43877,  // Range #198: [43877, 43877, Greek]
+  43878,  // Range #199: [43878, 43881, Latin]
+  43888,  // Range #200: [43888, 43967, Cherokee]
+  43968,  // Range #201: [43968, 44025, Meetei_Mayek]
+  44032,  // Range #202: [44032, 55203, Hangul]
+  55216,  // Range #203: [55216, 55291, Hangul]
+  63744,  // Range #204: [63744, 64217, Han]
+  64256,  // Range #205: [64256, 64262, Latin]
+  64275,  // Range #206: [64275, 64279, Armenian]
+  64285,  // Range #207: [64285, 64335, Hebrew]
+  64336,  // Range #208: [64336, 64449, Arabic]
+  64467,  // Range #209: [64467, 64829, Arabic]
+  64848,  // Range #210: [64848, 64967, Arabic]
+  65008,  // Range #211: [65008, 65021, Arabic]
+  65070,  // Range #212: [65070, 65071, Cyrillic]
+  65136,  // Range #213: [65136, 65276, Arabic]
+  65313,  // Range #214: [65313, 65338, Latin]
+  65345,  // Range #215: [65345, 65370, Latin]
+  65382,  // Range #216: [65382, 65391, Katakana]
+  65393,  // Range #217: [65393, 65437, Katakana]
+  65440,  // Range #218: [65440, 65500, Hangul]
+  65536,  // Range #219: [65536, 65629, Linear_B]
+  65664,  // Range #220: [65664, 65786, Linear_B]
+  65856,  // Range #221: [65856, 65934, Greek]
+  65952,  // Range #222: [65952, 65952, Greek]
+  66176,  // Range #223: [66176, 66204, Lycian]
+  66208,  // Range #224: [66208, 66256, Carian]
+  66304,  // Range #225: [66304, 66339, Old_Italic]
+  66349,  // Range #226: [66349, 66351, Old_Italic]
+  66352,  // Range #227: [66352, 66378, Gothic]
+  66384,  // Range #228: [66384, 66426, Old_Permic]
+  66432,  // Range #229: [66432, 66463, Ugaritic]
+  66464,  // Range #230: [66464, 66517, Old_Persian]
+  66560,  // Range #231: [66560, 66639, Deseret]
+  66640,  // Range #232: [66640, 66687, Shavian]
+  66688,  // Range #233: [66688, 66729, Osmanya]
+  66736,  // Range #234: [66736, 66811, Osage]
+  66816,  // Range #235: [66816, 66855, Elbasan]
+  66864,  // Range #236: [66864, 66915, Caucasian_Albanian]
+  66927,  // Range #237: [66927, 66927, Caucasian_Albanian]
+  67072,  // Range #238: [67072, 67382, Linear_A]
+  67392,  // Range #239: [67392, 67413, Linear_A]
+  67424,  // Range #240: [67424, 67431, Linear_A]
+  67584,  // Range #241: [67584, 67647, Cypriot]
+  67648,  // Range #242: [67648, 67679, Imperial_Aramaic]
+  67680,  // Range #243: [67680, 67711, Palmyrene]
+  67712,  // Range #244: [67712, 67742, Nabataean]
+  67751,  // Range #245: [67751, 67759, Nabataean]
+  67808,  // Range #246: [67808, 67829, Hatran]
+  67835,  // Range #247: [67835, 67839, Hatran]
+  67840,  // Range #248: [67840, 67871, Phoenician]
+  67872,  // Range #249: [67872, 67897, Lydian]
+  67903,  // Range #250: [67903, 67903, Lydian]
+  67968,  // Range #251: [67968, 67999, Meroitic_Hieroglyphs]
+  68000,  // Range #252: [68000, 68095, Meroitic_Cursive]
+  68096,  // Range #253: [68096, 68102, Kharoshthi]
+  68108,  // Range #254: [68108, 68168, Kharoshthi]
+  68176,  // Range #255: [68176, 68184, Kharoshthi]
+  68192,  // Range #256: [68192, 68223, Old_South_Arabian]
+  68224,  // Range #257: [68224, 68255, Old_North_Arabian]
+  68288,  // Range #258: [68288, 68342, Manichaean]
+  68352,  // Range #259: [68352, 68415, Avestan]
+  68416,  // Range #260: [68416, 68447, Inscriptional_Parthian]
+  68448,  // Range #261: [68448, 68466, Inscriptional_Pahlavi]
+  68472,  // Range #262: [68472, 68479, Inscriptional_Pahlavi]
+  68480,  // Range #263: [68480, 68497, Psalter_Pahlavi]
+  68505,  // Range #264: [68505, 68508, Psalter_Pahlavi]
+  68521,  // Range #265: [68521, 68527, Psalter_Pahlavi]
+  68608,  // Range #266: [68608, 68680, Old_Turkic]
+  68736,  // Range #267: [68736, 68786, Old_Hungarian]
+  68800,  // Range #268: [68800, 68850, Old_Hungarian]
+  68858,  // Range #269: [68858, 68863, Old_Hungarian]
+  68864,  // Range #270: [68864, 68903, Hanifi_Rohingya]
+  68912,  // Range #271: [68912, 68921, Hanifi_Rohingya]
+  69216,  // Range #272: [69216, 69246, Arabic]
+  69248,  // Range #273: [69248, 69297, Yezidi]
   69376,  // Range #274: [69376, 69415, Old_Sogdian]
   69424,  // Range #275: [69424, 69465, Sogdian]
-  69600,  // Range #276: [69600, 69622, Elymaic]
-  69632,  // Range #277: [69632, 69743, Brahmi]
-  69759,  // Range #278: [69759, 69759, Brahmi]
-  69760,  // Range #279: [69760, 69825, Kaithi]
-  69837,  // Range #280: [69837, 69837, Kaithi]
-  69840,  // Range #281: [69840, 69864, Sora_Sompeng]
-  69872,  // Range #282: [69872, 69881, Sora_Sompeng]
-  69888,  // Range #283: [69888, 69958, Chakma]
-  69968,  // Range #284: [69968, 70006, Mahajani]
-  70016,  // Range #285: [70016, 70111, Sharada]
-  70113,  // Range #286: [70113, 70132, Sinhala]
-  70144,  // Range #287: [70144, 70206, Khojki]
-  70272,  // Range #288: [70272, 70313, Multani]
-  70320,  // Range #289: [70320, 70378, Khudawadi]
-  70384,  // Range #290: [70384, 70393, Khudawadi]
-  70400,  // Range #291: [70400, 70457, Grantha]
-  70460,  // Range #292: [70460, 70480, Grantha]
-  70487,  // Range #293: [70487, 70487, Grantha]
-  70493,  // Range #294: [70493, 70516, Grantha]
-  70656,  // Range #295: [70656, 70751, Newa]
-  70784,  // Range #296: [70784, 70855, Tirhuta]
-  70864,  // Range #297: [70864, 70873, Tirhuta]
-  71040,  // Range #298: [71040, 71133, Siddham]
-  71168,  // Range #299: [71168, 71236, Modi]
-  71248,  // Range #300: [71248, 71257, Modi]
-  71264,  // Range #301: [71264, 71276, Mongolian]
-  71296,  // Range #302: [71296, 71352, Takri]
-  71360,  // Range #303: [71360, 71369, Takri]
-  71424,  // Range #304: [71424, 71487, Ahom]
-  71680,  // Range #305: [71680, 71739, Dogra]
-  71840,  // Range #306: [71840, 71922, Warang_Citi]
-  71935,  // Range #307: [71935, 71935, Warang_Citi]
-  72096,  // Range #308: [72096, 72164, Nandinagari]
-  72192,  // Range #309: [72192, 72263, Zanabazar_Square]
-  72272,  // Range #310: [72272, 72354, Soyombo]
-  72384,  // Range #311: [72384, 72440, Pau_Cin_Hau]
-  72704,  // Range #312: [72704, 72773, Bhaiksuki]
-  72784,  // Range #313: [72784, 72812, Bhaiksuki]
-  72816,  // Range #314: [72816, 72886, Marchen]
-  72960,  // Range #315: [72960, 73031, Masaram_Gondi]
-  73040,  // Range #316: [73040, 73049, Masaram_Gondi]
-  73056,  // Range #317: [73056, 73112, Gunjala_Gondi]
-  73120,  // Range #318: [73120, 73129, Gunjala_Gondi]
-  73440,  // Range #319: [73440, 73464, Makasar]
-  73664,  // Range #320: [73664, 73713, Tamil]
-  73727,  // Range #321: [73727, 73727, Tamil]
-  73728,  // Range #322: [73728, 74649, Cuneiform]
-  74752,  // Range #323: [74752, 74868, Cuneiform]
-  74880,  // Range #324: [74880, 75075, Cuneiform]
-  77824,  // Range #325: [77824, 78904, Egyptian_Hieroglyphs]
-  82944,  // Range #326: [82944, 83526, Anatolian_Hieroglyphs]
-  92160,  // Range #327: [92160, 92728, Bamum]
-  92736,  // Range #328: [92736, 92783, Mro]
-  92880,  // Range #329: [92880, 92917, Bassa_Vah]
-  92928,  // Range #330: [92928, 92997, Pahawh_Hmong]
-  93008,  // Range #331: [93008, 93047, Pahawh_Hmong]
-  93053,  // Range #332: [93053, 93071, Pahawh_Hmong]
-  93760,  // Range #333: [93760, 93850, Medefaidrin]
-  93952,  // Range #334: [93952, 94087, Miao]
-  94095,  // Range #335: [94095, 94111, Miao]
-  94176,  // Range #336: [94176, 94176, Tangut]
-  94177,  // Range #337: [94177, 94177, Nushu]
-  94208,  // Range #338: [94208, 100343, Tangut]
-  100352,  // Range #339: [100352, 101106, Tangut]
-  110592,  // Range #340: [110592, 110592, Katakana]
-  110593,  // Range #341: [110593, 110878, Hiragana]
-  110928,  // Range #342: [110928, 110930, Hiragana]
-  110948,  // Range #343: [110948, 110951, Katakana]
-  110960,  // Range #344: [110960, 111355, Nushu]
-  113664,  // Range #345: [113664, 113770, Duployan]
-  113776,  // Range #346: [113776, 113800, Duployan]
-  113808,  // Range #347: [113808, 113823, Duployan]
-  119296,  // Range #348: [119296, 119365, Greek]
-  120832,  // Range #349: [120832, 121483, SignWriting]
-  121499,  // Range #350: [121499, 121519, SignWriting]
-  122880,  // Range #351: [122880, 122922, Glagolitic]
-  123136,  // Range #352: [123136, 123215, Nyiakeng_Puachue_Hmong]
-  123584,  // Range #353: [123584, 123641, Wancho]
-  123647,  // Range #354: [123647, 123647, Wancho]
-  124928,  // Range #355: [124928, 125142, Mende_Kikakui]
-  125184,  // Range #356: [125184, 125279, Adlam]
-  126464,  // Range #357: [126464, 126523, Arabic]
-  126530,  // Range #358: [126530, 126619, Arabic]
-  126625,  // Range #359: [126625, 126651, Arabic]
-  126704,  // Range #360: [126704, 126705, Arabic]
-  127488,  // Range #361: [127488, 127488, Hiragana]
-  131072,  // Range #362: [131072, 173782, Han]
-  173824,  // Range #363: [173824, 177972, Han]
-  177984,  // Range #364: [177984, 183969, Han]
-  183984,  // Range #365: [183984, 191456, Han]
-  194560,  // Range #366: [194560, 195101, Han]
+  69552,  // Range #276: [69552, 69579, Chorasmian]
+  69600,  // Range #277: [69600, 69622, Elymaic]
+  69632,  // Range #278: [69632, 69743, Brahmi]
+  69759,  // Range #279: [69759, 69759, Brahmi]
+  69760,  // Range #280: [69760, 69825, Kaithi]
+  69837,  // Range #281: [69837, 69837, Kaithi]
+  69840,  // Range #282: [69840, 69864, Sora_Sompeng]
+  69872,  // Range #283: [69872, 69881, Sora_Sompeng]
+  69888,  // Range #284: [69888, 69959, Chakma]
+  69968,  // Range #285: [69968, 70006, Mahajani]
+  70016,  // Range #286: [70016, 70111, Sharada]
+  70113,  // Range #287: [70113, 70132, Sinhala]
+  70144,  // Range #288: [70144, 70206, Khojki]
+  70272,  // Range #289: [70272, 70313, Multani]
+  70320,  // Range #290: [70320, 70378, Khudawadi]
+  70384,  // Range #291: [70384, 70393, Khudawadi]
+  70400,  // Range #292: [70400, 70457, Grantha]
+  70460,  // Range #293: [70460, 70480, Grantha]
+  70487,  // Range #294: [70487, 70487, Grantha]
+  70493,  // Range #295: [70493, 70516, Grantha]
+  70656,  // Range #296: [70656, 70753, Newa]
+  70784,  // Range #297: [70784, 70855, Tirhuta]
+  70864,  // Range #298: [70864, 70873, Tirhuta]
+  71040,  // Range #299: [71040, 71133, Siddham]
+  71168,  // Range #300: [71168, 71236, Modi]
+  71248,  // Range #301: [71248, 71257, Modi]
+  71264,  // Range #302: [71264, 71276, Mongolian]
+  71296,  // Range #303: [71296, 71352, Takri]
+  71360,  // Range #304: [71360, 71369, Takri]
+  71424,  // Range #305: [71424, 71487, Ahom]
+  71680,  // Range #306: [71680, 71739, Dogra]
+  71840,  // Range #307: [71840, 71922, Warang_Citi]
+  71935,  // Range #308: [71935, 71935, Warang_Citi]
+  71936,  // Range #309: [71936, 72006, Dives_Akuru]
+  72016,  // Range #310: [72016, 72025, Dives_Akuru]
+  72096,  // Range #311: [72096, 72164, Nandinagari]
+  72192,  // Range #312: [72192, 72263, Zanabazar_Square]
+  72272,  // Range #313: [72272, 72354, Soyombo]
+  72384,  // Range #314: [72384, 72440, Pau_Cin_Hau]
+  72704,  // Range #315: [72704, 72773, Bhaiksuki]
+  72784,  // Range #316: [72784, 72812, Bhaiksuki]
+  72816,  // Range #317: [72816, 72886, Marchen]
+  72960,  // Range #318: [72960, 73031, Masaram_Gondi]
+  73040,  // Range #319: [73040, 73049, Masaram_Gondi]
+  73056,  // Range #320: [73056, 73112, Gunjala_Gondi]
+  73120,  // Range #321: [73120, 73129, Gunjala_Gondi]
+  73440,  // Range #322: [73440, 73464, Makasar]
+  73648,  // Range #323: [73648, 73648, Lisu]
+  73664,  // Range #324: [73664, 73713, Tamil]
+  73727,  // Range #325: [73727, 73727, Tamil]
+  73728,  // Range #326: [73728, 74649, Cuneiform]
+  74752,  // Range #327: [74752, 74868, Cuneiform]
+  74880,  // Range #328: [74880, 75075, Cuneiform]
+  77824,  // Range #329: [77824, 78904, Egyptian_Hieroglyphs]
+  82944,  // Range #330: [82944, 83526, Anatolian_Hieroglyphs]
+  92160,  // Range #331: [92160, 92728, Bamum]
+  92736,  // Range #332: [92736, 92783, Mro]
+  92880,  // Range #333: [92880, 92917, Bassa_Vah]
+  92928,  // Range #334: [92928, 92997, Pahawh_Hmong]
+  93008,  // Range #335: [93008, 93047, Pahawh_Hmong]
+  93053,  // Range #336: [93053, 93071, Pahawh_Hmong]
+  93760,  // Range #337: [93760, 93850, Medefaidrin]
+  93952,  // Range #338: [93952, 94087, Miao]
+  94095,  // Range #339: [94095, 94111, Miao]
+  94176,  // Range #340: [94176, 94176, Tangut]
+  94177,  // Range #341: [94177, 94177, Nushu]
+  94180,  // Range #342: [94180, 94180, Khitan_Small_Script]
+  94192,  // Range #343: [94192, 94193, Han]
+  94208,  // Range #344: [94208, 100343, Tangut]
+  100352,  // Range #345: [100352, 101119, Tangut]
+  101120,  // Range #346: [101120, 101589, Khitan_Small_Script]
+  101632,  // Range #347: [101632, 101640, Tangut]
+  110592,  // Range #348: [110592, 110592, Katakana]
+  110593,  // Range #349: [110593, 110878, Hiragana]
+  110928,  // Range #350: [110928, 110930, Hiragana]
+  110948,  // Range #351: [110948, 110951, Katakana]
+  110960,  // Range #352: [110960, 111355, Nushu]
+  113664,  // Range #353: [113664, 113770, Duployan]
+  113776,  // Range #354: [113776, 113800, Duployan]
+  113808,  // Range #355: [113808, 113823, Duployan]
+  119296,  // Range #356: [119296, 119365, Greek]
+  120832,  // Range #357: [120832, 121483, SignWriting]
+  121499,  // Range #358: [121499, 121519, SignWriting]
+  122880,  // Range #359: [122880, 122922, Glagolitic]
+  123136,  // Range #360: [123136, 123215, Nyiakeng_Puachue_Hmong]
+  123584,  // Range #361: [123584, 123641, Wancho]
+  123647,  // Range #362: [123647, 123647, Wancho]
+  124928,  // Range #363: [124928, 125142, Mende_Kikakui]
+  125184,  // Range #364: [125184, 125279, Adlam]
+  126464,  // Range #365: [126464, 126523, Arabic]
+  126530,  // Range #366: [126530, 126619, Arabic]
+  126625,  // Range #367: [126625, 126651, Arabic]
+  126704,  // Range #368: [126704, 126705, Arabic]
+  127488,  // Range #369: [127488, 127488, Hiragana]
+  131072,  // Range #370: [131072, 173789, Han]
+  173824,  // Range #371: [173824, 177972, Han]
+  177984,  // Range #372: [177984, 183969, Han]
+  183984,  // Range #373: [183984, 191456, Han]
+  194560,  // Range #374: [194560, 195101, Han]
+  196608,  // Range #375: [196608, 201546, Han]
 };
 
 const uint16 kRangeSizeMinusOne[] = {
@@ -418,355 +427,364 @@
   15,  // Range #15: [1008, 1023, Greek]
   132,  // Range #16: [1024, 1156, Cyrillic]
   168,  // Range #17: [1159, 1327, Cyrillic]
-  87,  // Range #18: [1329, 1416, Armenian]
-  5,  // Range #19: [1418, 1423, Armenian]
-  54,  // Range #20: [1425, 1479, Hebrew]
-  36,  // Range #21: [1488, 1524, Hebrew]
-  4,  // Range #22: [1536, 1540, Arabic]
-  5,  // Range #23: [1542, 1547, Arabic]
-  13,  // Range #24: [1549, 1562, Arabic]
-  2,  // Range #25: [1564, 1566, Arabic]
-  31,  // Range #26: [1568, 1599, Arabic]
-  9,  // Range #27: [1601, 1610, Arabic]
-  25,  // Range #28: [1622, 1647, Arabic]
-  107,  // Range #29: [1649, 1756, Arabic]
-  33,  // Range #30: [1758, 1791, Arabic]
-  79,  // Range #31: [1792, 1871, Syriac]
-  47,  // Range #32: [1872, 1919, Arabic]
-  49,  // Range #33: [1920, 1969, Thaana]
-  63,  // Range #34: [1984, 2047, Nko]
-  62,  // Range #35: [2048, 2110, Samaritan]
-  30,  // Range #36: [2112, 2142, Mandaic]
-  10,  // Range #37: [2144, 2154, Syriac]
-  29,  // Range #38: [2208, 2237, Arabic]
-  14,  // Range #39: [2259, 2273, Arabic]
-  28,  // Range #40: [2275, 2303, Arabic]
-  80,  // Range #41: [2304, 2384, Devanagari]
-  14,  // Range #42: [2389, 2403, Devanagari]
-  25,  // Range #43: [2406, 2431, Devanagari]
-  78,  // Range #44: [2432, 2510, Bengali]
-  39,  // Range #45: [2519, 2558, Bengali]
-  80,  // Range #46: [2561, 2641, Gurmukhi]
-  5,  // Range #47: [2649, 2654, Gurmukhi]
-  16,  // Range #48: [2662, 2678, Gurmukhi]
-  79,  // Range #49: [2689, 2768, Gujarati]
-  17,  // Range #50: [2784, 2801, Gujarati]
-  6,  // Range #51: [2809, 2815, Gujarati]
-  76,  // Range #52: [2817, 2893, Oriya]
-  33,  // Range #53: [2902, 2935, Oriya]
-  78,  // Range #54: [2946, 3024, Tamil]
-  0,  // Range #55: [3031, 3031, Tamil]
-  20,  // Range #56: [3046, 3066, Tamil]
-  77,  // Range #57: [3072, 3149, Telugu]
-  5,  // Range #58: [3157, 3162, Telugu]
-  15,  // Range #59: [3168, 3183, Telugu]
-  8,  // Range #60: [3191, 3199, Telugu]
-  77,  // Range #61: [3200, 3277, Kannada]
-  1,  // Range #62: [3285, 3286, Kannada]
-  20,  // Range #63: [3294, 3314, Kannada]
-  127,  // Range #64: [3328, 3455, Malayalam]
-  93,  // Range #65: [3458, 3551, Sinhala]
-  14,  // Range #66: [3558, 3572, Sinhala]
-  57,  // Range #67: [3585, 3642, Thai]
-  27,  // Range #68: [3648, 3675, Thai]
-  94,  // Range #69: [3713, 3807, Lao]
-  212,  // Range #70: [3840, 4052, Tibetan]
-  1,  // Range #71: [4057, 4058, Tibetan]
-  159,  // Range #72: [4096, 4255, Myanmar]
-  39,  // Range #73: [4256, 4295, Georgian]
-  45,  // Range #74: [4301, 4346, Georgian]
-  3,  // Range #75: [4348, 4351, Georgian]
-  255,  // Range #76: [4352, 4607, Hangul]
-  409,  // Range #77: [4608, 5017, Ethiopic]
-  93,  // Range #78: [5024, 5117, Cherokee]
-  639,  // Range #79: [5120, 5759, Canadian_Aboriginal]
-  28,  // Range #80: [5760, 5788, Ogham]
-  74,  // Range #81: [5792, 5866, Runic]
-  10,  // Range #82: [5870, 5880, Runic]
-  20,  // Range #83: [5888, 5908, Tagalog]
-  20,  // Range #84: [5920, 5940, Hanunoo]
-  19,  // Range #85: [5952, 5971, Buhid]
-  19,  // Range #86: [5984, 6003, Tagbanwa]
-  105,  // Range #87: [6016, 6121, Khmer]
-  9,  // Range #88: [6128, 6137, Khmer]
-  1,  // Range #89: [6144, 6145, Mongolian]
-  0,  // Range #90: [6148, 6148, Mongolian]
-  19,  // Range #91: [6150, 6169, Mongolian]
-  88,  // Range #92: [6176, 6264, Mongolian]
-  42,  // Range #93: [6272, 6314, Mongolian]
-  69,  // Range #94: [6320, 6389, Canadian_Aboriginal]
-  79,  // Range #95: [6400, 6479, Limbu]
-  36,  // Range #96: [6480, 6516, Tai_Le]
-  73,  // Range #97: [6528, 6601, New_Tai_Lue]
-  15,  // Range #98: [6608, 6623, New_Tai_Lue]
-  31,  // Range #99: [6624, 6655, Khmer]
-  31,  // Range #100: [6656, 6687, Buginese]
-  105,  // Range #101: [6688, 6793, Tai_Tham]
-  9,  // Range #102: [6800, 6809, Tai_Tham]
-  13,  // Range #103: [6816, 6829, Tai_Tham]
-  124,  // Range #104: [6912, 7036, Balinese]
-  63,  // Range #105: [7040, 7103, Sundanese]
-  51,  // Range #106: [7104, 7155, Batak]
-  3,  // Range #107: [7164, 7167, Batak]
-  79,  // Range #108: [7168, 7247, Lepcha]
-  47,  // Range #109: [7248, 7295, Ol_Chiki]
-  8,  // Range #110: [7296, 7304, Cyrillic]
-  47,  // Range #111: [7312, 7359, Georgian]
-  7,  // Range #112: [7360, 7367, Sundanese]
-  37,  // Range #113: [7424, 7461, Latin]
-  4,  // Range #114: [7462, 7466, Greek]
-  0,  // Range #115: [7467, 7467, Cyrillic]
-  48,  // Range #116: [7468, 7516, Latin]
-  4,  // Range #117: [7517, 7521, Greek]
-  3,  // Range #118: [7522, 7525, Latin]
-  4,  // Range #119: [7526, 7530, Greek]
-  12,  // Range #120: [7531, 7543, Latin]
-  0,  // Range #121: [7544, 7544, Cyrillic]
-  69,  // Range #122: [7545, 7614, Latin]
-  0,  // Range #123: [7615, 7615, Greek]
-  255,  // Range #124: [7680, 7935, Latin]
-  254,  // Range #125: [7936, 8190, Greek]
-  0,  // Range #126: [8305, 8305, Latin]
-  0,  // Range #127: [8319, 8319, Latin]
-  12,  // Range #128: [8336, 8348, Latin]
-  0,  // Range #129: [8486, 8486, Greek]
-  1,  // Range #130: [8490, 8491, Latin]
-  0,  // Range #131: [8498, 8498, Latin]
-  0,  // Range #132: [8526, 8526, Latin]
-  40,  // Range #133: [8544, 8584, Latin]
-  255,  // Range #134: [10240, 10495, Braille]
-  94,  // Range #135: [11264, 11358, Glagolitic]
-  31,  // Range #136: [11360, 11391, Latin]
-  115,  // Range #137: [11392, 11507, Coptic]
-  6,  // Range #138: [11513, 11519, Coptic]
-  39,  // Range #139: [11520, 11559, Georgian]
-  0,  // Range #140: [11565, 11565, Georgian]
-  55,  // Range #141: [11568, 11623, Tifinagh]
-  1,  // Range #142: [11631, 11632, Tifinagh]
-  0,  // Range #143: [11647, 11647, Tifinagh]
-  22,  // Range #144: [11648, 11670, Ethiopic]
-  62,  // Range #145: [11680, 11742, Ethiopic]
-  31,  // Range #146: [11744, 11775, Cyrillic]
-  115,  // Range #147: [11904, 12019, Han]
-  213,  // Range #148: [12032, 12245, Han]
-  0,  // Range #149: [12293, 12293, Han]
-  0,  // Range #150: [12295, 12295, Han]
-  8,  // Range #151: [12321, 12329, Han]
-  1,  // Range #152: [12334, 12335, Hangul]
-  3,  // Range #153: [12344, 12347, Han]
-  85,  // Range #154: [12353, 12438, Hiragana]
-  2,  // Range #155: [12445, 12447, Hiragana]
-  89,  // Range #156: [12449, 12538, Katakana]
-  2,  // Range #157: [12541, 12543, Katakana]
-  42,  // Range #158: [12549, 12591, Bopomofo]
-  93,  // Range #159: [12593, 12686, Hangul]
-  26,  // Range #160: [12704, 12730, Bopomofo]
-  15,  // Range #161: [12784, 12799, Katakana]
-  30,  // Range #162: [12800, 12830, Hangul]
-  30,  // Range #163: [12896, 12926, Hangul]
-  46,  // Range #164: [13008, 13054, Katakana]
-  87,  // Range #165: [13056, 13143, Katakana]
-  6581,  // Range #166: [13312, 19893, Han]
-  20975,  // Range #167: [19968, 40943, Han]
-  1222,  // Range #168: [40960, 42182, Yi]
-  47,  // Range #169: [42192, 42239, Lisu]
-  299,  // Range #170: [42240, 42539, Vai]
-  95,  // Range #171: [42560, 42655, Cyrillic]
-  87,  // Range #172: [42656, 42743, Bamum]
-  101,  // Range #173: [42786, 42887, Latin]
-  59,  // Range #174: [42891, 42950, Latin]
-  8,  // Range #175: [42999, 43007, Latin]
-  43,  // Range #176: [43008, 43051, Syloti_Nagri]
-  55,  // Range #177: [43072, 43127, Phags_Pa]
-  69,  // Range #178: [43136, 43205, Saurashtra]
-  11,  // Range #179: [43214, 43225, Saurashtra]
-  31,  // Range #180: [43232, 43263, Devanagari]
-  45,  // Range #181: [43264, 43309, Kayah_Li]
-  0,  // Range #182: [43311, 43311, Kayah_Li]
-  35,  // Range #183: [43312, 43347, Rejang]
-  0,  // Range #184: [43359, 43359, Rejang]
-  28,  // Range #185: [43360, 43388, Hangul]
-  77,  // Range #186: [43392, 43469, Javanese]
-  15,  // Range #187: [43472, 43487, Javanese]
-  30,  // Range #188: [43488, 43518, Myanmar]
-  54,  // Range #189: [43520, 43574, Cham]
-  31,  // Range #190: [43584, 43615, Cham]
-  31,  // Range #191: [43616, 43647, Myanmar]
-  66,  // Range #192: [43648, 43714, Tai_Viet]
-  4,  // Range #193: [43739, 43743, Tai_Viet]
-  22,  // Range #194: [43744, 43766, Meetei_Mayek]
-  21,  // Range #195: [43777, 43798, Ethiopic]
-  14,  // Range #196: [43808, 43822, Ethiopic]
-  42,  // Range #197: [43824, 43866, Latin]
-  8,  // Range #198: [43868, 43876, Latin]
-  0,  // Range #199: [43877, 43877, Greek]
-  1,  // Range #200: [43878, 43879, Latin]
-  79,  // Range #201: [43888, 43967, Cherokee]
-  57,  // Range #202: [43968, 44025, Meetei_Mayek]
-  11171,  // Range #203: [44032, 55203, Hangul]
-  75,  // Range #204: [55216, 55291, Hangul]
-  473,  // Range #205: [63744, 64217, Han]
-  6,  // Range #206: [64256, 64262, Latin]
-  4,  // Range #207: [64275, 64279, Armenian]
-  50,  // Range #208: [64285, 64335, Hebrew]
-  113,  // Range #209: [64336, 64449, Arabic]
-  362,  // Range #210: [64467, 64829, Arabic]
-  119,  // Range #211: [64848, 64967, Arabic]
-  13,  // Range #212: [65008, 65021, Arabic]
-  1,  // Range #213: [65070, 65071, Cyrillic]
-  140,  // Range #214: [65136, 65276, Arabic]
-  25,  // Range #215: [65313, 65338, Latin]
-  25,  // Range #216: [65345, 65370, Latin]
-  9,  // Range #217: [65382, 65391, Katakana]
-  44,  // Range #218: [65393, 65437, Katakana]
-  60,  // Range #219: [65440, 65500, Hangul]
-  93,  // Range #220: [65536, 65629, Linear_B]
-  122,  // Range #221: [65664, 65786, Linear_B]
-  78,  // Range #222: [65856, 65934, Greek]
-  0,  // Range #223: [65952, 65952, Greek]
-  28,  // Range #224: [66176, 66204, Lycian]
-  48,  // Range #225: [66208, 66256, Carian]
-  35,  // Range #226: [66304, 66339, Old_Italic]
-  2,  // Range #227: [66349, 66351, Old_Italic]
-  26,  // Range #228: [66352, 66378, Gothic]
-  42,  // Range #229: [66384, 66426, Old_Permic]
-  31,  // Range #230: [66432, 66463, Ugaritic]
-  53,  // Range #231: [66464, 66517, Old_Persian]
-  79,  // Range #232: [66560, 66639, Deseret]
-  47,  // Range #233: [66640, 66687, Shavian]
-  41,  // Range #234: [66688, 66729, Osmanya]
-  75,  // Range #235: [66736, 66811, Osage]
-  39,  // Range #236: [66816, 66855, Elbasan]
-  51,  // Range #237: [66864, 66915, Caucasian_Albanian]
-  0,  // Range #238: [66927, 66927, Caucasian_Albanian]
-  310,  // Range #239: [67072, 67382, Linear_A]
-  21,  // Range #240: [67392, 67413, Linear_A]
-  7,  // Range #241: [67424, 67431, Linear_A]
-  63,  // Range #242: [67584, 67647, Cypriot]
-  31,  // Range #243: [67648, 67679, Imperial_Aramaic]
-  31,  // Range #244: [67680, 67711, Palmyrene]
-  30,  // Range #245: [67712, 67742, Nabataean]
-  8,  // Range #246: [67751, 67759, Nabataean]
-  21,  // Range #247: [67808, 67829, Hatran]
-  4,  // Range #248: [67835, 67839, Hatran]
-  31,  // Range #249: [67840, 67871, Phoenician]
-  25,  // Range #250: [67872, 67897, Lydian]
-  0,  // Range #251: [67903, 67903, Lydian]
-  31,  // Range #252: [67968, 67999, Meroitic_Hieroglyphs]
-  95,  // Range #253: [68000, 68095, Meroitic_Cursive]
-  6,  // Range #254: [68096, 68102, Kharoshthi]
-  60,  // Range #255: [68108, 68168, Kharoshthi]
-  8,  // Range #256: [68176, 68184, Kharoshthi]
-  31,  // Range #257: [68192, 68223, Old_South_Arabian]
-  31,  // Range #258: [68224, 68255, Old_North_Arabian]
-  54,  // Range #259: [68288, 68342, Manichaean]
-  63,  // Range #260: [68352, 68415, Avestan]
-  31,  // Range #261: [68416, 68447, Inscriptional_Parthian]
-  18,  // Range #262: [68448, 68466, Inscriptional_Pahlavi]
-  7,  // Range #263: [68472, 68479, Inscriptional_Pahlavi]
-  17,  // Range #264: [68480, 68497, Psalter_Pahlavi]
-  3,  // Range #265: [68505, 68508, Psalter_Pahlavi]
-  6,  // Range #266: [68521, 68527, Psalter_Pahlavi]
-  72,  // Range #267: [68608, 68680, Old_Turkic]
-  50,  // Range #268: [68736, 68786, Old_Hungarian]
-  50,  // Range #269: [68800, 68850, Old_Hungarian]
-  5,  // Range #270: [68858, 68863, Old_Hungarian]
-  39,  // Range #271: [68864, 68903, Hanifi_Rohingya]
-  9,  // Range #272: [68912, 68921, Hanifi_Rohingya]
-  30,  // Range #273: [69216, 69246, Arabic]
+  94,  // Range #18: [1329, 1423, Armenian]
+  54,  // Range #19: [1425, 1479, Hebrew]
+  36,  // Range #20: [1488, 1524, Hebrew]
+  4,  // Range #21: [1536, 1540, Arabic]
+  5,  // Range #22: [1542, 1547, Arabic]
+  13,  // Range #23: [1549, 1562, Arabic]
+  2,  // Range #24: [1564, 1566, Arabic]
+  31,  // Range #25: [1568, 1599, Arabic]
+  9,  // Range #26: [1601, 1610, Arabic]
+  25,  // Range #27: [1622, 1647, Arabic]
+  107,  // Range #28: [1649, 1756, Arabic]
+  33,  // Range #29: [1758, 1791, Arabic]
+  79,  // Range #30: [1792, 1871, Syriac]
+  47,  // Range #31: [1872, 1919, Arabic]
+  49,  // Range #32: [1920, 1969, Thaana]
+  63,  // Range #33: [1984, 2047, Nko]
+  62,  // Range #34: [2048, 2110, Samaritan]
+  30,  // Range #35: [2112, 2142, Mandaic]
+  10,  // Range #36: [2144, 2154, Syriac]
+  39,  // Range #37: [2208, 2247, Arabic]
+  14,  // Range #38: [2259, 2273, Arabic]
+  28,  // Range #39: [2275, 2303, Arabic]
+  80,  // Range #40: [2304, 2384, Devanagari]
+  14,  // Range #41: [2389, 2403, Devanagari]
+  25,  // Range #42: [2406, 2431, Devanagari]
+  78,  // Range #43: [2432, 2510, Bengali]
+  39,  // Range #44: [2519, 2558, Bengali]
+  80,  // Range #45: [2561, 2641, Gurmukhi]
+  5,  // Range #46: [2649, 2654, Gurmukhi]
+  16,  // Range #47: [2662, 2678, Gurmukhi]
+  79,  // Range #48: [2689, 2768, Gujarati]
+  17,  // Range #49: [2784, 2801, Gujarati]
+  6,  // Range #50: [2809, 2815, Gujarati]
+  76,  // Range #51: [2817, 2893, Oriya]
+  34,  // Range #52: [2901, 2935, Oriya]
+  78,  // Range #53: [2946, 3024, Tamil]
+  0,  // Range #54: [3031, 3031, Tamil]
+  20,  // Range #55: [3046, 3066, Tamil]
+  77,  // Range #56: [3072, 3149, Telugu]
+  5,  // Range #57: [3157, 3162, Telugu]
+  15,  // Range #58: [3168, 3183, Telugu]
+  8,  // Range #59: [3191, 3199, Telugu]
+  77,  // Range #60: [3200, 3277, Kannada]
+  1,  // Range #61: [3285, 3286, Kannada]
+  20,  // Range #62: [3294, 3314, Kannada]
+  127,  // Range #63: [3328, 3455, Malayalam]
+  94,  // Range #64: [3457, 3551, Sinhala]
+  14,  // Range #65: [3558, 3572, Sinhala]
+  57,  // Range #66: [3585, 3642, Thai]
+  27,  // Range #67: [3648, 3675, Thai]
+  94,  // Range #68: [3713, 3807, Lao]
+  212,  // Range #69: [3840, 4052, Tibetan]
+  1,  // Range #70: [4057, 4058, Tibetan]
+  159,  // Range #71: [4096, 4255, Myanmar]
+  39,  // Range #72: [4256, 4295, Georgian]
+  45,  // Range #73: [4301, 4346, Georgian]
+  3,  // Range #74: [4348, 4351, Georgian]
+  255,  // Range #75: [4352, 4607, Hangul]
+  409,  // Range #76: [4608, 5017, Ethiopic]
+  93,  // Range #77: [5024, 5117, Cherokee]
+  639,  // Range #78: [5120, 5759, Canadian_Aboriginal]
+  28,  // Range #79: [5760, 5788, Ogham]
+  74,  // Range #80: [5792, 5866, Runic]
+  10,  // Range #81: [5870, 5880, Runic]
+  20,  // Range #82: [5888, 5908, Tagalog]
+  20,  // Range #83: [5920, 5940, Hanunoo]
+  19,  // Range #84: [5952, 5971, Buhid]
+  19,  // Range #85: [5984, 6003, Tagbanwa]
+  105,  // Range #86: [6016, 6121, Khmer]
+  9,  // Range #87: [6128, 6137, Khmer]
+  1,  // Range #88: [6144, 6145, Mongolian]
+  0,  // Range #89: [6148, 6148, Mongolian]
+  19,  // Range #90: [6150, 6169, Mongolian]
+  88,  // Range #91: [6176, 6264, Mongolian]
+  42,  // Range #92: [6272, 6314, Mongolian]
+  69,  // Range #93: [6320, 6389, Canadian_Aboriginal]
+  79,  // Range #94: [6400, 6479, Limbu]
+  36,  // Range #95: [6480, 6516, Tai_Le]
+  73,  // Range #96: [6528, 6601, New_Tai_Lue]
+  15,  // Range #97: [6608, 6623, New_Tai_Lue]
+  31,  // Range #98: [6624, 6655, Khmer]
+  31,  // Range #99: [6656, 6687, Buginese]
+  105,  // Range #100: [6688, 6793, Tai_Tham]
+  9,  // Range #101: [6800, 6809, Tai_Tham]
+  13,  // Range #102: [6816, 6829, Tai_Tham]
+  124,  // Range #103: [6912, 7036, Balinese]
+  63,  // Range #104: [7040, 7103, Sundanese]
+  51,  // Range #105: [7104, 7155, Batak]
+  3,  // Range #106: [7164, 7167, Batak]
+  79,  // Range #107: [7168, 7247, Lepcha]
+  47,  // Range #108: [7248, 7295, Ol_Chiki]
+  8,  // Range #109: [7296, 7304, Cyrillic]
+  47,  // Range #110: [7312, 7359, Georgian]
+  7,  // Range #111: [7360, 7367, Sundanese]
+  37,  // Range #112: [7424, 7461, Latin]
+  4,  // Range #113: [7462, 7466, Greek]
+  0,  // Range #114: [7467, 7467, Cyrillic]
+  48,  // Range #115: [7468, 7516, Latin]
+  4,  // Range #116: [7517, 7521, Greek]
+  3,  // Range #117: [7522, 7525, Latin]
+  4,  // Range #118: [7526, 7530, Greek]
+  12,  // Range #119: [7531, 7543, Latin]
+  0,  // Range #120: [7544, 7544, Cyrillic]
+  69,  // Range #121: [7545, 7614, Latin]
+  0,  // Range #122: [7615, 7615, Greek]
+  255,  // Range #123: [7680, 7935, Latin]
+  254,  // Range #124: [7936, 8190, Greek]
+  0,  // Range #125: [8305, 8305, Latin]
+  0,  // Range #126: [8319, 8319, Latin]
+  12,  // Range #127: [8336, 8348, Latin]
+  0,  // Range #128: [8486, 8486, Greek]
+  1,  // Range #129: [8490, 8491, Latin]
+  0,  // Range #130: [8498, 8498, Latin]
+  0,  // Range #131: [8526, 8526, Latin]
+  40,  // Range #132: [8544, 8584, Latin]
+  255,  // Range #133: [10240, 10495, Braille]
+  94,  // Range #134: [11264, 11358, Glagolitic]
+  31,  // Range #135: [11360, 11391, Latin]
+  115,  // Range #136: [11392, 11507, Coptic]
+  6,  // Range #137: [11513, 11519, Coptic]
+  39,  // Range #138: [11520, 11559, Georgian]
+  0,  // Range #139: [11565, 11565, Georgian]
+  55,  // Range #140: [11568, 11623, Tifinagh]
+  1,  // Range #141: [11631, 11632, Tifinagh]
+  0,  // Range #142: [11647, 11647, Tifinagh]
+  22,  // Range #143: [11648, 11670, Ethiopic]
+  62,  // Range #144: [11680, 11742, Ethiopic]
+  31,  // Range #145: [11744, 11775, Cyrillic]
+  115,  // Range #146: [11904, 12019, Han]
+  213,  // Range #147: [12032, 12245, Han]
+  0,  // Range #148: [12293, 12293, Han]
+  0,  // Range #149: [12295, 12295, Han]
+  8,  // Range #150: [12321, 12329, Han]
+  1,  // Range #151: [12334, 12335, Hangul]
+  3,  // Range #152: [12344, 12347, Han]
+  85,  // Range #153: [12353, 12438, Hiragana]
+  2,  // Range #154: [12445, 12447, Hiragana]
+  89,  // Range #155: [12449, 12538, Katakana]
+  2,  // Range #156: [12541, 12543, Katakana]
+  42,  // Range #157: [12549, 12591, Bopomofo]
+  93,  // Range #158: [12593, 12686, Hangul]
+  31,  // Range #159: [12704, 12735, Bopomofo]
+  15,  // Range #160: [12784, 12799, Katakana]
+  30,  // Range #161: [12800, 12830, Hangul]
+  30,  // Range #162: [12896, 12926, Hangul]
+  46,  // Range #163: [13008, 13054, Katakana]
+  87,  // Range #164: [13056, 13143, Katakana]
+  6591,  // Range #165: [13312, 19903, Han]
+  20988,  // Range #166: [19968, 40956, Han]
+  1222,  // Range #167: [40960, 42182, Yi]
+  47,  // Range #168: [42192, 42239, Lisu]
+  299,  // Range #169: [42240, 42539, Vai]
+  95,  // Range #170: [42560, 42655, Cyrillic]
+  87,  // Range #171: [42656, 42743, Bamum]
+  101,  // Range #172: [42786, 42887, Latin]
+  63,  // Range #173: [42891, 42954, Latin]
+  10,  // Range #174: [42997, 43007, Latin]
+  44,  // Range #175: [43008, 43052, Syloti_Nagri]
+  55,  // Range #176: [43072, 43127, Phags_Pa]
+  69,  // Range #177: [43136, 43205, Saurashtra]
+  11,  // Range #178: [43214, 43225, Saurashtra]
+  31,  // Range #179: [43232, 43263, Devanagari]
+  45,  // Range #180: [43264, 43309, Kayah_Li]
+  0,  // Range #181: [43311, 43311, Kayah_Li]
+  35,  // Range #182: [43312, 43347, Rejang]
+  0,  // Range #183: [43359, 43359, Rejang]
+  28,  // Range #184: [43360, 43388, Hangul]
+  77,  // Range #185: [43392, 43469, Javanese]
+  15,  // Range #186: [43472, 43487, Javanese]
+  30,  // Range #187: [43488, 43518, Myanmar]
+  54,  // Range #188: [43520, 43574, Cham]
+  31,  // Range #189: [43584, 43615, Cham]
+  31,  // Range #190: [43616, 43647, Myanmar]
+  66,  // Range #191: [43648, 43714, Tai_Viet]
+  4,  // Range #192: [43739, 43743, Tai_Viet]
+  22,  // Range #193: [43744, 43766, Meetei_Mayek]
+  21,  // Range #194: [43777, 43798, Ethiopic]
+  14,  // Range #195: [43808, 43822, Ethiopic]
+  42,  // Range #196: [43824, 43866, Latin]
+  8,  // Range #197: [43868, 43876, Latin]
+  0,  // Range #198: [43877, 43877, Greek]
+  3,  // Range #199: [43878, 43881, Latin]
+  79,  // Range #200: [43888, 43967, Cherokee]
+  57,  // Range #201: [43968, 44025, Meetei_Mayek]
+  11171,  // Range #202: [44032, 55203, Hangul]
+  75,  // Range #203: [55216, 55291, Hangul]
+  473,  // Range #204: [63744, 64217, Han]
+  6,  // Range #205: [64256, 64262, Latin]
+  4,  // Range #206: [64275, 64279, Armenian]
+  50,  // Range #207: [64285, 64335, Hebrew]
+  113,  // Range #208: [64336, 64449, Arabic]
+  362,  // Range #209: [64467, 64829, Arabic]
+  119,  // Range #210: [64848, 64967, Arabic]
+  13,  // Range #211: [65008, 65021, Arabic]
+  1,  // Range #212: [65070, 65071, Cyrillic]
+  140,  // Range #213: [65136, 65276, Arabic]
+  25,  // Range #214: [65313, 65338, Latin]
+  25,  // Range #215: [65345, 65370, Latin]
+  9,  // Range #216: [65382, 65391, Katakana]
+  44,  // Range #217: [65393, 65437, Katakana]
+  60,  // Range #218: [65440, 65500, Hangul]
+  93,  // Range #219: [65536, 65629, Linear_B]
+  122,  // Range #220: [65664, 65786, Linear_B]
+  78,  // Range #221: [65856, 65934, Greek]
+  0,  // Range #222: [65952, 65952, Greek]
+  28,  // Range #223: [66176, 66204, Lycian]
+  48,  // Range #224: [66208, 66256, Carian]
+  35,  // Range #225: [66304, 66339, Old_Italic]
+  2,  // Range #226: [66349, 66351, Old_Italic]
+  26,  // Range #227: [66352, 66378, Gothic]
+  42,  // Range #228: [66384, 66426, Old_Permic]
+  31,  // Range #229: [66432, 66463, Ugaritic]
+  53,  // Range #230: [66464, 66517, Old_Persian]
+  79,  // Range #231: [66560, 66639, Deseret]
+  47,  // Range #232: [66640, 66687, Shavian]
+  41,  // Range #233: [66688, 66729, Osmanya]
+  75,  // Range #234: [66736, 66811, Osage]
+  39,  // Range #235: [66816, 66855, Elbasan]
+  51,  // Range #236: [66864, 66915, Caucasian_Albanian]
+  0,  // Range #237: [66927, 66927, Caucasian_Albanian]
+  310,  // Range #238: [67072, 67382, Linear_A]
+  21,  // Range #239: [67392, 67413, Linear_A]
+  7,  // Range #240: [67424, 67431, Linear_A]
+  63,  // Range #241: [67584, 67647, Cypriot]
+  31,  // Range #242: [67648, 67679, Imperial_Aramaic]
+  31,  // Range #243: [67680, 67711, Palmyrene]
+  30,  // Range #244: [67712, 67742, Nabataean]
+  8,  // Range #245: [67751, 67759, Nabataean]
+  21,  // Range #246: [67808, 67829, Hatran]
+  4,  // Range #247: [67835, 67839, Hatran]
+  31,  // Range #248: [67840, 67871, Phoenician]
+  25,  // Range #249: [67872, 67897, Lydian]
+  0,  // Range #250: [67903, 67903, Lydian]
+  31,  // Range #251: [67968, 67999, Meroitic_Hieroglyphs]
+  95,  // Range #252: [68000, 68095, Meroitic_Cursive]
+  6,  // Range #253: [68096, 68102, Kharoshthi]
+  60,  // Range #254: [68108, 68168, Kharoshthi]
+  8,  // Range #255: [68176, 68184, Kharoshthi]
+  31,  // Range #256: [68192, 68223, Old_South_Arabian]
+  31,  // Range #257: [68224, 68255, Old_North_Arabian]
+  54,  // Range #258: [68288, 68342, Manichaean]
+  63,  // Range #259: [68352, 68415, Avestan]
+  31,  // Range #260: [68416, 68447, Inscriptional_Parthian]
+  18,  // Range #261: [68448, 68466, Inscriptional_Pahlavi]
+  7,  // Range #262: [68472, 68479, Inscriptional_Pahlavi]
+  17,  // Range #263: [68480, 68497, Psalter_Pahlavi]
+  3,  // Range #264: [68505, 68508, Psalter_Pahlavi]
+  6,  // Range #265: [68521, 68527, Psalter_Pahlavi]
+  72,  // Range #266: [68608, 68680, Old_Turkic]
+  50,  // Range #267: [68736, 68786, Old_Hungarian]
+  50,  // Range #268: [68800, 68850, Old_Hungarian]
+  5,  // Range #269: [68858, 68863, Old_Hungarian]
+  39,  // Range #270: [68864, 68903, Hanifi_Rohingya]
+  9,  // Range #271: [68912, 68921, Hanifi_Rohingya]
+  30,  // Range #272: [69216, 69246, Arabic]
+  49,  // Range #273: [69248, 69297, Yezidi]
   39,  // Range #274: [69376, 69415, Old_Sogdian]
   41,  // Range #275: [69424, 69465, Sogdian]
-  22,  // Range #276: [69600, 69622, Elymaic]
-  111,  // Range #277: [69632, 69743, Brahmi]
-  0,  // Range #278: [69759, 69759, Brahmi]
-  65,  // Range #279: [69760, 69825, Kaithi]
-  0,  // Range #280: [69837, 69837, Kaithi]
-  24,  // Range #281: [69840, 69864, Sora_Sompeng]
-  9,  // Range #282: [69872, 69881, Sora_Sompeng]
-  70,  // Range #283: [69888, 69958, Chakma]
-  38,  // Range #284: [69968, 70006, Mahajani]
-  95,  // Range #285: [70016, 70111, Sharada]
-  19,  // Range #286: [70113, 70132, Sinhala]
-  62,  // Range #287: [70144, 70206, Khojki]
-  41,  // Range #288: [70272, 70313, Multani]
-  58,  // Range #289: [70320, 70378, Khudawadi]
-  9,  // Range #290: [70384, 70393, Khudawadi]
-  57,  // Range #291: [70400, 70457, Grantha]
-  20,  // Range #292: [70460, 70480, Grantha]
-  0,  // Range #293: [70487, 70487, Grantha]
-  23,  // Range #294: [70493, 70516, Grantha]
-  95,  // Range #295: [70656, 70751, Newa]
-  71,  // Range #296: [70784, 70855, Tirhuta]
-  9,  // Range #297: [70864, 70873, Tirhuta]
-  93,  // Range #298: [71040, 71133, Siddham]
-  68,  // Range #299: [71168, 71236, Modi]
-  9,  // Range #300: [71248, 71257, Modi]
-  12,  // Range #301: [71264, 71276, Mongolian]
-  56,  // Range #302: [71296, 71352, Takri]
-  9,  // Range #303: [71360, 71369, Takri]
-  63,  // Range #304: [71424, 71487, Ahom]
-  59,  // Range #305: [71680, 71739, Dogra]
-  82,  // Range #306: [71840, 71922, Warang_Citi]
-  0,  // Range #307: [71935, 71935, Warang_Citi]
-  68,  // Range #308: [72096, 72164, Nandinagari]
-  71,  // Range #309: [72192, 72263, Zanabazar_Square]
-  82,  // Range #310: [72272, 72354, Soyombo]
-  56,  // Range #311: [72384, 72440, Pau_Cin_Hau]
-  69,  // Range #312: [72704, 72773, Bhaiksuki]
-  28,  // Range #313: [72784, 72812, Bhaiksuki]
-  70,  // Range #314: [72816, 72886, Marchen]
-  71,  // Range #315: [72960, 73031, Masaram_Gondi]
-  9,  // Range #316: [73040, 73049, Masaram_Gondi]
-  56,  // Range #317: [73056, 73112, Gunjala_Gondi]
-  9,  // Range #318: [73120, 73129, Gunjala_Gondi]
-  24,  // Range #319: [73440, 73464, Makasar]
-  49,  // Range #320: [73664, 73713, Tamil]
-  0,  // Range #321: [73727, 73727, Tamil]
-  921,  // Range #322: [73728, 74649, Cuneiform]
-  116,  // Range #323: [74752, 74868, Cuneiform]
-  195,  // Range #324: [74880, 75075, Cuneiform]
-  1080,  // Range #325: [77824, 78904, Egyptian_Hieroglyphs]
-  582,  // Range #326: [82944, 83526, Anatolian_Hieroglyphs]
-  568,  // Range #327: [92160, 92728, Bamum]
-  47,  // Range #328: [92736, 92783, Mro]
-  37,  // Range #329: [92880, 92917, Bassa_Vah]
-  69,  // Range #330: [92928, 92997, Pahawh_Hmong]
-  39,  // Range #331: [93008, 93047, Pahawh_Hmong]
-  18,  // Range #332: [93053, 93071, Pahawh_Hmong]
-  90,  // Range #333: [93760, 93850, Medefaidrin]
-  135,  // Range #334: [93952, 94087, Miao]
-  16,  // Range #335: [94095, 94111, Miao]
-  0,  // Range #336: [94176, 94176, Tangut]
-  0,  // Range #337: [94177, 94177, Nushu]
-  6135,  // Range #338: [94208, 100343, Tangut]
-  754,  // Range #339: [100352, 101106, Tangut]
-  0,  // Range #340: [110592, 110592, Katakana]
-  285,  // Range #341: [110593, 110878, Hiragana]
-  2,  // Range #342: [110928, 110930, Hiragana]
-  3,  // Range #343: [110948, 110951, Katakana]
-  395,  // Range #344: [110960, 111355, Nushu]
-  106,  // Range #345: [113664, 113770, Duployan]
-  24,  // Range #346: [113776, 113800, Duployan]
-  15,  // Range #347: [113808, 113823, Duployan]
-  69,  // Range #348: [119296, 119365, Greek]
-  651,  // Range #349: [120832, 121483, SignWriting]
-  20,  // Range #350: [121499, 121519, SignWriting]
-  42,  // Range #351: [122880, 122922, Glagolitic]
-  79,  // Range #352: [123136, 123215, Nyiakeng_Puachue_Hmong]
-  57,  // Range #353: [123584, 123641, Wancho]
-  0,  // Range #354: [123647, 123647, Wancho]
-  214,  // Range #355: [124928, 125142, Mende_Kikakui]
-  95,  // Range #356: [125184, 125279, Adlam]
-  59,  // Range #357: [126464, 126523, Arabic]
-  89,  // Range #358: [126530, 126619, Arabic]
-  26,  // Range #359: [126625, 126651, Arabic]
-  1,  // Range #360: [126704, 126705, Arabic]
-  0,  // Range #361: [127488, 127488, Hiragana]
-  42710,  // Range #362: [131072, 173782, Han]
-  4148,  // Range #363: [173824, 177972, Han]
-  5985,  // Range #364: [177984, 183969, Han]
-  7472,  // Range #365: [183984, 191456, Han]
-  541,  // Range #366: [194560, 195101, Han]
+  27,  // Range #276: [69552, 69579, Chorasmian]
+  22,  // Range #277: [69600, 69622, Elymaic]
+  111,  // Range #278: [69632, 69743, Brahmi]
+  0,  // Range #279: [69759, 69759, Brahmi]
+  65,  // Range #280: [69760, 69825, Kaithi]
+  0,  // Range #281: [69837, 69837, Kaithi]
+  24,  // Range #282: [69840, 69864, Sora_Sompeng]
+  9,  // Range #283: [69872, 69881, Sora_Sompeng]
+  71,  // Range #284: [69888, 69959, Chakma]
+  38,  // Range #285: [69968, 70006, Mahajani]
+  95,  // Range #286: [70016, 70111, Sharada]
+  19,  // Range #287: [70113, 70132, Sinhala]
+  62,  // Range #288: [70144, 70206, Khojki]
+  41,  // Range #289: [70272, 70313, Multani]
+  58,  // Range #290: [70320, 70378, Khudawadi]
+  9,  // Range #291: [70384, 70393, Khudawadi]
+  57,  // Range #292: [70400, 70457, Grantha]
+  20,  // Range #293: [70460, 70480, Grantha]
+  0,  // Range #294: [70487, 70487, Grantha]
+  23,  // Range #295: [70493, 70516, Grantha]
+  97,  // Range #296: [70656, 70753, Newa]
+  71,  // Range #297: [70784, 70855, Tirhuta]
+  9,  // Range #298: [70864, 70873, Tirhuta]
+  93,  // Range #299: [71040, 71133, Siddham]
+  68,  // Range #300: [71168, 71236, Modi]
+  9,  // Range #301: [71248, 71257, Modi]
+  12,  // Range #302: [71264, 71276, Mongolian]
+  56,  // Range #303: [71296, 71352, Takri]
+  9,  // Range #304: [71360, 71369, Takri]
+  63,  // Range #305: [71424, 71487, Ahom]
+  59,  // Range #306: [71680, 71739, Dogra]
+  82,  // Range #307: [71840, 71922, Warang_Citi]
+  0,  // Range #308: [71935, 71935, Warang_Citi]
+  70,  // Range #309: [71936, 72006, Dives_Akuru]
+  9,  // Range #310: [72016, 72025, Dives_Akuru]
+  68,  // Range #311: [72096, 72164, Nandinagari]
+  71,  // Range #312: [72192, 72263, Zanabazar_Square]
+  82,  // Range #313: [72272, 72354, Soyombo]
+  56,  // Range #314: [72384, 72440, Pau_Cin_Hau]
+  69,  // Range #315: [72704, 72773, Bhaiksuki]
+  28,  // Range #316: [72784, 72812, Bhaiksuki]
+  70,  // Range #317: [72816, 72886, Marchen]
+  71,  // Range #318: [72960, 73031, Masaram_Gondi]
+  9,  // Range #319: [73040, 73049, Masaram_Gondi]
+  56,  // Range #320: [73056, 73112, Gunjala_Gondi]
+  9,  // Range #321: [73120, 73129, Gunjala_Gondi]
+  24,  // Range #322: [73440, 73464, Makasar]
+  0,  // Range #323: [73648, 73648, Lisu]
+  49,  // Range #324: [73664, 73713, Tamil]
+  0,  // Range #325: [73727, 73727, Tamil]
+  921,  // Range #326: [73728, 74649, Cuneiform]
+  116,  // Range #327: [74752, 74868, Cuneiform]
+  195,  // Range #328: [74880, 75075, Cuneiform]
+  1080,  // Range #329: [77824, 78904, Egyptian_Hieroglyphs]
+  582,  // Range #330: [82944, 83526, Anatolian_Hieroglyphs]
+  568,  // Range #331: [92160, 92728, Bamum]
+  47,  // Range #332: [92736, 92783, Mro]
+  37,  // Range #333: [92880, 92917, Bassa_Vah]
+  69,  // Range #334: [92928, 92997, Pahawh_Hmong]
+  39,  // Range #335: [93008, 93047, Pahawh_Hmong]
+  18,  // Range #336: [93053, 93071, Pahawh_Hmong]
+  90,  // Range #337: [93760, 93850, Medefaidrin]
+  135,  // Range #338: [93952, 94087, Miao]
+  16,  // Range #339: [94095, 94111, Miao]
+  0,  // Range #340: [94176, 94176, Tangut]
+  0,  // Range #341: [94177, 94177, Nushu]
+  0,  // Range #342: [94180, 94180, Khitan_Small_Script]
+  1,  // Range #343: [94192, 94193, Han]
+  6135,  // Range #344: [94208, 100343, Tangut]
+  767,  // Range #345: [100352, 101119, Tangut]
+  469,  // Range #346: [101120, 101589, Khitan_Small_Script]
+  8,  // Range #347: [101632, 101640, Tangut]
+  0,  // Range #348: [110592, 110592, Katakana]
+  285,  // Range #349: [110593, 110878, Hiragana]
+  2,  // Range #350: [110928, 110930, Hiragana]
+  3,  // Range #351: [110948, 110951, Katakana]
+  395,  // Range #352: [110960, 111355, Nushu]
+  106,  // Range #353: [113664, 113770, Duployan]
+  24,  // Range #354: [113776, 113800, Duployan]
+  15,  // Range #355: [113808, 113823, Duployan]
+  69,  // Range #356: [119296, 119365, Greek]
+  651,  // Range #357: [120832, 121483, SignWriting]
+  20,  // Range #358: [121499, 121519, SignWriting]
+  42,  // Range #359: [122880, 122922, Glagolitic]
+  79,  // Range #360: [123136, 123215, Nyiakeng_Puachue_Hmong]
+  57,  // Range #361: [123584, 123641, Wancho]
+  0,  // Range #362: [123647, 123647, Wancho]
+  214,  // Range #363: [124928, 125142, Mende_Kikakui]
+  95,  // Range #364: [125184, 125279, Adlam]
+  59,  // Range #365: [126464, 126523, Arabic]
+  89,  // Range #366: [126530, 126619, Arabic]
+  26,  // Range #367: [126625, 126651, Arabic]
+  1,  // Range #368: [126704, 126705, Arabic]
+  0,  // Range #369: [127488, 127488, Hiragana]
+  42717,  // Range #370: [131072, 173789, Han]
+  4148,  // Range #371: [173824, 177972, Han]
+  5985,  // Range #372: [177984, 183969, Han]
+  7472,  // Range #373: [183984, 191456, Han]
+  541,  // Range #374: [194560, 195101, Han]
+  4938,  // Range #375: [196608, 201546, Han]
 };
 
 const uint8 kRangeScript[] = {
@@ -788,358 +806,367 @@
   14,  // Range #15: [1008, 1023, Greek]
   8,  // Range #16: [1024, 1156, Cyrillic]
   8,  // Range #17: [1159, 1327, Cyrillic]
-  3,  // Range #18: [1329, 1416, Armenian]
-  3,  // Range #19: [1418, 1423, Armenian]
-  19,  // Range #20: [1425, 1479, Hebrew]
-  19,  // Range #21: [1488, 1524, Hebrew]
-  2,  // Range #22: [1536, 1540, Arabic]
-  2,  // Range #23: [1542, 1547, Arabic]
-  2,  // Range #24: [1549, 1562, Arabic]
-  2,  // Range #25: [1564, 1566, Arabic]
-  2,  // Range #26: [1568, 1599, Arabic]
-  2,  // Range #27: [1601, 1610, Arabic]
-  2,  // Range #28: [1622, 1647, Arabic]
-  2,  // Range #29: [1649, 1756, Arabic]
-  2,  // Range #30: [1758, 1791, Arabic]
-  34,  // Range #31: [1792, 1871, Syriac]
-  2,  // Range #32: [1872, 1919, Arabic]
-  37,  // Range #33: [1920, 1969, Thaana]
-  87,  // Range #34: [1984, 2047, Nko]
-  126,  // Range #35: [2048, 2110, Samaritan]
-  84,  // Range #36: [2112, 2142, Mandaic]
-  34,  // Range #37: [2144, 2154, Syriac]
-  2,  // Range #38: [2208, 2237, Arabic]
-  2,  // Range #39: [2259, 2273, Arabic]
-  2,  // Range #40: [2275, 2303, Arabic]
-  10,  // Range #41: [2304, 2384, Devanagari]
-  10,  // Range #42: [2389, 2403, Devanagari]
-  10,  // Range #43: [2406, 2431, Devanagari]
-  4,  // Range #44: [2432, 2510, Bengali]
-  4,  // Range #45: [2519, 2558, Bengali]
-  16,  // Range #46: [2561, 2641, Gurmukhi]
-  16,  // Range #47: [2649, 2654, Gurmukhi]
-  16,  // Range #48: [2662, 2678, Gurmukhi]
-  15,  // Range #49: [2689, 2768, Gujarati]
-  15,  // Range #50: [2784, 2801, Gujarati]
-  15,  // Range #51: [2809, 2815, Gujarati]
-  31,  // Range #52: [2817, 2893, Oriya]
-  31,  // Range #53: [2902, 2935, Oriya]
-  35,  // Range #54: [2946, 3024, Tamil]
-  35,  // Range #55: [3031, 3031, Tamil]
-  35,  // Range #56: [3046, 3066, Tamil]
-  36,  // Range #57: [3072, 3149, Telugu]
-  36,  // Range #58: [3157, 3162, Telugu]
-  36,  // Range #59: [3168, 3183, Telugu]
-  36,  // Range #60: [3191, 3199, Telugu]
-  21,  // Range #61: [3200, 3277, Kannada]
-  21,  // Range #62: [3285, 3286, Kannada]
-  21,  // Range #63: [3294, 3314, Kannada]
-  26,  // Range #64: [3328, 3455, Malayalam]
-  33,  // Range #65: [3458, 3551, Sinhala]
-  33,  // Range #66: [3558, 3572, Sinhala]
-  38,  // Range #67: [3585, 3642, Thai]
-  38,  // Range #68: [3648, 3675, Thai]
-  24,  // Range #69: [3713, 3807, Lao]
-  39,  // Range #70: [3840, 4052, Tibetan]
-  39,  // Range #71: [4057, 4058, Tibetan]
-  28,  // Range #72: [4096, 4255, Myanmar]
-  12,  // Range #73: [4256, 4295, Georgian]
-  12,  // Range #74: [4301, 4346, Georgian]
-  12,  // Range #75: [4348, 4351, Georgian]
-  18,  // Range #76: [4352, 4607, Hangul]
-  11,  // Range #77: [4608, 5017, Ethiopic]
-  6,  // Range #78: [5024, 5117, Cherokee]
-  40,  // Range #79: [5120, 5759, Canadian_Aboriginal]
-  29,  // Range #80: [5760, 5788, Ogham]
-  32,  // Range #81: [5792, 5866, Runic]
-  32,  // Range #82: [5870, 5880, Runic]
-  42,  // Range #83: [5888, 5908, Tagalog]
-  43,  // Range #84: [5920, 5940, Hanunoo]
-  44,  // Range #85: [5952, 5971, Buhid]
-  45,  // Range #86: [5984, 6003, Tagbanwa]
-  23,  // Range #87: [6016, 6121, Khmer]
-  23,  // Range #88: [6128, 6137, Khmer]
-  27,  // Range #89: [6144, 6145, Mongolian]
-  27,  // Range #90: [6148, 6148, Mongolian]
-  27,  // Range #91: [6150, 6169, Mongolian]
-  27,  // Range #92: [6176, 6264, Mongolian]
-  27,  // Range #93: [6272, 6314, Mongolian]
-  40,  // Range #94: [6320, 6389, Canadian_Aboriginal]
-  48,  // Range #95: [6400, 6479, Limbu]
-  52,  // Range #96: [6480, 6516, Tai_Le]
-  59,  // Range #97: [6528, 6601, New_Tai_Lue]
-  59,  // Range #98: [6608, 6623, New_Tai_Lue]
-  23,  // Range #99: [6624, 6655, Khmer]
-  55,  // Range #100: [6656, 6687, Buginese]
-  106,  // Range #101: [6688, 6793, Tai_Tham]
-  106,  // Range #102: [6800, 6809, Tai_Tham]
-  106,  // Range #103: [6816, 6829, Tai_Tham]
-  62,  // Range #104: [6912, 7036, Balinese]
-  113,  // Range #105: [7040, 7103, Sundanese]
-  63,  // Range #106: [7104, 7155, Batak]
-  63,  // Range #107: [7164, 7167, Batak]
-  82,  // Range #108: [7168, 7247, Lepcha]
-  109,  // Range #109: [7248, 7295, Ol_Chiki]
-  8,  // Range #110: [7296, 7304, Cyrillic]
-  12,  // Range #111: [7312, 7359, Georgian]
-  113,  // Range #112: [7360, 7367, Sundanese]
-  25,  // Range #113: [7424, 7461, Latin]
-  14,  // Range #114: [7462, 7466, Greek]
-  8,  // Range #115: [7467, 7467, Cyrillic]
-  25,  // Range #116: [7468, 7516, Latin]
-  14,  // Range #117: [7517, 7521, Greek]
-  25,  // Range #118: [7522, 7525, Latin]
-  14,  // Range #119: [7526, 7530, Greek]
-  25,  // Range #120: [7531, 7543, Latin]
-  8,  // Range #121: [7544, 7544, Cyrillic]
-  25,  // Range #122: [7545, 7614, Latin]
-  14,  // Range #123: [7615, 7615, Greek]
-  25,  // Range #124: [7680, 7935, Latin]
-  14,  // Range #125: [7936, 8190, Greek]
-  25,  // Range #126: [8305, 8305, Latin]
-  25,  // Range #127: [8319, 8319, Latin]
-  25,  // Range #128: [8336, 8348, Latin]
-  14,  // Range #129: [8486, 8486, Greek]
-  25,  // Range #130: [8490, 8491, Latin]
-  25,  // Range #131: [8498, 8498, Latin]
-  25,  // Range #132: [8526, 8526, Latin]
-  25,  // Range #133: [8544, 8584, Latin]
-  46,  // Range #134: [10240, 10495, Braille]
-  56,  // Range #135: [11264, 11358, Glagolitic]
-  25,  // Range #136: [11360, 11391, Latin]
-  7,  // Range #137: [11392, 11507, Coptic]
-  7,  // Range #138: [11513, 11519, Coptic]
-  12,  // Range #139: [11520, 11559, Georgian]
-  12,  // Range #140: [11565, 11565, Georgian]
-  60,  // Range #141: [11568, 11623, Tifinagh]
-  60,  // Range #142: [11631, 11632, Tifinagh]
-  60,  // Range #143: [11647, 11647, Tifinagh]
-  11,  // Range #144: [11648, 11670, Ethiopic]
-  11,  // Range #145: [11680, 11742, Ethiopic]
-  8,  // Range #146: [11744, 11775, Cyrillic]
-  17,  // Range #147: [11904, 12019, Han]
-  17,  // Range #148: [12032, 12245, Han]
-  17,  // Range #149: [12293, 12293, Han]
-  17,  // Range #150: [12295, 12295, Han]
-  17,  // Range #151: [12321, 12329, Han]
-  18,  // Range #152: [12334, 12335, Hangul]
-  17,  // Range #153: [12344, 12347, Han]
-  20,  // Range #154: [12353, 12438, Hiragana]
-  20,  // Range #155: [12445, 12447, Hiragana]
-  22,  // Range #156: [12449, 12538, Katakana]
-  22,  // Range #157: [12541, 12543, Katakana]
-  5,  // Range #158: [12549, 12591, Bopomofo]
-  18,  // Range #159: [12593, 12686, Hangul]
-  5,  // Range #160: [12704, 12730, Bopomofo]
-  22,  // Range #161: [12784, 12799, Katakana]
-  18,  // Range #162: [12800, 12830, Hangul]
-  18,  // Range #163: [12896, 12926, Hangul]
-  22,  // Range #164: [13008, 13054, Katakana]
-  22,  // Range #165: [13056, 13143, Katakana]
-  17,  // Range #166: [13312, 19893, Han]
-  17,  // Range #167: [19968, 40943, Han]
-  41,  // Range #168: [40960, 42182, Yi]
-  131,  // Range #169: [42192, 42239, Lisu]
-  99,  // Range #170: [42240, 42539, Vai]
-  8,  // Range #171: [42560, 42655, Cyrillic]
-  130,  // Range #172: [42656, 42743, Bamum]
-  25,  // Range #173: [42786, 42887, Latin]
-  25,  // Range #174: [42891, 42950, Latin]
-  25,  // Range #175: [42999, 43007, Latin]
-  58,  // Range #176: [43008, 43051, Syloti_Nagri]
-  90,  // Range #177: [43072, 43127, Phags_Pa]
-  111,  // Range #178: [43136, 43205, Saurashtra]
-  111,  // Range #179: [43214, 43225, Saurashtra]
-  10,  // Range #180: [43232, 43263, Devanagari]
-  79,  // Range #181: [43264, 43309, Kayah_Li]
-  79,  // Range #182: [43311, 43311, Kayah_Li]
-  110,  // Range #183: [43312, 43347, Rejang]
-  110,  // Range #184: [43359, 43359, Rejang]
-  18,  // Range #185: [43360, 43388, Hangul]
-  78,  // Range #186: [43392, 43469, Javanese]
-  78,  // Range #187: [43472, 43487, Javanese]
-  28,  // Range #188: [43488, 43518, Myanmar]
-  66,  // Range #189: [43520, 43574, Cham]
-  66,  // Range #190: [43584, 43615, Cham]
-  28,  // Range #191: [43616, 43647, Myanmar]
-  127,  // Range #192: [43648, 43714, Tai_Viet]
-  127,  // Range #193: [43739, 43743, Tai_Viet]
-  115,  // Range #194: [43744, 43766, Meetei_Mayek]
-  11,  // Range #195: [43777, 43798, Ethiopic]
-  11,  // Range #196: [43808, 43822, Ethiopic]
-  25,  // Range #197: [43824, 43866, Latin]
-  25,  // Range #198: [43868, 43876, Latin]
-  14,  // Range #199: [43877, 43877, Greek]
-  25,  // Range #200: [43878, 43879, Latin]
-  6,  // Range #201: [43888, 43967, Cherokee]
-  115,  // Range #202: [43968, 44025, Meetei_Mayek]
-  18,  // Range #203: [44032, 55203, Hangul]
-  18,  // Range #204: [55216, 55291, Hangul]
-  17,  // Range #205: [63744, 64217, Han]
-  25,  // Range #206: [64256, 64262, Latin]
-  3,  // Range #207: [64275, 64279, Armenian]
-  19,  // Range #208: [64285, 64335, Hebrew]
-  2,  // Range #209: [64336, 64449, Arabic]
-  2,  // Range #210: [64467, 64829, Arabic]
-  2,  // Range #211: [64848, 64967, Arabic]
-  2,  // Range #212: [65008, 65021, Arabic]
-  8,  // Range #213: [65070, 65071, Cyrillic]
-  2,  // Range #214: [65136, 65276, Arabic]
-  25,  // Range #215: [65313, 65338, Latin]
-  25,  // Range #216: [65345, 65370, Latin]
-  22,  // Range #217: [65382, 65391, Katakana]
-  22,  // Range #218: [65393, 65437, Katakana]
-  18,  // Range #219: [65440, 65500, Hangul]
-  49,  // Range #220: [65536, 65629, Linear_B]
-  49,  // Range #221: [65664, 65786, Linear_B]
-  14,  // Range #222: [65856, 65934, Greek]
-  14,  // Range #223: [65952, 65952, Greek]
-  107,  // Range #224: [66176, 66204, Lycian]
-  104,  // Range #225: [66208, 66256, Carian]
-  30,  // Range #226: [66304, 66339, Old_Italic]
-  30,  // Range #227: [66349, 66351, Old_Italic]
-  13,  // Range #228: [66352, 66378, Gothic]
-  89,  // Range #229: [66384, 66426, Old_Permic]
-  53,  // Range #230: [66432, 66463, Ugaritic]
-  61,  // Range #231: [66464, 66517, Old_Persian]
-  9,  // Range #232: [66560, 66639, Deseret]
-  51,  // Range #233: [66640, 66687, Shavian]
-  50,  // Range #234: [66688, 66729, Osmanya]
-  171,  // Range #235: [66736, 66811, Osage]
-  136,  // Range #236: [66816, 66855, Elbasan]
-  159,  // Range #237: [66864, 66915, Caucasian_Albanian]
-  159,  // Range #238: [66927, 66927, Caucasian_Albanian]
-  83,  // Range #239: [67072, 67382, Linear_A]
-  83,  // Range #240: [67392, 67413, Linear_A]
-  83,  // Range #241: [67424, 67431, Linear_A]
-  47,  // Range #242: [67584, 67647, Cypriot]
-  116,  // Range #243: [67648, 67679, Imperial_Aramaic]
-  144,  // Range #244: [67680, 67711, Palmyrene]
-  143,  // Range #245: [67712, 67742, Nabataean]
-  143,  // Range #246: [67751, 67759, Nabataean]
-  162,  // Range #247: [67808, 67829, Hatran]
-  162,  // Range #248: [67835, 67839, Hatran]
-  91,  // Range #249: [67840, 67871, Phoenician]
-  108,  // Range #250: [67872, 67897, Lydian]
-  108,  // Range #251: [67903, 67903, Lydian]
-  86,  // Range #252: [67968, 67999, Meroitic_Hieroglyphs]
-  141,  // Range #253: [68000, 68095, Meroitic_Cursive]
-  57,  // Range #254: [68096, 68102, Kharoshthi]
-  57,  // Range #255: [68108, 68168, Kharoshthi]
-  57,  // Range #256: [68176, 68184, Kharoshthi]
-  133,  // Range #257: [68192, 68223, Old_South_Arabian]
-  142,  // Range #258: [68224, 68255, Old_North_Arabian]
-  121,  // Range #259: [68288, 68342, Manichaean]
-  117,  // Range #260: [68352, 68415, Avestan]
-  125,  // Range #261: [68416, 68447, Inscriptional_Parthian]
-  122,  // Range #262: [68448, 68466, Inscriptional_Pahlavi]
-  122,  // Range #263: [68472, 68479, Inscriptional_Pahlavi]
-  123,  // Range #264: [68480, 68497, Psalter_Pahlavi]
-  123,  // Range #265: [68505, 68508, Psalter_Pahlavi]
-  123,  // Range #266: [68521, 68527, Psalter_Pahlavi]
-  88,  // Range #267: [68608, 68680, Old_Turkic]
-  76,  // Range #268: [68736, 68786, Old_Hungarian]
-  76,  // Range #269: [68800, 68850, Old_Hungarian]
-  76,  // Range #270: [68858, 68863, Old_Hungarian]
-  182,  // Range #271: [68864, 68903, Hanifi_Rohingya]
-  182,  // Range #272: [68912, 68921, Hanifi_Rohingya]
-  2,  // Range #273: [69216, 69246, Arabic]
+  3,  // Range #18: [1329, 1423, Armenian]
+  19,  // Range #19: [1425, 1479, Hebrew]
+  19,  // Range #20: [1488, 1524, Hebrew]
+  2,  // Range #21: [1536, 1540, Arabic]
+  2,  // Range #22: [1542, 1547, Arabic]
+  2,  // Range #23: [1549, 1562, Arabic]
+  2,  // Range #24: [1564, 1566, Arabic]
+  2,  // Range #25: [1568, 1599, Arabic]
+  2,  // Range #26: [1601, 1610, Arabic]
+  2,  // Range #27: [1622, 1647, Arabic]
+  2,  // Range #28: [1649, 1756, Arabic]
+  2,  // Range #29: [1758, 1791, Arabic]
+  34,  // Range #30: [1792, 1871, Syriac]
+  2,  // Range #31: [1872, 1919, Arabic]
+  37,  // Range #32: [1920, 1969, Thaana]
+  87,  // Range #33: [1984, 2047, Nko]
+  126,  // Range #34: [2048, 2110, Samaritan]
+  84,  // Range #35: [2112, 2142, Mandaic]
+  34,  // Range #36: [2144, 2154, Syriac]
+  2,  // Range #37: [2208, 2247, Arabic]
+  2,  // Range #38: [2259, 2273, Arabic]
+  2,  // Range #39: [2275, 2303, Arabic]
+  10,  // Range #40: [2304, 2384, Devanagari]
+  10,  // Range #41: [2389, 2403, Devanagari]
+  10,  // Range #42: [2406, 2431, Devanagari]
+  4,  // Range #43: [2432, 2510, Bengali]
+  4,  // Range #44: [2519, 2558, Bengali]
+  16,  // Range #45: [2561, 2641, Gurmukhi]
+  16,  // Range #46: [2649, 2654, Gurmukhi]
+  16,  // Range #47: [2662, 2678, Gurmukhi]
+  15,  // Range #48: [2689, 2768, Gujarati]
+  15,  // Range #49: [2784, 2801, Gujarati]
+  15,  // Range #50: [2809, 2815, Gujarati]
+  31,  // Range #51: [2817, 2893, Oriya]
+  31,  // Range #52: [2901, 2935, Oriya]
+  35,  // Range #53: [2946, 3024, Tamil]
+  35,  // Range #54: [3031, 3031, Tamil]
+  35,  // Range #55: [3046, 3066, Tamil]
+  36,  // Range #56: [3072, 3149, Telugu]
+  36,  // Range #57: [3157, 3162, Telugu]
+  36,  // Range #58: [3168, 3183, Telugu]
+  36,  // Range #59: [3191, 3199, Telugu]
+  21,  // Range #60: [3200, 3277, Kannada]
+  21,  // Range #61: [3285, 3286, Kannada]
+  21,  // Range #62: [3294, 3314, Kannada]
+  26,  // Range #63: [3328, 3455, Malayalam]
+  33,  // Range #64: [3457, 3551, Sinhala]
+  33,  // Range #65: [3558, 3572, Sinhala]
+  38,  // Range #66: [3585, 3642, Thai]
+  38,  // Range #67: [3648, 3675, Thai]
+  24,  // Range #68: [3713, 3807, Lao]
+  39,  // Range #69: [3840, 4052, Tibetan]
+  39,  // Range #70: [4057, 4058, Tibetan]
+  28,  // Range #71: [4096, 4255, Myanmar]
+  12,  // Range #72: [4256, 4295, Georgian]
+  12,  // Range #73: [4301, 4346, Georgian]
+  12,  // Range #74: [4348, 4351, Georgian]
+  18,  // Range #75: [4352, 4607, Hangul]
+  11,  // Range #76: [4608, 5017, Ethiopic]
+  6,  // Range #77: [5024, 5117, Cherokee]
+  40,  // Range #78: [5120, 5759, Canadian_Aboriginal]
+  29,  // Range #79: [5760, 5788, Ogham]
+  32,  // Range #80: [5792, 5866, Runic]
+  32,  // Range #81: [5870, 5880, Runic]
+  42,  // Range #82: [5888, 5908, Tagalog]
+  43,  // Range #83: [5920, 5940, Hanunoo]
+  44,  // Range #84: [5952, 5971, Buhid]
+  45,  // Range #85: [5984, 6003, Tagbanwa]
+  23,  // Range #86: [6016, 6121, Khmer]
+  23,  // Range #87: [6128, 6137, Khmer]
+  27,  // Range #88: [6144, 6145, Mongolian]
+  27,  // Range #89: [6148, 6148, Mongolian]
+  27,  // Range #90: [6150, 6169, Mongolian]
+  27,  // Range #91: [6176, 6264, Mongolian]
+  27,  // Range #92: [6272, 6314, Mongolian]
+  40,  // Range #93: [6320, 6389, Canadian_Aboriginal]
+  48,  // Range #94: [6400, 6479, Limbu]
+  52,  // Range #95: [6480, 6516, Tai_Le]
+  59,  // Range #96: [6528, 6601, New_Tai_Lue]
+  59,  // Range #97: [6608, 6623, New_Tai_Lue]
+  23,  // Range #98: [6624, 6655, Khmer]
+  55,  // Range #99: [6656, 6687, Buginese]
+  106,  // Range #100: [6688, 6793, Tai_Tham]
+  106,  // Range #101: [6800, 6809, Tai_Tham]
+  106,  // Range #102: [6816, 6829, Tai_Tham]
+  62,  // Range #103: [6912, 7036, Balinese]
+  113,  // Range #104: [7040, 7103, Sundanese]
+  63,  // Range #105: [7104, 7155, Batak]
+  63,  // Range #106: [7164, 7167, Batak]
+  82,  // Range #107: [7168, 7247, Lepcha]
+  109,  // Range #108: [7248, 7295, Ol_Chiki]
+  8,  // Range #109: [7296, 7304, Cyrillic]
+  12,  // Range #110: [7312, 7359, Georgian]
+  113,  // Range #111: [7360, 7367, Sundanese]
+  25,  // Range #112: [7424, 7461, Latin]
+  14,  // Range #113: [7462, 7466, Greek]
+  8,  // Range #114: [7467, 7467, Cyrillic]
+  25,  // Range #115: [7468, 7516, Latin]
+  14,  // Range #116: [7517, 7521, Greek]
+  25,  // Range #117: [7522, 7525, Latin]
+  14,  // Range #118: [7526, 7530, Greek]
+  25,  // Range #119: [7531, 7543, Latin]
+  8,  // Range #120: [7544, 7544, Cyrillic]
+  25,  // Range #121: [7545, 7614, Latin]
+  14,  // Range #122: [7615, 7615, Greek]
+  25,  // Range #123: [7680, 7935, Latin]
+  14,  // Range #124: [7936, 8190, Greek]
+  25,  // Range #125: [8305, 8305, Latin]
+  25,  // Range #126: [8319, 8319, Latin]
+  25,  // Range #127: [8336, 8348, Latin]
+  14,  // Range #128: [8486, 8486, Greek]
+  25,  // Range #129: [8490, 8491, Latin]
+  25,  // Range #130: [8498, 8498, Latin]
+  25,  // Range #131: [8526, 8526, Latin]
+  25,  // Range #132: [8544, 8584, Latin]
+  46,  // Range #133: [10240, 10495, Braille]
+  56,  // Range #134: [11264, 11358, Glagolitic]
+  25,  // Range #135: [11360, 11391, Latin]
+  7,  // Range #136: [11392, 11507, Coptic]
+  7,  // Range #137: [11513, 11519, Coptic]
+  12,  // Range #138: [11520, 11559, Georgian]
+  12,  // Range #139: [11565, 11565, Georgian]
+  60,  // Range #140: [11568, 11623, Tifinagh]
+  60,  // Range #141: [11631, 11632, Tifinagh]
+  60,  // Range #142: [11647, 11647, Tifinagh]
+  11,  // Range #143: [11648, 11670, Ethiopic]
+  11,  // Range #144: [11680, 11742, Ethiopic]
+  8,  // Range #145: [11744, 11775, Cyrillic]
+  17,  // Range #146: [11904, 12019, Han]
+  17,  // Range #147: [12032, 12245, Han]
+  17,  // Range #148: [12293, 12293, Han]
+  17,  // Range #149: [12295, 12295, Han]
+  17,  // Range #150: [12321, 12329, Han]
+  18,  // Range #151: [12334, 12335, Hangul]
+  17,  // Range #152: [12344, 12347, Han]
+  20,  // Range #153: [12353, 12438, Hiragana]
+  20,  // Range #154: [12445, 12447, Hiragana]
+  22,  // Range #155: [12449, 12538, Katakana]
+  22,  // Range #156: [12541, 12543, Katakana]
+  5,  // Range #157: [12549, 12591, Bopomofo]
+  18,  // Range #158: [12593, 12686, Hangul]
+  5,  // Range #159: [12704, 12735, Bopomofo]
+  22,  // Range #160: [12784, 12799, Katakana]
+  18,  // Range #161: [12800, 12830, Hangul]
+  18,  // Range #162: [12896, 12926, Hangul]
+  22,  // Range #163: [13008, 13054, Katakana]
+  22,  // Range #164: [13056, 13143, Katakana]
+  17,  // Range #165: [13312, 19903, Han]
+  17,  // Range #166: [19968, 40956, Han]
+  41,  // Range #167: [40960, 42182, Yi]
+  131,  // Range #168: [42192, 42239, Lisu]
+  99,  // Range #169: [42240, 42539, Vai]
+  8,  // Range #170: [42560, 42655, Cyrillic]
+  130,  // Range #171: [42656, 42743, Bamum]
+  25,  // Range #172: [42786, 42887, Latin]
+  25,  // Range #173: [42891, 42954, Latin]
+  25,  // Range #174: [42997, 43007, Latin]
+  58,  // Range #175: [43008, 43052, Syloti_Nagri]
+  90,  // Range #176: [43072, 43127, Phags_Pa]
+  111,  // Range #177: [43136, 43205, Saurashtra]
+  111,  // Range #178: [43214, 43225, Saurashtra]
+  10,  // Range #179: [43232, 43263, Devanagari]
+  79,  // Range #180: [43264, 43309, Kayah_Li]
+  79,  // Range #181: [43311, 43311, Kayah_Li]
+  110,  // Range #182: [43312, 43347, Rejang]
+  110,  // Range #183: [43359, 43359, Rejang]
+  18,  // Range #184: [43360, 43388, Hangul]
+  78,  // Range #185: [43392, 43469, Javanese]
+  78,  // Range #186: [43472, 43487, Javanese]
+  28,  // Range #187: [43488, 43518, Myanmar]
+  66,  // Range #188: [43520, 43574, Cham]
+  66,  // Range #189: [43584, 43615, Cham]
+  28,  // Range #190: [43616, 43647, Myanmar]
+  127,  // Range #191: [43648, 43714, Tai_Viet]
+  127,  // Range #192: [43739, 43743, Tai_Viet]
+  115,  // Range #193: [43744, 43766, Meetei_Mayek]
+  11,  // Range #194: [43777, 43798, Ethiopic]
+  11,  // Range #195: [43808, 43822, Ethiopic]
+  25,  // Range #196: [43824, 43866, Latin]
+  25,  // Range #197: [43868, 43876, Latin]
+  14,  // Range #198: [43877, 43877, Greek]
+  25,  // Range #199: [43878, 43881, Latin]
+  6,  // Range #200: [43888, 43967, Cherokee]
+  115,  // Range #201: [43968, 44025, Meetei_Mayek]
+  18,  // Range #202: [44032, 55203, Hangul]
+  18,  // Range #203: [55216, 55291, Hangul]
+  17,  // Range #204: [63744, 64217, Han]
+  25,  // Range #205: [64256, 64262, Latin]
+  3,  // Range #206: [64275, 64279, Armenian]
+  19,  // Range #207: [64285, 64335, Hebrew]
+  2,  // Range #208: [64336, 64449, Arabic]
+  2,  // Range #209: [64467, 64829, Arabic]
+  2,  // Range #210: [64848, 64967, Arabic]
+  2,  // Range #211: [65008, 65021, Arabic]
+  8,  // Range #212: [65070, 65071, Cyrillic]
+  2,  // Range #213: [65136, 65276, Arabic]
+  25,  // Range #214: [65313, 65338, Latin]
+  25,  // Range #215: [65345, 65370, Latin]
+  22,  // Range #216: [65382, 65391, Katakana]
+  22,  // Range #217: [65393, 65437, Katakana]
+  18,  // Range #218: [65440, 65500, Hangul]
+  49,  // Range #219: [65536, 65629, Linear_B]
+  49,  // Range #220: [65664, 65786, Linear_B]
+  14,  // Range #221: [65856, 65934, Greek]
+  14,  // Range #222: [65952, 65952, Greek]
+  107,  // Range #223: [66176, 66204, Lycian]
+  104,  // Range #224: [66208, 66256, Carian]
+  30,  // Range #225: [66304, 66339, Old_Italic]
+  30,  // Range #226: [66349, 66351, Old_Italic]
+  13,  // Range #227: [66352, 66378, Gothic]
+  89,  // Range #228: [66384, 66426, Old_Permic]
+  53,  // Range #229: [66432, 66463, Ugaritic]
+  61,  // Range #230: [66464, 66517, Old_Persian]
+  9,  // Range #231: [66560, 66639, Deseret]
+  51,  // Range #232: [66640, 66687, Shavian]
+  50,  // Range #233: [66688, 66729, Osmanya]
+  171,  // Range #234: [66736, 66811, Osage]
+  136,  // Range #235: [66816, 66855, Elbasan]
+  159,  // Range #236: [66864, 66915, Caucasian_Albanian]
+  159,  // Range #237: [66927, 66927, Caucasian_Albanian]
+  83,  // Range #238: [67072, 67382, Linear_A]
+  83,  // Range #239: [67392, 67413, Linear_A]
+  83,  // Range #240: [67424, 67431, Linear_A]
+  47,  // Range #241: [67584, 67647, Cypriot]
+  116,  // Range #242: [67648, 67679, Imperial_Aramaic]
+  144,  // Range #243: [67680, 67711, Palmyrene]
+  143,  // Range #244: [67712, 67742, Nabataean]
+  143,  // Range #245: [67751, 67759, Nabataean]
+  162,  // Range #246: [67808, 67829, Hatran]
+  162,  // Range #247: [67835, 67839, Hatran]
+  91,  // Range #248: [67840, 67871, Phoenician]
+  108,  // Range #249: [67872, 67897, Lydian]
+  108,  // Range #250: [67903, 67903, Lydian]
+  86,  // Range #251: [67968, 67999, Meroitic_Hieroglyphs]
+  141,  // Range #252: [68000, 68095, Meroitic_Cursive]
+  57,  // Range #253: [68096, 68102, Kharoshthi]
+  57,  // Range #254: [68108, 68168, Kharoshthi]
+  57,  // Range #255: [68176, 68184, Kharoshthi]
+  133,  // Range #256: [68192, 68223, Old_South_Arabian]
+  142,  // Range #257: [68224, 68255, Old_North_Arabian]
+  121,  // Range #258: [68288, 68342, Manichaean]
+  117,  // Range #259: [68352, 68415, Avestan]
+  125,  // Range #260: [68416, 68447, Inscriptional_Parthian]
+  122,  // Range #261: [68448, 68466, Inscriptional_Pahlavi]
+  122,  // Range #262: [68472, 68479, Inscriptional_Pahlavi]
+  123,  // Range #263: [68480, 68497, Psalter_Pahlavi]
+  123,  // Range #264: [68505, 68508, Psalter_Pahlavi]
+  123,  // Range #265: [68521, 68527, Psalter_Pahlavi]
+  88,  // Range #266: [68608, 68680, Old_Turkic]
+  76,  // Range #267: [68736, 68786, Old_Hungarian]
+  76,  // Range #268: [68800, 68850, Old_Hungarian]
+  76,  // Range #269: [68858, 68863, Old_Hungarian]
+  182,  // Range #270: [68864, 68903, Hanifi_Rohingya]
+  182,  // Range #271: [68912, 68921, Hanifi_Rohingya]
+  2,  // Range #272: [69216, 69246, Arabic]
+  192,  // Range #273: [69248, 69297, Yezidi]
   184,  // Range #274: [69376, 69415, Old_Sogdian]
   183,  // Range #275: [69424, 69465, Sogdian]
-  185,  // Range #276: [69600, 69622, Elymaic]
-  65,  // Range #277: [69632, 69743, Brahmi]
-  65,  // Range #278: [69759, 69759, Brahmi]
-  120,  // Range #279: [69760, 69825, Kaithi]
-  120,  // Range #280: [69837, 69837, Kaithi]
-  152,  // Range #281: [69840, 69864, Sora_Sompeng]
-  152,  // Range #282: [69872, 69881, Sora_Sompeng]
-  118,  // Range #283: [69888, 69958, Chakma]
-  160,  // Range #284: [69968, 70006, Mahajani]
-  151,  // Range #285: [70016, 70111, Sharada]
-  33,  // Range #286: [70113, 70132, Sinhala]
-  157,  // Range #287: [70144, 70206, Khojki]
-  164,  // Range #288: [70272, 70313, Multani]
-  145,  // Range #289: [70320, 70378, Khudawadi]
-  145,  // Range #290: [70384, 70393, Khudawadi]
-  137,  // Range #291: [70400, 70457, Grantha]
-  137,  // Range #292: [70460, 70480, Grantha]
-  137,  // Range #293: [70487, 70487, Grantha]
-  137,  // Range #294: [70493, 70516, Grantha]
-  170,  // Range #295: [70656, 70751, Newa]
-  158,  // Range #296: [70784, 70855, Tirhuta]
-  158,  // Range #297: [70864, 70873, Tirhuta]
-  166,  // Range #298: [71040, 71133, Siddham]
-  163,  // Range #299: [71168, 71236, Modi]
-  163,  // Range #300: [71248, 71257, Modi]
-  27,  // Range #301: [71264, 71276, Mongolian]
-  153,  // Range #302: [71296, 71352, Takri]
-  153,  // Range #303: [71360, 71369, Takri]
-  161,  // Range #304: [71424, 71487, Ahom]
-  178,  // Range #305: [71680, 71739, Dogra]
-  146,  // Range #306: [71840, 71922, Warang_Citi]
-  146,  // Range #307: [71935, 71935, Warang_Citi]
-  187,  // Range #308: [72096, 72164, Nandinagari]
-  177,  // Range #309: [72192, 72263, Zanabazar_Square]
-  176,  // Range #310: [72272, 72354, Soyombo]
-  165,  // Range #311: [72384, 72440, Pau_Cin_Hau]
-  168,  // Range #312: [72704, 72773, Bhaiksuki]
-  168,  // Range #313: [72784, 72812, Bhaiksuki]
-  169,  // Range #314: [72816, 72886, Marchen]
-  175,  // Range #315: [72960, 73031, Masaram_Gondi]
-  175,  // Range #316: [73040, 73049, Masaram_Gondi]
-  179,  // Range #317: [73056, 73112, Gunjala_Gondi]
-  179,  // Range #318: [73120, 73129, Gunjala_Gondi]
-  180,  // Range #319: [73440, 73464, Makasar]
-  35,  // Range #320: [73664, 73713, Tamil]
-  35,  // Range #321: [73727, 73727, Tamil]
-  101,  // Range #322: [73728, 74649, Cuneiform]
-  101,  // Range #323: [74752, 74868, Cuneiform]
-  101,  // Range #324: [74880, 75075, Cuneiform]
-  71,  // Range #325: [77824, 78904, Egyptian_Hieroglyphs]
-  156,  // Range #326: [82944, 83526, Anatolian_Hieroglyphs]
-  130,  // Range #327: [92160, 92728, Bamum]
-  149,  // Range #328: [92736, 92783, Mro]
-  134,  // Range #329: [92880, 92917, Bassa_Vah]
-  75,  // Range #330: [92928, 92997, Pahawh_Hmong]
-  75,  // Range #331: [93008, 93047, Pahawh_Hmong]
-  75,  // Range #332: [93053, 93071, Pahawh_Hmong]
-  181,  // Range #333: [93760, 93850, Medefaidrin]
-  92,  // Range #334: [93952, 94087, Miao]
-  92,  // Range #335: [94095, 94111, Miao]
-  154,  // Range #336: [94176, 94176, Tangut]
-  150,  // Range #337: [94177, 94177, Nushu]
-  154,  // Range #338: [94208, 100343, Tangut]
-  154,  // Range #339: [100352, 101106, Tangut]
-  22,  // Range #340: [110592, 110592, Katakana]
-  20,  // Range #341: [110593, 110878, Hiragana]
-  20,  // Range #342: [110928, 110930, Hiragana]
-  22,  // Range #343: [110948, 110951, Katakana]
-  150,  // Range #344: [110960, 111355, Nushu]
-  135,  // Range #345: [113664, 113770, Duployan]
-  135,  // Range #346: [113776, 113800, Duployan]
-  135,  // Range #347: [113808, 113823, Duployan]
-  14,  // Range #348: [119296, 119365, Greek]
-  112,  // Range #349: [120832, 121483, SignWriting]
-  112,  // Range #350: [121499, 121519, SignWriting]
-  56,  // Range #351: [122880, 122922, Glagolitic]
-  186,  // Range #352: [123136, 123215, Nyiakeng_Puachue_Hmong]
-  188,  // Range #353: [123584, 123641, Wancho]
-  188,  // Range #354: [123647, 123647, Wancho]
-  140,  // Range #355: [124928, 125142, Mende_Kikakui]
-  167,  // Range #356: [125184, 125279, Adlam]
-  2,  // Range #357: [126464, 126523, Arabic]
-  2,  // Range #358: [126530, 126619, Arabic]
-  2,  // Range #359: [126625, 126651, Arabic]
-  2,  // Range #360: [126704, 126705, Arabic]
-  20,  // Range #361: [127488, 127488, Hiragana]
-  17,  // Range #362: [131072, 173782, Han]
-  17,  // Range #363: [173824, 177972, Han]
-  17,  // Range #364: [177984, 183969, Han]
-  17,  // Range #365: [183984, 191456, Han]
-  17,  // Range #366: [194560, 195101, Han]
+  189,  // Range #276: [69552, 69579, Chorasmian]
+  185,  // Range #277: [69600, 69622, Elymaic]
+  65,  // Range #278: [69632, 69743, Brahmi]
+  65,  // Range #279: [69759, 69759, Brahmi]
+  120,  // Range #280: [69760, 69825, Kaithi]
+  120,  // Range #281: [69837, 69837, Kaithi]
+  152,  // Range #282: [69840, 69864, Sora_Sompeng]
+  152,  // Range #283: [69872, 69881, Sora_Sompeng]
+  118,  // Range #284: [69888, 69959, Chakma]
+  160,  // Range #285: [69968, 70006, Mahajani]
+  151,  // Range #286: [70016, 70111, Sharada]
+  33,  // Range #287: [70113, 70132, Sinhala]
+  157,  // Range #288: [70144, 70206, Khojki]
+  164,  // Range #289: [70272, 70313, Multani]
+  145,  // Range #290: [70320, 70378, Khudawadi]
+  145,  // Range #291: [70384, 70393, Khudawadi]
+  137,  // Range #292: [70400, 70457, Grantha]
+  137,  // Range #293: [70460, 70480, Grantha]
+  137,  // Range #294: [70487, 70487, Grantha]
+  137,  // Range #295: [70493, 70516, Grantha]
+  170,  // Range #296: [70656, 70753, Newa]
+  158,  // Range #297: [70784, 70855, Tirhuta]
+  158,  // Range #298: [70864, 70873, Tirhuta]
+  166,  // Range #299: [71040, 71133, Siddham]
+  163,  // Range #300: [71168, 71236, Modi]
+  163,  // Range #301: [71248, 71257, Modi]
+  27,  // Range #302: [71264, 71276, Mongolian]
+  153,  // Range #303: [71296, 71352, Takri]
+  153,  // Range #304: [71360, 71369, Takri]
+  161,  // Range #305: [71424, 71487, Ahom]
+  178,  // Range #306: [71680, 71739, Dogra]
+  146,  // Range #307: [71840, 71922, Warang_Citi]
+  146,  // Range #308: [71935, 71935, Warang_Citi]
+  190,  // Range #309: [71936, 72006, Dives_Akuru]
+  190,  // Range #310: [72016, 72025, Dives_Akuru]
+  187,  // Range #311: [72096, 72164, Nandinagari]
+  177,  // Range #312: [72192, 72263, Zanabazar_Square]
+  176,  // Range #313: [72272, 72354, Soyombo]
+  165,  // Range #314: [72384, 72440, Pau_Cin_Hau]
+  168,  // Range #315: [72704, 72773, Bhaiksuki]
+  168,  // Range #316: [72784, 72812, Bhaiksuki]
+  169,  // Range #317: [72816, 72886, Marchen]
+  175,  // Range #318: [72960, 73031, Masaram_Gondi]
+  175,  // Range #319: [73040, 73049, Masaram_Gondi]
+  179,  // Range #320: [73056, 73112, Gunjala_Gondi]
+  179,  // Range #321: [73120, 73129, Gunjala_Gondi]
+  180,  // Range #322: [73440, 73464, Makasar]
+  131,  // Range #323: [73648, 73648, Lisu]
+  35,  // Range #324: [73664, 73713, Tamil]
+  35,  // Range #325: [73727, 73727, Tamil]
+  101,  // Range #326: [73728, 74649, Cuneiform]
+  101,  // Range #327: [74752, 74868, Cuneiform]
+  101,  // Range #328: [74880, 75075, Cuneiform]
+  71,  // Range #329: [77824, 78904, Egyptian_Hieroglyphs]
+  156,  // Range #330: [82944, 83526, Anatolian_Hieroglyphs]
+  130,  // Range #331: [92160, 92728, Bamum]
+  149,  // Range #332: [92736, 92783, Mro]
+  134,  // Range #333: [92880, 92917, Bassa_Vah]
+  75,  // Range #334: [92928, 92997, Pahawh_Hmong]
+  75,  // Range #335: [93008, 93047, Pahawh_Hmong]
+  75,  // Range #336: [93053, 93071, Pahawh_Hmong]
+  181,  // Range #337: [93760, 93850, Medefaidrin]
+  92,  // Range #338: [93952, 94087, Miao]
+  92,  // Range #339: [94095, 94111, Miao]
+  154,  // Range #340: [94176, 94176, Tangut]
+  150,  // Range #341: [94177, 94177, Nushu]
+  191,  // Range #342: [94180, 94180, Khitan_Small_Script]
+  17,  // Range #343: [94192, 94193, Han]
+  154,  // Range #344: [94208, 100343, Tangut]
+  154,  // Range #345: [100352, 101119, Tangut]
+  191,  // Range #346: [101120, 101589, Khitan_Small_Script]
+  154,  // Range #347: [101632, 101640, Tangut]
+  22,  // Range #348: [110592, 110592, Katakana]
+  20,  // Range #349: [110593, 110878, Hiragana]
+  20,  // Range #350: [110928, 110930, Hiragana]
+  22,  // Range #351: [110948, 110951, Katakana]
+  150,  // Range #352: [110960, 111355, Nushu]
+  135,  // Range #353: [113664, 113770, Duployan]
+  135,  // Range #354: [113776, 113800, Duployan]
+  135,  // Range #355: [113808, 113823, Duployan]
+  14,  // Range #356: [119296, 119365, Greek]
+  112,  // Range #357: [120832, 121483, SignWriting]
+  112,  // Range #358: [121499, 121519, SignWriting]
+  56,  // Range #359: [122880, 122922, Glagolitic]
+  186,  // Range #360: [123136, 123215, Nyiakeng_Puachue_Hmong]
+  188,  // Range #361: [123584, 123641, Wancho]
+  188,  // Range #362: [123647, 123647, Wancho]
+  140,  // Range #363: [124928, 125142, Mende_Kikakui]
+  167,  // Range #364: [125184, 125279, Adlam]
+  2,  // Range #365: [126464, 126523, Arabic]
+  2,  // Range #366: [126530, 126619, Arabic]
+  2,  // Range #367: [126625, 126651, Arabic]
+  2,  // Range #368: [126704, 126705, Arabic]
+  20,  // Range #369: [127488, 127488, Hiragana]
+  17,  // Range #370: [131072, 173789, Han]
+  17,  // Range #371: [173824, 177972, Han]
+  17,  // Range #372: [177984, 183969, Han]
+  17,  // Range #373: [183984, 191456, Han]
+  17,  // Range #374: [194560, 195101, Han]
+  17,  // Range #375: [196608, 201546, Han]
 };
 
-const uint8 kMaxScript = 188;
+const uint8 kMaxScript = 192;
 
 }  // namespace approx_script_internal
 }  // namespace mobile

diff --git a/native/models/actions_suggestions.en.model b/native/models/actions_suggestions.en.model
old mode 100644
new mode 100755
index 480a5ff..d4b0ced
--- a/native/models/actions_suggestions.en.model
+++ b/native/models/actions_suggestions.en.model
Binary files differ

diff --git a/native/models/actions_suggestions.universal.model b/native/models/actions_suggestions.universal.model
old mode 100644
new mode 100755
index a285ab0..2ee546c
--- a/native/models/actions_suggestions.universal.model
+++ b/native/models/actions_suggestions.universal.model
Binary files differ

diff --git a/native/models/textclassifier.ar.model b/native/models/textclassifier.ar.model
old mode 100644
new mode 100755
index 7a8d7d1..923d8af
--- a/native/models/textclassifier.ar.model
+++ b/native/models/textclassifier.ar.model
Binary files differ

diff --git a/native/models/textclassifier.en.model b/native/models/textclassifier.en.model
old mode 100644
new mode 100755
index dba017b..aec4302
--- a/native/models/textclassifier.en.model
+++ b/native/models/textclassifier.en.model
Binary files differ

diff --git a/native/models/textclassifier.es.model b/native/models/textclassifier.es.model
old mode 100644
new mode 100755
index 7a537d2..7ff3d73
--- a/native/models/textclassifier.es.model
+++ b/native/models/textclassifier.es.model
Binary files differ

diff --git a/native/models/textclassifier.fr.model b/native/models/textclassifier.fr.model
old mode 100644
new mode 100755
index 6fb4f82..cc5f488
--- a/native/models/textclassifier.fr.model
+++ b/native/models/textclassifier.fr.model
Binary files differ

diff --git a/native/models/textclassifier.it.model b/native/models/textclassifier.it.model
old mode 100644
new mode 100755
index 2111e7e..5d40ef5
--- a/native/models/textclassifier.it.model
+++ b/native/models/textclassifier.it.model
Binary files differ

diff --git a/native/models/textclassifier.ja.model b/native/models/textclassifier.ja.model
old mode 100644
new mode 100755
index f25f448..9d65601
--- a/native/models/textclassifier.ja.model
+++ b/native/models/textclassifier.ja.model
Binary files differ

diff --git a/native/models/textclassifier.ko.model b/native/models/textclassifier.ko.model
old mode 100644
new mode 100755
index 60524c1..becba7a
--- a/native/models/textclassifier.ko.model
+++ b/native/models/textclassifier.ko.model
Binary files differ

diff --git a/native/models/textclassifier.nl.model b/native/models/textclassifier.nl.model
old mode 100644
new mode 100755
index 25a8f2e..bac8350
--- a/native/models/textclassifier.nl.model
+++ b/native/models/textclassifier.nl.model
Binary files differ

diff --git a/native/models/textclassifier.pl.model b/native/models/textclassifier.pl.model
old mode 100644
new mode 100755
index 6a3ee23..03b2825
--- a/native/models/textclassifier.pl.model
+++ b/native/models/textclassifier.pl.model
Binary files differ

diff --git a/native/models/textclassifier.pt.model b/native/models/textclassifier.pt.model
old mode 100644
new mode 100755
index 7a5c812..39f0b12
--- a/native/models/textclassifier.pt.model
+++ b/native/models/textclassifier.pt.model
Binary files differ

diff --git a/native/models/textclassifier.ru.model b/native/models/textclassifier.ru.model
old mode 100644
new mode 100755
index 6e6d83a..6d08044
--- a/native/models/textclassifier.ru.model
+++ b/native/models/textclassifier.ru.model
Binary files differ

diff --git a/native/models/textclassifier.th.model b/native/models/textclassifier.th.model
old mode 100644
new mode 100755
index 7be4c6a..5e0f9dd
--- a/native/models/textclassifier.th.model
+++ b/native/models/textclassifier.th.model
Binary files differ

diff --git a/native/models/textclassifier.tr.model b/native/models/textclassifier.tr.model
old mode 100644
new mode 100755
index b109331..2dbc1d8
--- a/native/models/textclassifier.tr.model
+++ b/native/models/textclassifier.tr.model
Binary files differ

diff --git a/native/models/textclassifier.universal.model b/native/models/textclassifier.universal.model
old mode 100644
new mode 100755
index 3c97edf..853e389
--- a/native/models/textclassifier.universal.model
+++ b/native/models/textclassifier.universal.model
Binary files differ

diff --git a/native/models/textclassifier.zh.model b/native/models/textclassifier.zh.model
old mode 100644
new mode 100755
index 55522fa..8d989d7
--- a/native/models/textclassifier.zh.model
+++ b/native/models/textclassifier.zh.model
Binary files differ

diff --git a/native/utils/base/arena.h b/native/utils/base/arena.h
index aec1950..28b6f6c 100644
--- a/native/utils/base/arena.h
+++ b/native/utils/base/arena.h

@@ -53,6 +53,7 @@
 
 #include <assert.h>
 #include <string.h>
+
 #include <vector>
 #ifdef ADDRESS_SANITIZER
 #include <sanitizer/asan_interface.h>
@@ -67,7 +68,7 @@
 // arena at the same time without locking, as long as they use only
 // const methods.
 class BaseArena {
- protected:         // You can't make an arena directly; only a subclass of one
+ protected:  // You can't make an arena directly; only a subclass of one
   BaseArena(char* first_block, const size_t block_size, bool align_to_page);
 
  public:
@@ -77,18 +78,17 @@
 
   // they're "slow" only 'cause they're virtual (subclasses define "fast" ones)
   virtual char* SlowAlloc(size_t size) = 0;
-  virtual void  SlowFree(void* memory, size_t size) = 0;
+  virtual void SlowFree(void* memory, size_t size) = 0;
   virtual char* SlowRealloc(char* memory, size_t old_size, size_t new_size) = 0;
 
   class Status {
    private:
     friend class BaseArena;
     size_t bytes_allocated_;
+
    public:
-    Status() : bytes_allocated_(0) { }
-    size_t bytes_allocated() const {
-      return bytes_allocated_;
-    }
+    Status() : bytes_allocated_(0) {}
+    size_t bytes_allocated() const { return bytes_allocated_; }
   };
 
   // Accessors and stats counters
@@ -96,8 +96,8 @@
   // type-compatible with ArenaAllocator (in arena_allocator.h).  That is,
   // we define arena() because ArenaAllocator does, and that way you
   // can template on either of these and know it's safe to call arena().
-  virtual BaseArena* arena()  { return this; }
-  size_t block_size() const   { return block_size_; }
+  virtual BaseArena* arena() { return this; }
+  size_t block_size() const { return block_size_; }
   int block_count() const;
   bool is_empty() const {
     // must check block count in case we allocated a block larger than blksize
@@ -105,15 +105,15 @@
   }
 
   // The alignment that ArenaAllocator uses except for 1-byte objects.
-  static const int kDefaultAlignment = 8;
+  static constexpr int kDefaultAlignment = 8;
 
  protected:
   bool SatisfyAlignment(const size_t alignment);
   void MakeNewBlock(const uint32 alignment);
   void* GetMemoryFallback(const size_t size, const int align);
   void* GetMemory(const size_t size, const int align) {
-    assert(remaining_ <= block_size_);          // an invariant
-    if ( size > 0 && size <= remaining_ && align == 1 ) {       // common case
+    assert(remaining_ <= block_size_);                   // an invariant
+    if (size > 0 && size <= remaining_ && align == 1) {  // common case
       last_alloc_ = freestart_;
       freestart_ += size;
       remaining_ -= size;
@@ -161,18 +161,18 @@
   const AllocatedBlock* IndexToBlock(int index) const;
 
   const size_t block_size_;
-  char* freestart_;         // beginning of the free space in most recent block
+  char* freestart_;  // beginning of the free space in most recent block
   char* freestart_when_empty_;  // beginning of the free space when we're empty
-  char* last_alloc_;         // used to make sure ReturnBytes() is safe
+  char* last_alloc_;            // used to make sure ReturnBytes() is safe
   // if the first_blocks_ aren't enough, expand into overflow_blocks_.
   std::vector<AllocatedBlock>* overflow_blocks_;
   // STL vector isn't as efficient as it could be, so we use an array at first
-  const bool first_block_externally_owned_;   // true if they pass in 1st block
+  const bool first_block_externally_owned_;  // true if they pass in 1st block
   const bool page_aligned_;  // when true, all blocks need to be page aligned
   int8_t blocks_alloced_;  // how many of the first_blocks_ have been allocated
-  AllocatedBlock first_blocks_[16];   // the length of this array is arbitrary
+  AllocatedBlock first_blocks_[16];  // the length of this array is arbitrary
 
-  void FreeBlocks();         // Frees all except first block
+  void FreeBlocks();  // Frees all except first block
 
   BaseArena(const BaseArena&) = delete;
   BaseArena& operator=(const BaseArena&) = delete;
@@ -182,18 +182,18 @@
  public:
   // Allocates a thread-compatible arena with the specified block size.
   explicit UnsafeArena(const size_t block_size)
-    : BaseArena(nullptr, block_size, false) { }
+      : BaseArena(nullptr, block_size, false) {}
   UnsafeArena(const size_t block_size, bool align)
-    : BaseArena(nullptr, block_size, align) { }
+      : BaseArena(nullptr, block_size, align) {}
 
   // Allocates a thread-compatible arena with the specified block
   // size. "first_block" must have size "block_size". Memory is
   // allocated from "first_block" until it is exhausted; after that
   // memory is allocated by allocating new blocks from the heap.
   UnsafeArena(char* first_block, const size_t block_size)
-    : BaseArena(first_block, block_size, false) { }
+      : BaseArena(first_block, block_size, false) {}
   UnsafeArena(char* first_block, const size_t block_size, bool align)
-    : BaseArena(first_block, block_size, align) { }
+      : BaseArena(first_block, block_size, align) {}
 
   char* Alloc(const size_t size) {
     return reinterpret_cast<char*>(GetMemory(size, 1));
@@ -201,6 +201,14 @@
   void* AllocAligned(const size_t size, const int align) {
     return GetMemory(size, align);
   }
+
+  // Constructs a T in aligned arena memory, forwarding args to T's constructor.
+  template <typename T, typename... Args>
+  T* AllocAndInit(Args&&... args) {  // forwarding refs: no by-value copy
+    return new (reinterpret_cast<T*>(AllocAligned(sizeof(T), alignof(T))))
+        T(std::forward<Args>(args)...);
+  }
+
   char* Calloc(const size_t size) {
     void* return_value = Alloc(size);
     memset(return_value, 0, size);
@@ -214,9 +222,7 @@
   }
 
   // Free does nothing except for the last piece allocated.
-  void Free(void* memory, size_t size) {
-    ReturnMemory(memory, size);
-  }
+  void Free(void* memory, size_t size) { ReturnMemory(memory, size); }
   char* SlowAlloc(size_t size) override {  // "slow" 'cause it's virtual
     return Alloc(size);
   }
@@ -234,14 +240,12 @@
     return newstr;
   }
   char* MemdupPlusNUL(const char* s, size_t bytes) {  // like "string(s, len)"
-    char* newstr = Alloc(bytes+1);
+    char* newstr = Alloc(bytes + 1);
     memcpy(newstr, s, bytes);
     newstr[bytes] = '\0';
     return newstr;
   }
-  char* Strdup(const char* s) {
-    return Memdup(s, strlen(s) + 1);
-  }
+  char* Strdup(const char* s) { return Memdup(s, strlen(s) + 1); }
   // Unlike libc's strncpy, I always NUL-terminate.  libc's semantics are dumb.
   // This will allocate at most n+1 bytes (+1 is for the nul terminator).
   char* Strndup(const char* s, size_t n) {
@@ -261,8 +265,8 @@
   // If you know the new size is smaller (or equal), you don't need to know
   // oldsize.  We don't check that newsize is smaller, so you'd better be sure!
   char* Shrink(char* s, size_t newsize) {
-    AdjustLastAlloc(s, newsize);       // reclaim space if we can
-    return s;                          // never need to move if we go smaller
+    AdjustLastAlloc(s, newsize);  // reclaim space if we can
+    return s;                     // never need to move if we go smaller
   }
 
   // We make a copy so you can keep track of status at a given point in time

diff --git a/native/utils/base/arena_leakage_unittest.cc b/native/utils/base/arena_leakage_unittest.cc
new file mode 100644
index 0000000..642dacd
--- /dev/null
+++ b/native/utils/base/arena_leakage_unittest.cc

@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/base/arena.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+
+TEST(Arena, Leakage) {  // Exact-fit allocations must not waste block space.
+  UnsafeArena arena(32);
+  // A fresh 32-byte arena is fully free; take the first 10 bytes of it.
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 32);
+  const char* block = arena.Alloc(10);
+  EXPECT_NE(block, nullptr);
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 22);
+  // Take the remaining 22 bytes; they must start right after the first 10.
+  const char* expected_next_block = block + 10;
+  const char* next_block = arena.Alloc(22);
+  // A mismatch below means a new block was allocated for "next_block",
+  // i.e. the last 22 bytes of the previous block have been lost.
+  EXPECT_EQ(next_block, expected_next_block);
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 0);
+  // A zero-byte request must return nullptr and leave the arena unchanged.
+  const char* null_block = arena.Alloc(0);
+  EXPECT_EQ(null_block, nullptr);
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 0);
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/utils/base/prefixvarint.cc b/native/utils/base/prefixvarint.cc
deleted file mode 100644
index 5febbc5..0000000
--- a/native/utils/base/prefixvarint.cc
+++ /dev/null

@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/base/prefixvarint.h"
-
-#include "utils/base/integral_types.h"
-
-namespace libtextclassifier3 {
-
-const int PrefixVarint::kMax32;
-const int PrefixVarint::kMax64;
-const int PrefixVarint::kSlopBytes;
-const int PrefixVarint::kEncode32SlopBytes;
-const int PrefixVarint::kEncode64SlopBytes;
-
-char* PrefixVarint::SafeEncode32(char* ptr, uint32 val) {
-  return SafeEncode32Inline(ptr, val);
-}
-
-char* PrefixVarint::SafeEncode64(char* ptr, uint64 val) {
-  return SafeEncode64Inline(ptr, val);
-}
-
-void PrefixVarint::Append32Slow(std::string* s, uint32 value) {
-  size_t start = s->size();
-  s->resize(start + PrefixVarint::Length32(value));
-  PrefixVarint::SafeEncode32(&((*s)[start]), value);
-}
-
-void PrefixVarint::Append64Slow(std::string* s, uint64 value) {
-  size_t start = s->size();
-  s->resize(start + PrefixVarint::Length64(value));
-  PrefixVarint::SafeEncode64(&((*s)[start]), value);
-}
-
-const char* PrefixVarint::Parse32Fallback(uint32 code, const char* ptr,
-                                          uint32* val) {
-  return Parse32FallbackInline(code, ptr, val);
-}
-
-const char* PrefixVarint::Parse64Fallback(uint64 code, const char* ptr,
-                                          uint64* val) {
-  return Parse64FallbackInline(code, ptr, val);
-}
-
-#if 0
-const PrefixVarint::CodeInfo PrefixVarint::code_info_[8] = {
-  {2, 0xff00}, {2, 0xff00},
-  {2, 0xff00}, {2, 0xff00},
-  {3, 0xffff00}, {3, 0xffff00},
-  {4, 0xffffff00}, {5, 0xffffff00}
-};
-#endif
-
-}  // namespace libtextclassifier3

diff --git a/native/utils/base/prefixvarint.h b/native/utils/base/prefixvarint.h
deleted file mode 100644
index f00e05e..0000000
--- a/native/utils/base/prefixvarint.h
+++ /dev/null

@@ -1,609 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// PrefixVarint is an integer encoding method that has the exact same
-// compression size as Varint, but is faster to decode because all of the
-// length information is encoded in the first byte.
-// On a Warp 19 it can parse up to 42% faster than Varint, for the distributions
-// tested below.
-// On an Ilium it can parse up to 37% faster than Varint.
-//
-// But there are a few caveats:
-// - This is fastest if both the encoder and decoder are little endian.
-//   Somewhat slower versions are provided for encoding and decoding on big
-//   endian machines.
-// - This doesn't support backwards decoding.
-//
-// The PrefixVarint encoding uses a unary code in the high bits of the first
-// byte to encode the total number of bytes, as follows:
-// - 32bit encoding:
-//     1 byte:  "0" + 7 value bits
-//     2 bytes: "10" + 6 value bits
-//     3 bytes: "110" + 5 value bits
-//     4 bytes: "1110" + 4 value bits
-//     5 bytes: "1111" + no value bits (value is in the next 4 bytes)
-//
-// - 64bit encoding:
-//     1 byte:  "0" + 7 value bits
-//     2 bytes: "10" + 6 value bits
-//     3 bytes: "110" + 5 value bits
-//     4 bytes: "1110" + 4 value bits
-//     5 bytes: "11110" + 3 value bits
-//     6 bytes: "111110" + 2 value bits
-//     7 bytes: "1111110" + 1 value bits
-//     8 bytes: "11111110" + no value bits (value is in the next 7 bytes)
-//     9 bytes: "11111111" + no value bits (value is in the next 8 bytes)
-//
-// Note that 32bit and 64bit PrefixVarint encoding are same for values between
-// 0 and (1<<28)-1 (i.e., upto 4 byte-encodable value).
-//
-// The following are benchmark results (in cycles per operation, so lower is
-// better) on randomly generated sequences of values whose encodings have the
-// given distribution of byte lengths.  The cycle counts include some overhead
-// (1-2 cycles) for the testing loop operation.
-//
-// UNIFORM 2^14 means the values are randomly generated in the range [0-2^14),
-// so the majority will require 2 bytes to encode.  MIXED 60:20:10:6:4, on the
-// other hand, means 60% of the values encode to 1 byte, 20% to 2 bytes, and
-// so on.  The MIXED 15:71:13:1.2:0.1 distribution simulates a power law with
-// median value of 1024.
-//
-// VI is Varint, PVI is PrefixVarint.  In both cases, Parse32Inline was used.
-//
-// Warp 19 (Opteron):
-//                            Encode     Parse       Skip
-// Byte Len Dist              VI  PVI    VI  PVI    VI  PVI
-// UNIFORM 2^7              12.2  9.9   3.4  3.3   3.2  3.2
-// UNIFORM 2^14             18.2 14.0   8.8  6.0   5.4  6.4
-// UNIFORM 2^21             18.1 15.1  13.0  9.7   6.7  9.5
-// UNIFORM 2^28             18.9 14.9  15.4 12.1   9.8 10.7
-// UNIFORM 2^31             23.6 19.3  20.1 14.9  12.7 10.7
-// MIXED 50:50:0:0:0        19.4 19.8  15.0 12.7  11.8 12.6
-// MIXED 20:20:20:20:20     28.2 27.3  24.9 21.8  20.7 18.8
-// MIXED 60:20:10:6:4       23.5 23.3  29.7 17.3  16.7 16.3
-// MIXED 80:12:5:2:1        16.5 16.3  11.6  9.9   9.7  9.6
-// MIXED 90:7:2:1:0         12.9 12.9   8.2  6.2   6.1  6.1
-// MIXED 15:71:13:1.2:0.1   18.9 19.2  13.8 11.2  11.0 11.8
-//
-// Ilium:
-//                            Encode     Parse       Skip
-// Byte Len Dist              VI  PVI    VI  PVI    VI  PVI
-// UNIFORM 2^7              10.2  8.7   3.1  3.1   2.9  2.1
-// UNIFORM 2^14             15.8 13.2   7.1  4.5   4.2  3.4
-// UNIFORM 2^21             15.6 14.1  10.1  6.6   5.4  5.7
-// UNIFORM 2^28             18.1 15.2  12.7  8.8   7.3  8.3
-// UNIFORM 2^31             21.8 16.5  17.9 13.3  13.9  8.1
-// MIXED 50:50:0:0:0        19.8 20.7  14.2 13.0  12.4 12.2
-// MIXED 20:20:20:20:20     29.8 30.1  27.7 24.3  22.7 20.2
-// MIXED 60:20:10:6:4       24.2 24.9  20.1 18.9  18.7 17.2
-// MIXED 80:12:5:2:1        16.3 16.6  12.0 11.6  11.3 10.7
-// MIXED 90:7:2:1:0         12.1 12.3   7.2  7.0   6.8  6.5
-// MIXED 15:71:13:1.2:0.1   19.2 20.1  14.2 13.1  12.5 12.0
-//
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_
-#define LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_
-
-#include <string>
-
-#include "utils/base/casts.h"
-#include "utils/base/endian.h"
-#include "utils/base/integral_types.h"
-#include "utils/base/unaligned_access.h"
-
-namespace libtextclassifier3 {
-
-class PrefixVarint {
- public:
-  // The max bytes used to encode a uint32:
-  static const int kMax32 = 5;
-  static const int kMax64 = 9;
-
-  // This decoder does not read past the encoded buffer.
-  static const int kSlopBytes = 0;
-
-  // Returns the number of bytes used to encode the given value:
-  static int Length32(uint32 val);
-  static int Length64(uint64 val);
-
-  // The Encode functions could reset up to the following bytes past the last
-  // encoded byte. Use the slower SafeEncode equivalent if you want the encode
-  // to not use any slop bytes.
-  static const int kEncode32SlopBytes = 1;
-  static const int kEncode64SlopBytes = 3;
-
-  // The safer version of the Encode functions, which don't need any slop bytes.
-  static char* SafeEncode32(char* ptr, uint32 val);
-  static char* SafeEncode64(char* ptr, uint64 val);
-  // Inlined version:
-  static char* SafeEncode32Inline(char* ptr, uint32 val);
-  static char* SafeEncode64Inline(char* ptr, uint64 val);
-
-  // Appends the encoded value to *s.
-  static void Append32(std::string* s, uint32 value);
-  static void Append64(std::string* s, uint64 value);
-
-  // Parses the next value in the ptr buffer and returns the pointer advanced
-  // past the end of the encoded value.
-  static const char* Parse32(const char* ptr, uint32* val);
-  static const char* Parse64(const char* ptr, uint64* val);
-  // Use this in time-critical code:
-  static const char* Parse32Inline(const char* ptr, uint32* val);
-  static const char* Parse64Inline(const char* ptr, uint64* val);
-
- private:
-  static const int kMin2Bytes = (1 << 7);
-  static const int kMin3Bytes = (1 << 14);
-  static const int kMin4Bytes = (1 << 21);
-  static const int kMin5Bytes = (1 << 28);
-  static const int64 kMin6Bytes = (1LL << 35);
-  static const int64 kMin7Bytes = (1LL << 42);
-  static const int64 kMin8Bytes = (1LL << 49);
-  static const int64 kMin9Bytes = (1LL << 56);
-
-  static void Append32Slow(std::string* s, uint32 value);
-  static void Append64Slow(std::string* s, uint64 value);
-  static const char* Parse32Fallback(uint32 code, const char* ptr, uint32* val);
-  static const char* Parse64Fallback(uint64 code, const char* ptr, uint64* val);
-  static const char* Parse32FallbackInline(uint32 code, const char* ptr,
-                                           uint32* val);
-  static const char* Parse64FallbackInline(uint64 code, const char* ptr,
-                                           uint64* val);
-
-  // Casting helpers to aid in making this code signed-char-clean.
-  static uint8* MakeUnsigned(char* p) { return bit_cast<uint8*>(p); }
-  static const uint8* MakeUnsigned(const char* p) {
-    return bit_cast<const uint8*>(p);
-  }
-};
-
-inline int PrefixVarint::Length32(uint32 val) {
-  if (val < kMin2Bytes) return 1;
-  if (val < kMin3Bytes) return 2;
-  if (val < kMin4Bytes) return 3;
-  if (val < kMin5Bytes) return 4;
-  return 5;
-}
-
-inline int PrefixVarint::Length64(uint64 val) {
-  if (val < kMin2Bytes) return 1;
-  if (val < kMin3Bytes) return 2;
-  if (val < kMin4Bytes) return 3;
-  if (val < kMin5Bytes) return 4;
-  if (val < kMin6Bytes) return 5;
-  if (val < kMin7Bytes) return 6;
-  if (val < kMin8Bytes) return 7;
-  if (val < kMin9Bytes) return 8;
-  return 9;
-}
-
-inline char* PrefixVarint::SafeEncode32Inline(char* p, uint32 val) {
-  uint8* const ptr = MakeUnsigned(p);
-  if (val < kMin2Bytes) {
-    ptr[0] = val;
-    return p + 1;
-  } else if (val < kMin3Bytes) {
-    val <<= 2;
-    uint8 low = val;
-    ptr[0] = (low >> 2) | 128;
-    ptr[1] = val >> 8;
-    return p + 2;
-  } else if (val < kMin4Bytes) {
-    val <<= 3;
-    uint8 low = val;
-    ptr[0] = (low >> 3) | 192;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    return p + 3;
-  } else if (val < kMin5Bytes) {
-    val <<= 4;
-    uint8 low = val;
-    ptr[0] = (low >> 4) | 224;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    return p + 4;
-  } else {
-    ptr[0] = 0xff;
-    ptr[1] = val;
-    ptr[2] = val >> 8;
-    ptr[3] = val >> 16;
-    ptr[4] = val >> 24;
-    return p + 5;
-  }
-}
-
-inline char* PrefixVarint::SafeEncode64Inline(char* p, uint64 val) {
-  uint8* const ptr = MakeUnsigned(p);
-  if (val < kMin2Bytes) {
-    ptr[0] = val;
-    return p + 1;
-  } else if (val < kMin3Bytes) {
-    val <<= 2;
-    uint8 low = val;
-    ptr[0] = (low >> 2) | 128;
-    ptr[1] = val >> 8;
-    return p + 2;
-  } else if (val < kMin4Bytes) {
-    val <<= 3;
-    uint8 low = val;
-    ptr[0] = (low >> 3) | 192;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    return p + 3;
-  } else if (val < kMin5Bytes) {
-    val <<= 4;
-    uint8 low = val;
-    ptr[0] = (low >> 4) | 224;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    return p + 4;
-  } else if (val < kMin6Bytes) {
-    val <<= 5;
-    uint8 low = val;
-    ptr[0] = (low >> 5) | 240;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    ptr[4] = val >> 32;
-    return p + 5;
-  } else if (val < kMin7Bytes) {
-    val <<= 6;
-    uint8 low = val;
-    ptr[0] = (low >> 6) | 248;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    ptr[4] = val >> 32;
-    ptr[5] = val >> 40;
-    return p + 6;
-  } else if (val < kMin8Bytes) {
-    val <<= 7;
-    uint8 low = val;
-    ptr[0] = (low >> 7) | 252;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    ptr[4] = val >> 32;
-    ptr[5] = val >> 40;
-    ptr[6] = val >> 48;
-    return p + 7;
-  } else if (val < kMin9Bytes) {
-    ptr[0] = 254;
-    ptr[1] = val;
-    ptr[2] = val >> 8;
-    ptr[3] = val >> 16;
-    ptr[4] = val >> 24;
-    ptr[5] = val >> 32;
-    ptr[6] = val >> 40;
-    ptr[7] = val >> 48;
-    return p + 8;
-  } else {
-    ptr[0] = 255;
-    ptr[1] = val;
-    ptr[2] = val >> 8;
-    ptr[3] = val >> 16;
-    ptr[4] = val >> 24;
-    ptr[5] = val >> 32;
-    ptr[6] = val >> 40;
-    ptr[7] = val >> 48;
-    ptr[8] = val >> 56;
-    return p + 9;
-  }
-}
-
-inline void PrefixVarint::Append32(std::string* s, uint32 value) {
-  // Inline the fast-path for single-character output, but fall back to the .cc
-  // file for the full version. The size<capacity check is so the compiler can
-  // optimize out the string resize code.
-  if (value < kMin2Bytes && s->size() < s->capacity()) {
-    s->push_back(static_cast<unsigned char>(value));
-  } else {
-    Append32Slow(s, value);
-  }
-}
-
-inline void PrefixVarint::Append64(std::string* s, uint64 value) {
-  // Inline the fast-path for single-character output, but fall back to the .cc
-  // file for the full version. The size<capacity check is so the compiler can
-  // optimize out the string resize code.
-  if (value < kMin2Bytes && s->size() < s->capacity()) {
-    s->push_back(static_cast<unsigned char>(value));
-  } else {
-    Append64Slow(s, value);
-  }
-}
-
-#ifdef IS_LITTLE_ENDIAN
-
-inline const char* PrefixVarint::Parse32(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3fLLU) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse32Inline(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32FallbackInline(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64Inline(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64FallbackInline(code, p, val);
-  }
-}
-
-// Only handles cases with 3-5 bytes
-inline const char* PrefixVarint::Parse32FallbackInline(uint32 code,
-                                                       const char* p,
-                                                       uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint32 v = TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0x1f) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint32 v = ptr[3];
-    v = (v << 16) | TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0xf) | (v << 4);
-    return p + 4;
-  } else {
-    *val = TC3_UNALIGNED_LOAD32(ptr + 1);
-    return p + 5;
-  }
-}
-
-// Only handles cases with 3-9 bytes
-inline const char* PrefixVarint::Parse64FallbackInline(uint64 code,
-                                                       const char* p,
-                                                       uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint64 v = TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0x1fLLU) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint64 v = ptr[3];
-    v = (v << 16) | TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0xfLLU) | (v << 4);
-    return p + 4;
-  } else if (code < 248) {
-    uint64 v = TC3_UNALIGNED_LOAD32(ptr + 1);
-    *val = (code & 0x7LLU) | (v << 3);
-    return p + 5;
-  } else if (code < 252) {
-    uint64 v = ptr[5];
-    v = (v << 32) | TC3_UNALIGNED_LOAD32(ptr + 1);
-    *val = (code & 0x3LLU) | (v << 2);
-    return p + 6;
-  } else if (code < 254) {
-    uint64 v = TC3_UNALIGNED_LOAD16(ptr + 5);
-    v = (v << 32) | TC3_UNALIGNED_LOAD32(ptr + 1);
-    *val = (code & 0x1LLU) | (v << 1);
-    return p + 7;
-  } else if (code < 255) {
-    uint64 v = TC3_UNALIGNED_LOAD64(ptr);
-    *val = v >> 8;
-    return p + 8;
-  } else {
-    *val = TC3_UNALIGNED_LOAD64(ptr + 1);
-    return p + 9;
-  }
-}
-
-#else  // IS_BIG_ENDIAN
-
-// This works on big-endian machines.  Performance is 1-16% slower, depending
-// on the data.
-inline const char* PrefixVarint::Parse32(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3fLLU) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse32Inline(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32FallbackInline(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64Inline(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3fLLU) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64FallbackInline(code, p, val);
-  }
-}
-
-// Only handles cases with 3-5 bytes
-inline const char* PrefixVarint::Parse32FallbackInline(uint32 code,
-                                                       const char* p,
-                                                       uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint32 v = ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x1f) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint32 v = ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0xf) | (v << 4);
-    return p + 4;
-  } else {
-    uint32 v = ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = v;
-    return p + 5;
-  }
-}
-
-// Only handles cases with 3-9 bytes
-inline const char* PrefixVarint::Parse64FallbackInline(uint64 code,
-                                                       const char* p,
-                                                       uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint64 v = ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x1f) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint64 v = ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0xf) | (v << 4);
-    return p + 4;
-  } else if (code < 248) {
-    uint64 v = ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x7) | (v << 3);
-    return p + 5;
-  } else if (code < 252) {
-    uint64 v = ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x3) | (v << 2);
-    return p + 6;
-  } else if (code < 254) {
-    uint64 v = ptr[6];
-    v = (v << 8) | ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x1) | (v << 1);
-    return p + 7;
-  } else if (code < 255) {
-    uint64 v = ptr[7];
-    v = (v << 8) | ptr[6];
-    v = (v << 8) | ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = v;
-    return p + 8;
-  } else {
-    uint64 v = ptr[8];
-    v = (v << 8) | ptr[7];
-    v = (v << 8) | ptr[6];
-    v = (v << 8) | ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = v;
-    return p + 9;
-  }
-}
-
-#endif  // IS_LITTLE_ENDIAN
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_

diff --git a/native/utils/base/status_test.cc b/native/utils/base/status_test.cc
new file mode 100644
index 0000000..82d5aad
--- /dev/null
+++ b/native/utils/base/status_test.cc

@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/base/status.h"
+
+#include "utils/base/logging.h"
+#include "utils/base/status_macros.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StatusTest, PrintsAbortedStatus) {
+  logging::LoggingStringStream stream;
+  stream << Status::UNKNOWN;
+  EXPECT_EQ(Status::UNKNOWN.error_code(), 2);
+  EXPECT_EQ(Status::UNKNOWN.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(Status::UNKNOWN.error_message(), "");
+  EXPECT_EQ(stream.message, "2");
+}
+
+TEST(StatusTest, PrintsOKStatus) {
+  logging::LoggingStringStream stream;
+  stream << Status::OK;
+  EXPECT_EQ(Status::OK.error_code(), 0);
+  EXPECT_EQ(Status::OK.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(Status::OK.error_message(), "");
+  EXPECT_EQ(stream.message, "0");
+}
+
+TEST(StatusTest, UnknownStatusHasRightAttributes) {
+  EXPECT_EQ(Status::UNKNOWN.error_code(), 2);
+  EXPECT_EQ(Status::UNKNOWN.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(Status::UNKNOWN.error_message(), "");
+}
+
+TEST(StatusTest, OkStatusHasRightAttributes) {
+  EXPECT_EQ(Status::OK.error_code(), 0);
+  EXPECT_EQ(Status::OK.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(Status::OK.error_message(), "");
+}
+
+TEST(StatusTest, CustomStatusHasRightAttributes) {
+  Status status(StatusCode::INVALID_ARGUMENT, "You can't put this here!");
+  EXPECT_EQ(status.error_code(), 3);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::INVALID_ARGUMENT);
+  EXPECT_EQ(status.error_message(), "You can't put this here!");
+}
+
+TEST(StatusTest, AssignmentPreservesMembers) {
+  Status status(StatusCode::INVALID_ARGUMENT, "You can't put this here!");
+
+  Status status2 = status;
+
+  EXPECT_EQ(status2.error_code(), 3);
+  EXPECT_EQ(status2.CanonicalCode(), StatusCode::INVALID_ARGUMENT);
+  EXPECT_EQ(status2.error_message(), "You can't put this here!");
+}
+
+TEST(StatusTest, ReturnIfErrorOkStatus) {
+  bool returned_due_to_error = true;
+  auto lambda = [&returned_due_to_error](const Status& s) {
+    TC3_RETURN_IF_ERROR(s);
+    returned_due_to_error = false;
+    return Status::OK;
+  };
+
+  // OK should allow execution to continue and the returned status should also
+  // be OK.
+  Status status = lambda(Status());
+  EXPECT_EQ(status.error_code(), 0);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(status.error_message(), "");
+  EXPECT_FALSE(returned_due_to_error);
+}
+
+TEST(StatusTest, ReturnIfErrorInvalidArgumentStatus) {
+  bool returned_due_to_error = true;
+  auto lambda = [&returned_due_to_error](const Status& s) {
+    TC3_RETURN_IF_ERROR(s);
+    returned_due_to_error = false;
+    return Status::OK;
+  };
+
+  // INVALID_ARGUMENT should cause an early return.
+  Status invalid_arg_status(StatusCode::INVALID_ARGUMENT, "You can't do that!");
+  Status status = lambda(invalid_arg_status);
+  EXPECT_EQ(status.error_code(), 3);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::INVALID_ARGUMENT);
+  EXPECT_EQ(status.error_message(), "You can't do that!");
+  EXPECT_TRUE(returned_due_to_error);
+}
+
+TEST(StatusTest, ReturnIfErrorUnknownStatus) {
+  bool returned_due_to_error = true;
+  auto lambda = [&returned_due_to_error](const Status& s) {
+    TC3_RETURN_IF_ERROR(s);
+    returned_due_to_error = false;
+    return Status::OK;
+  };
+
+  // UNKNOWN should cause an early return.
+  Status unknown_status(StatusCode::UNKNOWN,
+                        "We also know there are known unknowns.");
+  libtextclassifier3::Status status = lambda(unknown_status);
+  EXPECT_EQ(status.error_code(), 2);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(status.error_message(), "We also know there are known unknowns.");
+  EXPECT_TRUE(returned_due_to_error);
+}
+
+TEST(StatusTest, ReturnIfErrorOnlyInvokesExpressionOnce) {
+  int num_invocations = 0;
+  auto ok_internal_expr = [&num_invocations]() {
+    ++num_invocations;
+    return Status::OK;
+  };
+  auto ok_lambda = [&ok_internal_expr]() {
+    TC3_RETURN_IF_ERROR(ok_internal_expr());
+    return Status::OK;
+  };
+
+  libtextclassifier3::Status status = ok_lambda();
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(num_invocations, 1);
+
+  num_invocations = 0;
+  auto error_internal_expr = [&num_invocations]() {
+    ++num_invocations;
+    return Status::UNKNOWN;
+  };
+  auto error_lambda = [&error_internal_expr]() {
+    TC3_RETURN_IF_ERROR(error_internal_expr());
+    return Status::OK;
+  };
+
+  status = error_lambda();
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(num_invocations, 1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/base/statusor_test.cc b/native/utils/base/statusor_test.cc
new file mode 100644
index 0000000..23165b0
--- /dev/null
+++ b/native/utils/base/statusor_test.cc

@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/base/statusor.h"
+
+#include "utils/base/logging.h"
+#include "utils/base/status.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StatusOrTest, DoesntDieWhenOK) {
+  StatusOr<std::string> status_or_string = std::string("Hello World");
+  EXPECT_TRUE(status_or_string.ok());
+  EXPECT_EQ(status_or_string.ValueOrDie(), "Hello World");
+}
+
+TEST(StatusOrTest, DiesWhenNotOK) {
+  StatusOr<std::string> status_or_string = {Status::UNKNOWN};
+  EXPECT_FALSE(status_or_string.ok());
+  // Android does not print the error message to stderr, so we are not checking
+  // the error message here.
+  EXPECT_DEATH(status_or_string.ValueOrDie(), "");
+}
+
+// Foo is NOT default constructible and is implicitly convertible from int.
+class Foo {
+ public:
+  // Copy value conversion
+  Foo(int i) : i_(i) {}  // NOLINT
+  int i() const { return i_; }
+
+ private:
+  int i_;
+};
+
+TEST(StatusOrTest, HandlesNonDefaultConstructibleValues) {
+  StatusOr<Foo> foo_or(Foo(7));
+  EXPECT_TRUE(foo_or.ok());
+  EXPECT_EQ(foo_or.ValueOrDie().i(), 7);
+
+  StatusOr<Foo> error_or(Status::UNKNOWN);
+  EXPECT_FALSE(error_or.ok());
+  EXPECT_EQ(error_or.status().CanonicalCode(), StatusCode::UNKNOWN);
+}
+
+class Bar {
+ public:
+  // Move value conversion
+  Bar(Foo&& f) : i_(2 * f.i()) {}  // NOLINT
+
+  // Movable, but not copyable.
+  Bar(const Bar& other) = delete;
+  Bar& operator=(const Bar& rhs) = delete;
+  Bar(Bar&& other) = default;
+  Bar& operator=(Bar&& rhs) = default;
+
+  int i() const { return i_; }
+
+ private:
+  int i_;
+};
+
+TEST(StatusOrTest, HandlesValueConversion) {
+  // Copy value conversion constructor : StatusOr<Foo>(const int&)
+  StatusOr<Foo> foo_status(19);
+  EXPECT_TRUE(foo_status.ok());
+  EXPECT_EQ(foo_status.ValueOrDie().i(), 19);
+
+  // Move value conversion constructor : StatusOr<Bar>(Foo&&)
+  StatusOr<Bar> bar_status(std::move(foo_status));
+  EXPECT_TRUE(bar_status.ok());
+  EXPECT_EQ(bar_status.ValueOrDie().i(), 38);
+
+  StatusOr<int> int_status(19);
+  // Copy conversion constructor : StatusOr<Foo>(const StatusOr<int>&)
+  StatusOr<Foo> copied_status(int_status);
+  EXPECT_TRUE(copied_status.ok());
+  EXPECT_EQ(copied_status.ValueOrDie().i(), 19);
+
+  // Move conversion constructor : StatusOr<Bar>(StatusOr<Foo>&&)
+  StatusOr<Bar> moved_status(std::move(copied_status));
+  EXPECT_TRUE(moved_status.ok());
+  EXPECT_EQ(moved_status.ValueOrDie().i(), 38);
+
+  // Move conversion constructor with error : StatusOr<Bar>(StatusOr<Foo>&&)
+  StatusOr<Foo> error_status(Status::UNKNOWN);
+  StatusOr<Bar> moved_error_status(std::move(error_status));
+  EXPECT_FALSE(moved_error_status.ok());
+}
+
+struct OkFn {
+  StatusOr<int> operator()() { return 42; }
+};
+TEST(StatusOrTest, AssignOrReturnValOk) {
+  auto lambda = []() {
+    TC3_ASSIGN_OR_RETURN(int i, OkFn()(), -1);
+    return i;
+  };
+
+  // OkFn() should return a valid integer, so lambda should return that integer.
+  EXPECT_EQ(lambda(), 42);
+}
+
+struct FailFn {
+  StatusOr<int> operator()() { return Status::UNKNOWN; }
+};
+TEST(StatusOrTest, AssignOrReturnValError) {
+  auto lambda = []() {
+    TC3_ASSIGN_OR_RETURN(int i, FailFn()(), -1);
+    return i;
+  };
+
+  // FailFn() should return an error, so lambda should return -1.
+  EXPECT_EQ(lambda(), -1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/base/unaligned_access.h b/native/utils/base/unaligned_access.h
deleted file mode 100644
index 68fe207..0000000
--- a/native/utils/base/unaligned_access.h
+++ /dev/null

@@ -1,300 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_UNALIGNED_ACCESS_H_
-#define LIBTEXTCLASSIFIER_UTILS_BASE_UNALIGNED_ACCESS_H_
-
-#include <string.h>
-
-#include <cstdint>
-
-#include "utils/base/integral_types.h"
-#include "utils/base/macros.h"
-
-// unaligned APIs
-
-// Portable handling of unaligned loads, stores, and copies.
-// On some platforms, like ARM, the copy functions can be more efficient
-// then a load and a store.
-//
-// It is possible to implement all of these these using constant-length memcpy
-// calls, which is portable and will usually be inlined into simple loads and
-// stores if the architecture supports it. However, such inlining usually
-// happens in a pass that's quite late in compilation, which means the resulting
-// loads and stores cannot participate in many other optimizations, leading to
-// overall worse code.
-
-// The unaligned API is C++ only.  The declarations use C++ features
-// (namespaces, inline) which are absent or incompatible in C.
-#if defined(__cplusplus)
-
-#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||\
-    defined(MEMORY_SANITIZER)
-// Consider we have an unaligned load/store of 4 bytes from address 0x...05.
-// AddressSanitizer will treat it as a 3-byte access to the range 05:07 and
-// will miss a bug if 08 is the first unaddressable byte.
-// ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will
-// miss a race between this access and some other accesses to 08.
-// MemorySanitizer will correctly propagate the shadow on unaligned stores
-// and correctly report bugs on unaligned loads, but it may not properly
-// update and report the origin of the uninitialized memory.
-// For all three tools, replacing an unaligned access with a tool-specific
-// callback solves the problem.
-
-// Make sure uint16_t/uint32_t/uint64_t are defined.
-#include <stdint.h>
-
-extern "C" {
-uint16_t __sanitizer_unaligned_load16(const void *p);
-uint32_t __sanitizer_unaligned_load32(const void *p);
-uint64_t __sanitizer_unaligned_load64(const void *p);
-void __sanitizer_unaligned_store16(void *p, uint16_t v);
-void __sanitizer_unaligned_store32(void *p, uint32_t v);
-void __sanitizer_unaligned_store64(void *p, uint64_t v);
-}  // extern "C"
-
-namespace libtextclassifier3 {
-
-inline uint16_t UnalignedLoad16(const void *p) {
-  return __sanitizer_unaligned_load16(p);
-}
-
-inline uint32_t UnalignedLoad32(const void *p) {
-  return __sanitizer_unaligned_load32(p);
-}
-
-inline uint64 UnalignedLoad64(const void *p) {
-  return __sanitizer_unaligned_load64(p);
-}
-
-inline void UnalignedStore16(void *p, uint16_t v) {
-  __sanitizer_unaligned_store16(p, v);
-}
-
-inline void UnalignedStore32(void *p, uint32_t v) {
-  __sanitizer_unaligned_store32(p, v);
-}
-
-inline void UnalignedStore64(void *p, uint64 v) {
-  __sanitizer_unaligned_store64(p, v);
-}
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p) (::libtextclassifier3::UnalignedLoad16(_p))
-#define TC3_UNALIGNED_LOAD32(_p) (::libtextclassifier3::UnalignedLoad32(_p))
-#define TC3_UNALIGNED_LOAD64(_p) \
-  (::libtextclassifier3::UnalignedLoad64(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (::libtextclassifier3::UnalignedStore16(_p, _val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (::libtextclassifier3::UnalignedStore32(_p, _val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#elif defined(UNDEFINED_BEHAVIOR_SANITIZER)
-
-namespace libtextclassifier3 {
-
-inline uint16_t UnalignedLoad16(const void *p) {
-  uint16_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint32_t UnalignedLoad32(const void *p) {
-  uint32_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint64 UnalignedLoad64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore64(void *p, uint64 v) { memcpy(p, &v, sizeof v); }
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p) (::libtextclassifier3::UnalignedLoad16(_p))
-#define TC3_UNALIGNED_LOAD32(_p) (::libtextclassifier3::UnalignedLoad32(_p))
-#define TC3_UNALIGNED_LOAD64(_p) (::libtextclassifier3::UnalignedLoad64(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (::libtextclassifier3::UnalignedStore16(_p, _val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (::libtextclassifier3::UnalignedStore32(_p, _val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
-    defined(_M_IX86) || defined(__ppc__) || defined(__PPC__) ||    \
-    defined(__ppc64__) || defined(__PPC64__)
-
-// x86 and x86-64 can perform unaligned loads/stores directly;
-// modern PowerPC hardware can also do unaligned integer loads and stores;
-// but note: the FPU still sends unaligned loads and stores to a trap handler!
-
-#define TC3_UNALIGNED_LOAD16(_p) \
-  (*reinterpret_cast<const uint16_t *>(_p))
-#define TC3_UNALIGNED_LOAD32(_p) \
-  (*reinterpret_cast<const uint32_t *>(_p))
-#define TC3_UNALIGNED_LOAD64(_p) \
-  (*reinterpret_cast<const uint64 *>(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (*reinterpret_cast<uint16_t *>(_p) = (_val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (*reinterpret_cast<uint32_t *>(_p) = (_val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (*reinterpret_cast<uint64 *>(_p) = (_val))
-
-#elif defined(__arm__) && \
-      !defined(__ARM_ARCH_5__) && \
-      !defined(__ARM_ARCH_5T__) && \
-      !defined(__ARM_ARCH_5TE__) && \
-      !defined(__ARM_ARCH_5TEJ__) && \
-      !defined(__ARM_ARCH_6__) && \
-      !defined(__ARM_ARCH_6J__) && \
-      !defined(__ARM_ARCH_6K__) && \
-      !defined(__ARM_ARCH_6Z__) && \
-      !defined(__ARM_ARCH_6ZK__) && \
-      !defined(__ARM_ARCH_6T2__)
-
-
-// ARMv7 and newer support native unaligned accesses, but only of 16-bit
-// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
-// do an unaligned read and rotate the words around a bit, or do the reads very
-// slowly (trip through kernel mode). There's no simple #define that says just
-// "ARMv7 or higher", so we have to filter away all ARMv5 and ARMv6
-// sub-architectures. Newer gcc (>= 4.6) set an __ARM_FEATURE_ALIGNED #define,
-// so in time, maybe we can move on to that.
-//
-// This is a mess, but there's not much we can do about it.
-//
-// To further complicate matters, only LDR instructions (single reads) are
-// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
-// explicitly tell the compiler that these accesses can be unaligned, it can and
-// will combine accesses. On armcc, the way to signal this is done by accessing
-// through the type (uint32_t __packed *), but GCC has no such attribute
-// (it ignores __attribute__((packed)) on individual variables). However,
-// we can tell it that a _struct_ is unaligned, which has the same effect,
-// so we do that.
-
-namespace libtextclassifier3 {
-
-struct Unaligned16Struct {
-  uint16_t value;
-  uint8_t dummy;  // To make the size non-power-of-two.
-} TC3_ATTRIBUTE_PACKED;
-
-struct Unaligned32Struct {
-  uint32_t value;
-  uint8_t dummy;  // To make the size non-power-of-two.
-} TC3_ATTRIBUTE_PACKED;
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p)                                  \
-  ((reinterpret_cast<const ::libtextclassifier3::Unaligned16Struct *>(_p)) \
-       ->value)
-#define TC3_UNALIGNED_LOAD32(_p)                                  \
-  ((reinterpret_cast<const ::libtextclassifier3::Unaligned32Struct *>(_p)) \
-       ->value)
-
-#define TC3_UNALIGNED_STORE16(_p, _val)                      \
-  ((reinterpret_cast< ::libtextclassifier3::Unaligned16Struct *>(_p)) \
-       ->value = (_val))
-#define TC3_UNALIGNED_STORE32(_p, _val)                      \
-  ((reinterpret_cast< ::libtextclassifier3::Unaligned32Struct *>(_p)) \
-       ->value = (_val))
-
-namespace libtextclassifier3 {
-
-inline uint64 UnalignedLoad64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UnalignedStore64(void *p, uint64 v) { memcpy(p, &v, sizeof v); }
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD64(_p) (::libtextclassifier3::UnalignedLoad64(_p))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#else
-
-// TC3_NEED_ALIGNED_LOADS is defined when the underlying platform
-// doesn't support unaligned access.
-#define TC3_NEED_ALIGNED_LOADS
-
-// These functions are provided for architectures that don't support
-// unaligned loads and stores.
-
-namespace libtextclassifier3 {
-
-inline uint16_t UnalignedLoad16(const void *p) {
-  uint16_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint32_t UnalignedLoad32(const void *p) {
-  uint32_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint64 UnalignedLoad64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore64(void *p, uint64 v) { memcpy(p, &v, sizeof v); }
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p) (::libtextclassifier3::UnalignedLoad16(_p))
-#define TC3_UNALIGNED_LOAD32(_p) (::libtextclassifier3::UnalignedLoad32(_p))
-#define TC3_UNALIGNED_LOAD64(_p) (::libtextclassifier3::UnalignedLoad64(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (::libtextclassifier3::UnalignedStore16(_p, _val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (::libtextclassifier3::UnalignedStore32(_p, _val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#endif
-
-#endif  // defined(__cplusplus), end of unaligned API
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_UNALIGNED_ACCESS_H_

diff --git a/native/utils/bitmap/bitmap.h b/native/utils/bitmap/bitmap.h
deleted file mode 100644
index 6eb9dff..0000000
--- a/native/utils/bitmap/bitmap.h
+++ /dev/null

@@ -1,536 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_BITMAP_BITMAP_H_
-#define LIBTEXTCLASSIFIER_UTILS_BITMAP_BITMAP_H_
-
-#include <algorithm>
-#include <climits>
-#include <ostream>
-
-#include "utils/base/integral_types.h"
-#include "utils/base/logging.h"
-
-namespace libtextclassifier3 {
-
-template <typename W>
-void SetBit(W* map, size_t index, bool value) {
-  static constexpr size_t kIntBits = CHAR_BIT * sizeof(W);
-  // This is written in such a way that our current compiler generates
-  // a conditional move instead of a conditional branch, which is data
-  // dependent and unpredictable.  Branch mis-prediction is much more
-  // expensive than cost of a conditional move.
-  const W bit = W{1} << (index & (kIntBits - 1));
-  const W old_value = map[index / kIntBits];
-  const W new_value = value ? old_value | bit : old_value & ~bit;
-  map[index / kIntBits] = new_value;
-}
-
-template <typename W>
-bool GetBit(const W* map, size_t index) {
-  static constexpr size_t kIntBits = CHAR_BIT * sizeof(W);
-  return map[index / kIntBits] & (W{1} << (index & (kIntBits - 1)));
-}
-
-namespace internal {
-template <typename W>
-class BasicBitmap {
- public:
-  using size_type = size_t;
-  using Word = W;  // packed bit internal storage type.
-
-  // Allocates a new bitmap with size bits set to the value fill.
-  BasicBitmap(size_type size, bool fill) : size_(size), alloc_(true) {
-    map_ = std::allocator<Word>().allocate(array_size());
-    SetAll(fill);
-  }
-
-  explicit BasicBitmap(size_type size) : BasicBitmap(size, false) {}
-
-  // Borrows a reference to a region of memory that is the caller's
-  // responsibility to manage for the life of the Bitmap. The map is expected
-  // to have enough memory to store size bits.
-  BasicBitmap(Word* map, size_type size)
-      : map_(map), size_(size), alloc_(false) {}
-
-  // Default constructor: creates a bitmap with zero bits.
-  BasicBitmap() : size_(0), alloc_(true) {
-    map_ = std::allocator<Word>().allocate(array_size());
-  }
-
-  BasicBitmap(const BasicBitmap& src);
-
-  // Assigns this Bitmap to the values of the src Bitmap.
-  // This includes pointing to the same underlying map_ if the src Bitmap
-  // does not allocate its own.
-  BasicBitmap& operator=(const BasicBitmap& src);
-
-  // Destructor : clean up if we allocated
-  ~BasicBitmap() {
-    if (alloc_) {
-      std::allocator<Word>().deallocate(map_, array_size());
-    }
-  }
-
-  // Resizes the bitmap.
-  // If size < bits(), the extra bits will be discarded.
-  // If size > bits(), the extra bits will be filled with the fill value.
-  void Resize(size_type size, bool fill = false);
-
-  // ACCESSORS
-  size_type bits() const { return size_; }
-  size_type array_size() const { return RequiredArraySize(bits()); }
-
-  // Gets an entry of the internal map. Requires array_index < array_size()
-  Word GetMapElement(size_type array_index) const {
-    CHECK_LT(array_index, array_size());
-    return map_[array_index];
-  }
-
-  // Gets an entry of the internal map. Requires array_index < array_size()
-  // Also performs masking to insure no bits >= bits().
-  Word GetMaskedMapElement(size_type array_index) const {
-    return (array_index == array_size() - 1)
-               ? map_[array_size() - 1] & HighOrderMapElementMask()
-               : map_[array_index];
-  }
-
-  // Sets an element of the internal map. Requires array_index < array_size()
-  void SetMapElement(size_type array_index, Word value) {
-    CHECK_LT(array_index, array_size());
-    map_[array_index] = value;
-  }
-
-  // The highest order element in map_ will have some meaningless bits
-  // (with undefined values) if bits() is not a multiple of
-  // kIntBits. If you & HighOrderMapElementMask with the high order
-  // element, you will be left with only the valid, defined bits (the
-  // others will be 0)
-  Word HighOrderMapElementMask() const {
-    return (size_ == 0) ? 0 : (~W{0}) >> (-size_ & (kIntBits - 1));
-  }
-
-  bool Get(size_type index) const {
-    TC3_DCHECK_LT(index, size_);
-    return GetBit(map_, index);
-  }
-
-  // Returns true if all bits are unset
-  bool IsAllZeroes() const {
-    return std::all_of(map_, map_ + array_size() - 1,
-                       [](Word w) { return w == W{0}; }) &&
-           (map_[array_size() - 1] & HighOrderMapElementMask()) == W{0};
-  }
-
-  // Returns true if all bits are set
-  bool IsAllOnes() const {
-    return std::all_of(map_, map_ + array_size() - 1,
-                       [](Word w) { return w == ~W{0}; }) &&
-           ((~map_[array_size() - 1]) & HighOrderMapElementMask()) == W{0};
-  }
-
-  void Set(size_type index, bool value) {
-    TC3_DCHECK_LT(index, size_);
-    SetBit(map_, index, value);
-  }
-
-  void Toggle(size_type index) {
-    TC3_DCHECK_LT(index, size_);
-    map_[index / kIntBits] ^= (W{1} << (index & (kIntBits - 1)));
-  }
-
-  // Sets all the bits to true or false
-  void SetAll(bool value) {
-    std::fill(map_, map_ + array_size(), value ? ~W{0} : W{0});
-  }
-
-  // Clears all bits in the bitmap
-  void Clear() { SetAll(false); }
-
-  // Sets a range of bits (begin inclusive, end exclusive) to true or false
-  void SetRange(size_type begin, size_type end, bool value);
-
-  // Sets "this" to be the union of "this" and "other". The bitmaps do
-  // not have to be the same size. If other is smaller, all the higher
-  // order bits are assumed to be 0. The size of "this" is never
-  // changed by this operation (higher order bits in other are
-  // ignored). Note this make Union *not* commutative -- it matters
-  // which Bitmap is this and which is other
-  void Union(const BasicBitmap& other);
-
-  // Sets "this" to be the intersection of "this" and "other". The
-  // bitmaps do not have to be the same size. If other is smaller, all
-  // the higher order bits are assumed to be 0. The size of this is
-  // never changed by this operation (higher order bits in other are
-  // ignored)
-  void Intersection(const BasicBitmap& other);
-
-  // Returns true if "this" and "other" have any bits set in common.
-  bool IsIntersectionNonEmpty(const BasicBitmap& other) const;
-
-  // Sets "this" to be the "~" (Complement) of "this".
-  void Complement() {
-    std::transform(map_, map_ + array_size(), map_, [](Word w) { return ~w; });
-  }
-
-  // Sets "this" to be the set of bits in "this" but not in "other"
-  // REQUIRES: "bits() == other.bits()" (i.e. the bitmaps are the same size)
-  void Difference(const BasicBitmap& other) {
-    TC3_CHECK_EQ(bits(), other.bits());
-    std::transform(map_, map_ + array_size(), other.map_, map_,
-                   [](Word a, Word b) { return a & ~b; });
-  }
-
-  // Sets "this" to be the set of bits which is set in either "this" or "other",
-  // but not both.
-  // REQUIRES: "bits() == other.bits()" (i.e. the bitmaps are the same size)
-  void ExclusiveOr(const BasicBitmap& other) {
-    TC3_CHECK_EQ(bits(), other.bits());
-    std::transform(map_, map_ + array_size(), other.map_, map_,
-                   [](Word a, Word b) { return a ^ b; });
-  }
-
-  // Return true if any bit between begin inclusive and end exclusive
-  // is set.  0 <= begin <= end <= bits() is required.
-  bool TestRange(size_type begin, size_type end) const;
-
-  // Return true if both Bitmaps are of equal length and have the same
-  // value.
-  bool IsEqual(const BasicBitmap& other) const {
-    return (bits() == other.bits()) &&
-           ((array_size() < 1) ||
-            std::equal(map_, map_ + array_size() - 1, other.map_)) &&
-           ((HighOrderMapElementMask() & other.map_[array_size() - 1]) ==
-            (HighOrderMapElementMask() & map_[array_size() - 1]));
-  }
-
-  // Return true is this bitmap is a subset of another bitmap in terms of
-  // the positions of 1s. That is, 0110 is a subset of 1110.
-  // REQUIRES: "bits() == other.bits()" (i.e. the bitmaps are the same size)
-  bool IsSubsetOf(const BasicBitmap& other) const;
-
-  // Returns 0 if the two bitmaps are equal.  Returns a negative number if the
-  // this bitmap is less than other, and a positive number otherwise.
-  //
-  // The relation we use is the natural relation defined by assigning an integer
-  // to each bitmap:
-  //
-  // int(bitmap) = b_0 + 2 * b_1 + ... + 2^k * b_k
-  //
-  // Then for our comparison function:
-  //
-  // if int(b1) != int(b2), then b1 is less than b2 if int(b1) < int(b2),
-  // and b2 is less than b1 otherwise.
-  //
-  // if int(b1) == int(b2), then we compare the numbers of bits in b1 and b2.
-  // If b1 has strictly fewer bits, then b1 is less than b2 (same for b2).
-  // If b1 and b2 have the same number of bits, then they are equal and we
-  // return 0.
-  int CompareTo(const BasicBitmap& other) const;
-
-  // return number of allocated words required for a bitmap of size num_bits
-  // minimum size is 1
-  static constexpr size_t RequiredArraySize(size_type num_bits) {
-    return num_bits == 0 ? 1 : (num_bits - 1) / kIntBits + 1;
-  }
-
- private:
-  // The same semantics as CompareTo, except that we have the invariant that
-  // first has at least as many bits as second.
-  static int CompareToHelper(const BasicBitmap& first,
-                             const BasicBitmap& second);
-
-  static constexpr unsigned Log2(unsigned n, unsigned p = 0) {
-    return (n <= 1) ? p : Log2(n / 2, p + 1);
-  }
-
-  // NOTE: we make assumptions throughout the code that kIntBits is a power of
-  // 2, so that we can use shift and mask instead of division and modulo.
-  static constexpr int kIntBits = CHAR_BIT * sizeof(Word);  // bits in a Word
-  static constexpr int kLogIntBits = Log2(kIntBits, 0);
-  Word* map_;       // the bitmap
-  size_type size_;  // the upper bound of the bitmap
-  bool alloc_;      // whether or not *we* allocated the memory
-};
-}  // namespace internal
-
-
-class Bitmap : public libtextclassifier3::internal::BasicBitmap<uint32> {
- public:
-  using internal::BasicBitmap<uint32>::BasicBitmap;
-};
-
-namespace internal {
-template <typename W>
-BasicBitmap<W>::BasicBitmap(const BasicBitmap& src)
-    : size_(src.size_), alloc_(src.alloc_) {
-  static_assert(((kIntBits & (kIntBits - 1)) == 0), "kIntBits not power of 2");
-  if (alloc_) {
-    map_ = std::allocator<Word>().allocate(array_size());
-    std::copy(src.map_, src.map_ + array_size(), map_);
-  } else {
-    map_ = src.map_;
-  }
-}
-
-template <typename W>
-void BasicBitmap<W>::Resize(size_type size, bool fill) {
-  const size_type old_size = size_;
-  const size_t new_array_size = RequiredArraySize(size);
-  if (new_array_size != array_size()) {
-    Word* new_map = std::allocator<Word>().allocate(new_array_size);
-    std::copy(map_, map_ + std::min<size_t>(new_array_size, array_size()),
-              new_map);
-    if (alloc_) {
-      std::allocator<Word>().deallocate(map_, array_size());
-    }
-    map_ = new_map;
-    alloc_ = true;
-  }
-  size_ = size;
-  if (old_size < size_) {
-    SetRange(old_size, size_, fill);
-  }
-}
-
-template <typename W>
-BasicBitmap<W>& BasicBitmap<W>::operator=(const BasicBitmap<W>& src) {
-  if (this != &src) {
-    if (alloc_ && array_size() != src.array_size()) {
-      std::allocator<Word>().deallocate(map_, array_size());
-      map_ = std::allocator<Word>().allocate(src.array_size());
-    }
-    size_ = src.size_;
-    if (src.alloc_) {
-      if (!alloc_) {
-        map_ = std::allocator<Word>().allocate(src.array_size());
-      }
-      std::copy(src.map_, src.map_ + src.array_size(), map_);
-      alloc_ = true;
-    } else {
-      if (alloc_) {
-        std::allocator<Word>().deallocate(map_, array_size());
-      }
-      map_ = src.map_;
-      alloc_ = false;
-    }
-  }
-  return *this;
-}
-
-// Return true if any bit between begin inclusive and end exclusive
-// is set.  0 <= begin <= end <= bits() is required.
-template <typename W>
-bool BasicBitmap<W>::TestRange(size_type begin, size_type end) const {
-  // Return false immediately if the range is empty.
-  if (begin == end) {
-    return false;
-  }
-  // Calculate the indices of the words containing the first and last bits,
-  // along with the positions of the bits within those words.
-  size_t i = begin / kIntBits;
-  size_t j = begin & (kIntBits - 1);
-  size_t ilast = (end - 1) / kIntBits;
-  size_t jlast = (end - 1) & (kIntBits - 1);
-  // If the range spans multiple words, discard the extraneous bits of the
-  // first word by shifting to the right, and then test the remaining bits.
-  if (i < ilast) {
-    if (map_[i++] >> j) {
-      return true;
-    }
-    j = 0;
-
-    // Test each of the "middle" words that lies completely within the range.
-    while (i < ilast) {
-      if (map_[i++]) {
-        return true;
-      }
-    }
-  }
-
-  // Test the portion of the last word that lies within the range. (This logic
-  // also handles the case where the entire range lies within a single word.)
-  const Word mask = (((W{1} << 1) << (jlast - j)) - 1) << j;
-  return (map_[ilast] & mask) != W{0};
-}
-
-template <typename W>
-bool BasicBitmap<W>::IsSubsetOf(const BasicBitmap& other) const {
-  TC3_CHECK_EQ(bits(), other.bits());
-  Word* mp = map_;
-  Word* endp = mp + array_size() - 1;
-  Word* op = other.map_;
-  // A is a subset of B if A - B = {}, that is A & ~B = {}
-  for (; mp != endp; ++mp, ++op)
-    if (*mp & ~*op) return false;
-  return (*mp & ~*op & HighOrderMapElementMask()) == W{0};
-}
-
-// Same semantics as CompareTo, except that we have the invariant that first
-// has at least as many bits as second.
-template <typename W>
-int BasicBitmap<W>::CompareToHelper(const BasicBitmap<W>& first,
-                                    const BasicBitmap<W>& second) {
-  // Checks if the high order bits in first that are not in second are set.  If
-  // any of these are set, then first is greater than second, and we return a
-  // positive value.
-  if (first.TestRange(second.bits(), first.bits())) {
-    return 1;
-  }
-
-  // We use unsigned integer comparison to compare the bitmaps.  We need to
-  // handle the high order bits in a special case (since there may be undefined
-  // bits for the element representing the highest order bits) and then we
-  // can do direct integer comparison.
-  size_t index = second.array_size() - 1;
-  Word left = first.map_[index] & second.HighOrderMapElementMask();
-  Word right = second.map_[index] & second.HighOrderMapElementMask();
-  if (left != right) {
-    return left < right ? -1 : 1;
-  }
-  while (index > 0) {
-    --index;
-    left = first.map_[index];
-    right = second.map_[index];
-    if (left != right) {
-      return left < right ? -1 : 1;
-    }
-  }
-  // Now we have reached the end, all common bits are equal, and all bits that
-  // are only in the longer list are 0.  We return 1 if the first bitmap is
-  // strictly larger, and 0 if the bitmaps are of equal size.
-  if (first.bits() == second.bits()) {
-    return 0;
-  } else {
-    return 1;
-  }
-}
-
-template <typename W>
-int BasicBitmap<W>::CompareTo(const BasicBitmap<W>& other) const {
-  if (bits() > other.bits()) {
-    return CompareToHelper(*this, other);
-  } else {
-    return -CompareToHelper(other, *this);
-  }
-}
-
-// Note that bits > size end up in undefined states when sizes
-// aren't equal, but that's okay.
-template <typename W>
-void BasicBitmap<W>::Union(const BasicBitmap<W>& other) {
-  const size_t this_array_size = array_size();
-  const size_t other_array_size = other.array_size();
-  const size_t min_array_size = std::min(this_array_size, other_array_size);
-  if (min_array_size == 0) {
-    // Nothing to do.
-    return;
-  }
-  // Perform bitwise OR of all but the last common word.
-  const size_t last = min_array_size - 1;
-  std::transform(map_, map_ + last, other.map_, map_,
-                 [](Word a, Word b) { return a | b; });
-  // Perform bitwise OR of the last common word, applying mask if necessary.
-  map_[last] |= other_array_size == min_array_size
-                    ? other.map_[last] & other.HighOrderMapElementMask()
-                    : other.map_[last];
-}
-
-// Note that bits > size end up in undefined states when sizes
-// aren't equal, but that's okay.
-template <typename W>
-void BasicBitmap<W>::Intersection(const BasicBitmap<W>& other) {
-  const size_t this_array_size = array_size();
-  const size_t other_array_size = other.array_size();
-  const size_t min_array_size = std::min(this_array_size, other_array_size);
-  // Perform bitwise AND of all common words.
-  std::transform(map_, map_ + min_array_size, other.map_, map_,
-                 [](Word a, Word b) { return a & b; });
-  if (other_array_size == min_array_size) {
-    // Zero out bits that are outside the range of 'other'.
-    if (other_array_size != 0) {
-      map_[other_array_size - 1] &= other.HighOrderMapElementMask();
-    }
-    std::fill(map_ + other_array_size, map_ + this_array_size, 0);
-  }
-}
-
-template <typename W>
-bool BasicBitmap<W>::IsIntersectionNonEmpty(const BasicBitmap<W>& other) const {
-  // First check fully overlapping bytes.
-  size_t max_overlap = std::min(array_size(), other.array_size()) - 1;
-  for (size_t i = 0; i < max_overlap; ++i) {
-    if (map_[i] & other.map_[i]) return true;
-  }
-
-  // Now check the highest overlapping byte, applying bit masks as necessary.
-  Word high_byte = map_[max_overlap] & other.map_[max_overlap];
-
-  if (other.array_size() > array_size())
-    return high_byte & HighOrderMapElementMask();
-  else if (array_size() > other.array_size())
-    return high_byte & other.HighOrderMapElementMask();
-
-  // Same array_size, apply both masks.
-  return high_byte & HighOrderMapElementMask() &
-         other.HighOrderMapElementMask();
-}
-
-/*static*/
-template <typename W>
-void BasicBitmap<W>::SetRange(size_type begin, size_type end, bool value) {
-  if (begin == end) return;
-  // Figure out which element(s) in the map_ array are affected
-  // by this op.
-  const size_type begin_element = begin / kIntBits;
-  const size_type begin_bit = begin % kIntBits;
-  const size_type end_element = end / kIntBits;
-  const size_type end_bit = end % kIntBits;
-  Word initial_mask = ~W{0} << begin_bit;
-  if (end_element == begin_element) {
-    // The range is contained in a single element of the array, so
-    // adjust both ends of the mask.
-    initial_mask = initial_mask & (~W{0} >> (kIntBits - end_bit));
-  }
-  if (value) {
-    map_[begin_element] |= initial_mask;
-  } else {
-    map_[begin_element] &= ~initial_mask;
-  }
-  if (end_element != begin_element) {
-    // Set all the bits in the array elements between the begin
-    // and end elements.
-    std::fill(map_ + begin_element + 1, map_ + end_element,
-              value ? ~W{0} : W{0});
-
-    // Update the appropriate bit-range in the last element.
-    // Note end_bit is an exclusive bound, so if it's 0 none of the
-    // bits in end_element are contained in the range (and we don't
-    // have to modify it).
-    if (end_bit != 0) {
-      const Word final_mask = ~W{0} >> (kIntBits - end_bit);
-      if (value) {
-        map_[end_element] |= final_mask;
-      } else {
-        map_[end_element] &= ~final_mask;
-      }
-    }
-  }
-}
-}  // namespace internal
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_BITMAP_BITMAP_H_

diff --git a/native/utils/calendar/calendar-common.h b/native/utils/calendar/calendar-common.h
index 1f5b128..e6fd076 100644
--- a/native/utils/calendar/calendar-common.h
+++ b/native/utils/calendar/calendar-common.h

@@ -100,14 +100,14 @@
   *granularity = GetGranularity(parse_data);
 
   // Apply each of the parsed fields in order of increasing granularity.
-  static const int64 kMillisInHour = 1000 * 60 * 60;
+  static const int64 kMillisInMinute = 1000 * 60;
   if (parse_data.HasFieldType(DatetimeComponent::ComponentType::ZONE_OFFSET)) {
     int zone_offset;
     parse_data.GetFieldValue(DatetimeComponent::ComponentType::ZONE_OFFSET,
                              &zone_offset);
-    TC3_CALENDAR_CHECK(calendar->SetZoneOffset(zone_offset * kMillisInHour))
+    TC3_CALENDAR_CHECK(calendar->SetZoneOffset(zone_offset * kMillisInMinute))
   }
-
+  static const int64 kMillisInHour = 1000 * 60 * 60;
   if (parse_data.HasFieldType(DatetimeComponent::ComponentType::DST_OFFSET)) {
     int dst_offset;
     if (parse_data.GetFieldValue(DatetimeComponent::ComponentType::DST_OFFSET,
@@ -229,7 +229,7 @@
     case DatetimeComponent::RelativeQualifier::PAST:
       TC3_CALENDAR_CHECK(
           AdjustByRelation(relative_date_time_component,
-                           -relative_date_time_component.relative_count,
+                           relative_date_time_component.relative_count,
                            /*allow_today=*/false, calendar))
       return true;
     case DatetimeComponent::RelativeQualifier::FUTURE:

diff --git a/native/utils/checksum_test.cc b/native/utils/checksum_test.cc
new file mode 100644
index 0000000..dd04956
--- /dev/null
+++ b/native/utils/checksum_test.cc

@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/checksum.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Smoke test: one well-formed number (with embedded spaces) is accepted, while
+// single-digit inputs and an input containing a non-digit are rejected.
+TEST(LuhnTest, CorrectlyHandlesSimpleCases) {
+  EXPECT_TRUE(VerifyLuhnChecksum("3782 8224 6310 005"));
+  EXPECT_FALSE(VerifyLuhnChecksum("0"));
+  EXPECT_FALSE(VerifyLuhnChecksum("1"));
+  EXPECT_FALSE(VerifyLuhnChecksum("0A"));
+}
+
+// Every number below is expected to pass the checksum. Two of them contain
+// embedded spaces and are checked with the default whitespace handling.
+TEST(LuhnTest, CorrectlyVerifiesPaymentCardNumbers) {
+  // Fake test numbers.
+  EXPECT_TRUE(VerifyLuhnChecksum("3782 8224 6310 005"));
+  EXPECT_TRUE(VerifyLuhnChecksum("371449635398431"));
+  EXPECT_TRUE(VerifyLuhnChecksum("5610591081018250"));
+  EXPECT_TRUE(VerifyLuhnChecksum("38520000023237"));
+  EXPECT_TRUE(VerifyLuhnChecksum("6011000990139424"));
+  EXPECT_TRUE(VerifyLuhnChecksum("3566002020360505"));
+  EXPECT_TRUE(VerifyLuhnChecksum("5105105105105100"));
+  EXPECT_TRUE(VerifyLuhnChecksum("4012 8888 8888 1881"));
+}
+
+// The same input (note the trailing space) is accepted when whitespace is
+// ignored and rejected when it is not.
+TEST(LuhnTest, HandlesWhitespace) {
+  EXPECT_TRUE(
+      VerifyLuhnChecksum("3782 8224 6310 005 ", /*ignore_whitespace=*/true));
+  EXPECT_FALSE(
+      VerifyLuhnChecksum("3782 8224 6310 005 ", /*ignore_whitespace=*/false));
+}
+
+// Whitespace-only and empty inputs are rejected regardless of the
+// ignore_whitespace flag.
+TEST(LuhnTest, HandlesEdgeCases) {
+  EXPECT_FALSE(VerifyLuhnChecksum("    ", /*ignore_whitespace=*/true));
+  EXPECT_FALSE(VerifyLuhnChecksum("    ", /*ignore_whitespace=*/false));
+  EXPECT_FALSE(VerifyLuhnChecksum("", /*ignore_whitespace=*/true));
+  EXPECT_FALSE(VerifyLuhnChecksum("", /*ignore_whitespace=*/false));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/container/sorted-strings-table_test.cc b/native/utils/container/sorted-strings-table_test.cc
new file mode 100644
index 0000000..a93b197
--- /dev/null
+++ b/native/utils/container/sorted-strings-table_test.cc

@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/container/sorted-strings-table.h"
+
+#include <vector>
+
+#include "utils/base/integral_types.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Exercises FindAllPrefixMatches, LongestPrefixMatch and Find on a small table
+// holding the pieces "hell", "hello", "o" and "there" (ids 0..3).
+TEST(SortedStringsTest, Lookup) {
+  const char pieces[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};
+
+  SortedStringsTable table(/*num_pieces=*/4, offsets, StringPiece(pieces, 18),
+                           /*use_linear_scan_threshold=*/1);
+
+  // Both "hell" and "hello" are prefixes of the input.
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("hello there", &matches));
+    EXPECT_EQ(matches.size(), 2);
+    EXPECT_EQ(matches[0].id, 0 /*hell*/);
+    EXPECT_EQ(matches[0].match_length, 4 /*hell*/);
+    EXPECT_EQ(matches[1].id, 1 /*hello*/);
+    EXPECT_EQ(matches[1].match_length, 5 /*hello*/);
+  }
+
+  // The input is shorter than every piece it shares characters with.
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("he", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("abcd", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("hi there", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  // A single NUL byte as input.
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches(StringPiece("\0", 1), &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  // Two non-ASCII bytes (0xff 0xfe) as input.
+  {
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(
+        table.FindAllPrefixMatches(StringPiece("\xff\xfe", 2), &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("hella there", &match));
+    EXPECT_EQ(match.id, 0 /*hell*/);
+  }
+
+  {
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("hello there", &match));
+    EXPECT_EQ(match.id, 1 /*hello*/);
+  }
+
+  // No piece is a prefix of the input: the call succeeds with id -1.
+  {
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("abcd", &match));
+    EXPECT_EQ(match.id, -1);
+  }
+
+  {
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("", &match));
+    EXPECT_EQ(match.id, -1);
+  }
+
+  // Exact lookups.
+  {
+    int value;
+    EXPECT_TRUE(table.Find("hell", &value));
+    EXPECT_EQ(value, 0);
+  }
+
+  {
+    int value;
+    EXPECT_FALSE(table.Find("hella", &value));
+  }
+
+  {
+    int value;
+    EXPECT_TRUE(table.Find("hello", &value));
+    EXPECT_EQ(value, 1 /*hello*/);
+  }
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/flatbuffers.cc b/native/utils/flatbuffers.cc
index 73ce0cc..cf4c97f 100644
--- a/native/utils/flatbuffers.cc
+++ b/native/utils/flatbuffers.cc

@@ -24,49 +24,6 @@
 
 namespace libtextclassifier3 {
 namespace {
-bool CreateRepeatedField(const reflection::Schema* schema,
-                         const reflection::Type* type,
-                         std::unique_ptr<RepeatedField>* repeated_field) {
-  switch (type->element()) {
-    case reflection::Bool:
-      repeated_field->reset(new TypedRepeatedField<bool>);
-      return true;
-    case reflection::Byte:
-      repeated_field->reset(new TypedRepeatedField<char>);
-      return true;
-    case reflection::UByte:
-      repeated_field->reset(new TypedRepeatedField<unsigned char>);
-      return true;
-    case reflection::Int:
-      repeated_field->reset(new TypedRepeatedField<int>);
-      return true;
-    case reflection::UInt:
-      repeated_field->reset(new TypedRepeatedField<uint>);
-      return true;
-    case reflection::Long:
-      repeated_field->reset(new TypedRepeatedField<int64>);
-      return true;
-    case reflection::ULong:
-      repeated_field->reset(new TypedRepeatedField<uint64>);
-      return true;
-    case reflection::Float:
-      repeated_field->reset(new TypedRepeatedField<float>);
-      return true;
-    case reflection::Double:
-      repeated_field->reset(new TypedRepeatedField<double>);
-      return true;
-    case reflection::String:
-      repeated_field->reset(new TypedRepeatedField<std::string>);
-      return true;
-    case reflection::Obj:
-      repeated_field->reset(
-          new TypedRepeatedField<ReflectiveFlatbuffer>(schema, type));
-      return true;
-    default:
-      TC3_LOG(ERROR) << "Unsupported type: " << type->element();
-      return false;
-  }
-}
 
 // Gets the field information for a field name, returns nullptr if the
 // field was not defined.
@@ -76,8 +33,8 @@
   return type->fields()->LookupByKey(field_name.data());
 }
 
-const reflection::Field* GetFieldByOffsetOrNull(const reflection::Object* type,
-                                                const int field_offset) {
+const reflection::Field* GetFieldOrNull(const reflection::Object* type,
+                                        const int field_offset) {
   if (type->fields() == nullptr) {
     return nullptr;
   }
@@ -97,14 +54,14 @@
   if (!field_name.empty()) {
     return GetFieldOrNull(type, field_name.data());
   }
-  return GetFieldByOffsetOrNull(type, field_offset);
+  return GetFieldOrNull(type, field_offset);
 }
 
 const reflection::Field* GetFieldOrNull(const reflection::Object* type,
                                         const FlatbufferField* field) {
   TC3_CHECK(type != nullptr && field != nullptr);
   if (field->field_name() == nullptr) {
-    return GetFieldByOffsetOrNull(type, field->field_offset());
+    return GetFieldOrNull(type, field->field_offset());
   }
   return GetFieldOrNull(
       type,
@@ -118,6 +75,49 @@
   return GetFieldOrNull(type, field->field_name, field->field_offset);
 }
 
+// Parses `str_value` as a double and narrows the result to float.
+// Returns false if parsing failed; `*value` is only written on success.
+bool Parse(const std::string& str_value, float* value) {
+  double wide_value;
+  if (ParseDouble(str_value.data(), &wide_value)) {
+    *value = static_cast<float>(wide_value);
+    return true;
+  }
+  return false;
+}
+
+// Parses `str_value` as a double via ParseDouble.
+bool Parse(const std::string& str_value, double* value) {
+  const bool parsed = ParseDouble(str_value.data(), value);
+  return parsed;
+}
+
+// Parses `str_value` as a 64-bit integer via ParseInt64.
+bool Parse(const std::string& str_value, int64* value) {
+  const bool parsed = ParseInt64(str_value.data(), value);
+  return parsed;
+}
+
+// Parses `str_value` as a 32-bit integer via ParseInt32.
+bool Parse(const std::string& str_value, int32* value) {
+  const bool parsed = ParseInt32(str_value.data(), value);
+  return parsed;
+}
+
+// String "parsing" is a plain copy and always succeeds.
+bool Parse(const std::string& str_value, std::string* value) {
+  value->assign(str_value);
+  return true;
+}
+
+// Parses `str_value` as a `T` and stores it in `buffer`: appended to the
+// repeated field when `field` is a vector, assigned via Set<T> otherwise.
+// Returns false if the value could not be parsed or could not be stored.
+template <typename T>
+bool ParseAndSetField(const reflection::Field* field,
+                      const std::string& str_value,
+                      ReflectiveFlatbuffer* buffer) {
+  T value;
+  if (!Parse(str_value, &value)) {
+    TC3_LOG(ERROR) << "Could not parse '" << str_value << "'";
+    return false;
+  }
+  if (field->type()->base_type() != reflection::Vector) {
+    return buffer->Set<T>(field, value);
+  }
+  RepeatedField* repeated = buffer->Repeated(field);
+  if (repeated == nullptr) {
+    return false;
+  }
+  // RepeatedField::Add<T> is declared to return bool; propagate it instead of
+  // unconditionally reporting success (presumably false signals a type
+  // mismatch -- confirm against its definition).
+  return repeated->Add(value);
+}
+
 }  // namespace
 
 template <>
@@ -178,48 +178,26 @@
   return true;
 }
 
-const reflection::Field* ReflectiveFlatbuffer::GetFieldByOffsetOrNull(
+const reflection::Field* ReflectiveFlatbuffer::GetFieldOrNull(
     const int field_offset) const {
-  return libtextclassifier3::GetFieldByOffsetOrNull(type_, field_offset);
+  return libtextclassifier3::GetFieldOrNull(type_, field_offset);
 }
 
 bool ReflectiveFlatbuffer::ParseAndSet(const reflection::Field* field,
                                        const std::string& value) {
-  switch (field->type()->base_type()) {
+  switch (field->type()->base_type() == reflection::Vector
+              ? field->type()->element()
+              : field->type()->base_type()) {
     case reflection::String:
-      return Set(field, value);
-    case reflection::Int: {
-      int32 int_value;
-      if (!ParseInt32(value.data(), &int_value)) {
-        TC3_LOG(ERROR) << "Could not parse '" << value << "' as int32.";
-        return false;
-      }
-      return Set(field, int_value);
-    }
-    case reflection::Long: {
-      int64 int_value;
-      if (!ParseInt64(value.data(), &int_value)) {
-        TC3_LOG(ERROR) << "Could not parse '" << value << "' as int64.";
-        return false;
-      }
-      return Set(field, int_value);
-    }
-    case reflection::Float: {
-      double double_value;
-      if (!ParseDouble(value.data(), &double_value)) {
-        TC3_LOG(ERROR) << "Could not parse '" << value << "' as float.";
-        return false;
-      }
-      return Set(field, static_cast<float>(double_value));
-    }
-    case reflection::Double: {
-      double double_value;
-      if (!ParseDouble(value.data(), &double_value)) {
-        TC3_LOG(ERROR) << "Could not parse '" << value << "' as double.";
-        return false;
-      }
-      return Set(field, double_value);
-    }
+      return ParseAndSetField<std::string>(field, value, this);
+    case reflection::Int:
+      return ParseAndSetField<int32>(field, value, this);
+    case reflection::Long:
+      return ParseAndSetField<int64>(field, value, this);
+    case reflection::Float:
+      return ParseAndSetField<float>(field, value, this);
+    case reflection::Double:
+      return ParseAndSetField<double>(field, value, this);
     default:
       TC3_LOG(ERROR) << "Unhandled field type: " << field->type()->base_type();
       return false;
@@ -236,6 +214,27 @@
   return parent->ParseAndSet(field, value);
 }
 
+// Appends a new sub-message to the repeated field called `field_name`.
+// Returns nullptr when no such field exists or the field is not a vector.
+ReflectiveFlatbuffer* ReflectiveFlatbuffer::Add(StringPiece field_name) {
+  const reflection::Field* repeated_field = GetFieldOrNull(field_name);
+  const bool is_vector =
+      repeated_field != nullptr &&
+      repeated_field->type()->base_type() == reflection::BaseType::Vector;
+  return is_vector ? Add(repeated_field) : nullptr;
+}
+
+// Appends a new sub-message to the repeated field `field` and returns it.
+// Returns nullptr if `field` is null or if no repeated field could be
+// obtained for it.
+ReflectiveFlatbuffer* ReflectiveFlatbuffer::Add(
+    const reflection::Field* field) {
+  if (field == nullptr) {
+    return nullptr;
+  }
+  // Repeated() is documented to return nullptr for non-vector fields, so it
+  // must be checked before being dereferenced.
+  RepeatedField* repeated = Repeated(field);
+  if (repeated == nullptr) {
+    return nullptr;
+  }
+  return repeated->Add();
+}
+
 ReflectiveFlatbuffer* ReflectiveFlatbuffer::Mutable(
     const StringPiece field_name) {
   if (const reflection::Field* field = GetFieldOrNull(field_name)) {
@@ -285,11 +284,8 @@
   }
 
   // Otherwise, create a new instance and store it.
-  std::unique_ptr<RepeatedField> repeated_field;
-  if (!CreateRepeatedField(schema_, field->type(), &repeated_field)) {
-    TC3_LOG(ERROR) << "Could not create repeated field.";
-    return nullptr;
-  }
+  std::unique_ptr<RepeatedField> repeated_field(
+      new RepeatedField(schema_, field));
   const auto it = repeated_fields_.insert(
       /*hint=*/entry, std::make_pair(field, std::move(repeated_field)));
   return it->second.get();
@@ -309,9 +305,10 @@
 
   // Create strings.
   for (const auto& it : fields_) {
-    if (it.second.HasString()) {
-      offsets.push_back({it.first->offset(),
-                         builder->CreateString(it.second.StringValue()).o});
+    if (it.second.Has<std::string>()) {
+      offsets.push_back(
+          {it.first->offset(),
+           builder->CreateString(it.second.ConstRefValue<std::string>()).o});
     }
   }
 
@@ -328,44 +325,46 @@
     switch (it.second.GetType()) {
       case Variant::TYPE_BOOL_VALUE:
         builder->AddElement<uint8_t>(
-            it.first->offset(), static_cast<uint8_t>(it.second.BoolValue()),
+            it.first->offset(), static_cast<uint8_t>(it.second.Value<bool>()),
             static_cast<uint8_t>(it.first->default_integer()));
         continue;
       case Variant::TYPE_INT8_VALUE:
         builder->AddElement<int8_t>(
-            it.first->offset(), static_cast<int8_t>(it.second.Int8Value()),
+            it.first->offset(), static_cast<int8_t>(it.second.Value<int8>()),
             static_cast<int8_t>(it.first->default_integer()));
         continue;
       case Variant::TYPE_UINT8_VALUE:
         builder->AddElement<uint8_t>(
-            it.first->offset(), static_cast<uint8_t>(it.second.UInt8Value()),
+            it.first->offset(), static_cast<uint8_t>(it.second.Value<uint8>()),
             static_cast<uint8_t>(it.first->default_integer()));
         continue;
       case Variant::TYPE_INT_VALUE:
         builder->AddElement<int32>(
-            it.first->offset(), it.second.IntValue(),
+            it.first->offset(), it.second.Value<int>(),
             static_cast<int32>(it.first->default_integer()));
         continue;
       case Variant::TYPE_UINT_VALUE:
         builder->AddElement<uint32>(
-            it.first->offset(), it.second.UIntValue(),
+            it.first->offset(), it.second.Value<uint>(),
             static_cast<uint32>(it.first->default_integer()));
         continue;
       case Variant::TYPE_INT64_VALUE:
-        builder->AddElement<int64>(it.first->offset(), it.second.Int64Value(),
+        builder->AddElement<int64>(it.first->offset(), it.second.Value<int64>(),
                                    it.first->default_integer());
         continue;
       case Variant::TYPE_UINT64_VALUE:
-        builder->AddElement<uint64>(it.first->offset(), it.second.UInt64Value(),
+        builder->AddElement<uint64>(it.first->offset(),
+                                    it.second.Value<uint64>(),
                                     it.first->default_integer());
         continue;
       case Variant::TYPE_FLOAT_VALUE:
         builder->AddElement<float>(
-            it.first->offset(), it.second.FloatValue(),
+            it.first->offset(), it.second.Value<float>(),
             static_cast<float>(it.first->default_real()));
         continue;
       case Variant::TYPE_DOUBLE_VALUE:
-        builder->AddElement<double>(it.first->offset(), it.second.DoubleValue(),
+        builder->AddElement<double>(it.first->offset(),
+                                    it.second.Value<double>(),
                                     it.first->default_real());
         continue;
       default:
@@ -398,7 +397,7 @@
     return false;
   }
 
-  TypedRepeatedField<std::string>* to_repeated = Repeated<std::string>(field);
+  RepeatedField* to_repeated = Repeated(field);
   for (const flatbuffers::String* element : *from_vector) {
     to_repeated->Add(element->str());
   }
@@ -414,8 +413,7 @@
     return false;
   }
 
-  TypedRepeatedField<ReflectiveFlatbuffer>* to_repeated =
-      Repeated<ReflectiveFlatbuffer>(field);
+  RepeatedField* to_repeated = Repeated(field);
   for (const flatbuffers::Table* const from_element : *from_vector) {
     ReflectiveFlatbuffer* to_element = to_repeated->Add();
     if (to_element == nullptr) {
@@ -481,7 +479,9 @@
                        ->str());
         break;
       case reflection::Obj:
-        if (!Mutable(field)->MergeFrom(
+        if (ReflectiveFlatbuffer* nested_field = Mutable(field);
+            nested_field == nullptr ||
+            !nested_field->MergeFrom(
                 from->GetPointer<const flatbuffers::Table* const>(
                     field->offset()))) {
           return false;
@@ -614,4 +614,96 @@
   return true;
 }
 
+//
+// Repeated field methods.
+//
+
+// Appends a new, empty sub-message to an object-typed repeated field and
+// returns a pointer to it. Logs an error and returns nullptr when the field
+// holds primitive elements.
+ReflectiveFlatbuffer* RepeatedField::Add() {
+  if (!is_primitive_) {
+    // The element table type is looked up in the schema by the field's type
+    // index.
+    const auto* element_type =
+        schema_->objects()->Get(field_->type()->index());
+    object_items_.emplace_back(new ReflectiveFlatbuffer(schema_, element_type));
+    return object_items_.back().get();
+  }
+
+  TC3_LOG(ERROR) << "Trying to add sub-message on a primitive-typed field.";
+  return nullptr;
+}
+
+namespace {
+
+// Unwraps every variant in `values` as a `T` and writes the resulting values
+// as a flatbuffer vector. Returns the offset of the created vector.
+template <typename T>
+flatbuffers::uoffset_t TypedSerialize(const std::vector<Variant>& values,
+                                      flatbuffers::FlatBufferBuilder* builder) {
+  const size_t count = values.size();
+  std::vector<T> unpacked;
+  unpacked.reserve(count);
+  for (size_t i = 0; i < count; ++i) {
+    unpacked.push_back(values[i].Value<T>());
+  }
+  return builder->CreateVector(unpacked).o;
+}
+
+}  // namespace
+
+// Serializes the repeated field as a flatbuffer vector, dispatching on the
+// element type of the field. Returns the offset of the created vector.
+// Unsupported element types are reported with a FATAL log.
+flatbuffers::uoffset_t RepeatedField::Serialize(
+    flatbuffers::FlatBufferBuilder* builder) const {
+  const auto element_type = field_->type()->element();
+  switch (element_type) {
+    // Strings and sub-messages need per-element serialization.
+    case reflection::String:
+      return SerializeString(builder);
+    case reflection::Obj:
+      return SerializeObject(builder);
+    // Scalars are unwrapped from their variants and written directly.
+    case reflection::Bool:
+      return TypedSerialize<bool>(items_, builder);
+    case reflection::Byte:
+      return TypedSerialize<int8_t>(items_, builder);
+    case reflection::UByte:
+      return TypedSerialize<uint8_t>(items_, builder);
+    case reflection::Int:
+      return TypedSerialize<int>(items_, builder);
+    case reflection::UInt:
+      return TypedSerialize<uint>(items_, builder);
+    case reflection::Long:
+      return TypedSerialize<int64>(items_, builder);
+    case reflection::ULong:
+      return TypedSerialize<uint64>(items_, builder);
+    case reflection::Float:
+      return TypedSerialize<float>(items_, builder);
+    case reflection::Double:
+      return TypedSerialize<double>(items_, builder);
+    default:
+      TC3_LOG(FATAL) << "Unsupported type: " << element_type;
+      break;
+  }
+  TC3_LOG(FATAL) << "Invalid state.";
+  return 0;
+}
+
+// Serializes a repeated string field: each item is written as a flatbuffer
+// string first, then the collected string offsets are written as a vector.
+flatbuffers::uoffset_t RepeatedField::SerializeString(
+    flatbuffers::FlatBufferBuilder* builder) const {
+  std::vector<flatbuffers::Offset<flatbuffers::String>> string_offsets;
+  string_offsets.reserve(items_.size());
+  for (const Variant& item : items_) {
+    string_offsets.push_back(
+        builder->CreateString(item.ConstRefValue<std::string>()));
+  }
+  return builder->CreateVector(string_offsets).o;
+}
+
+// Serializes a repeated sub-message field: each sub-message is serialized
+// first, then the collected offsets are written as a vector.
+flatbuffers::uoffset_t RepeatedField::SerializeObject(
+    flatbuffers::FlatBufferBuilder* builder) const {
+  std::vector<flatbuffers::Offset<void>> object_offsets;
+  object_offsets.reserve(object_items_.size());
+  for (const auto& sub_message : object_items_) {
+    object_offsets.push_back(sub_message->Serialize(builder));
+  }
+  return builder->CreateVector(object_offsets).o;
+}
+
 }  // namespace libtextclassifier3

diff --git a/native/utils/flatbuffers.h b/native/utils/flatbuffers.h
index 81bc0b5..aaf248e 100644
--- a/native/utils/flatbuffers.h
+++ b/native/utils/flatbuffers.h

@@ -19,9 +19,9 @@
 #ifndef LIBTEXTCLASSIFIER_UTILS_FLATBUFFERS_H_
 #define LIBTEXTCLASSIFIER_UTILS_FLATBUFFERS_H_
 
-#include <map>
 #include <memory>
 #include <string>
+#include <unordered_map>
 
 #include "annotator/model_generated.h"
 #include "utils/base/logging.h"
@@ -30,13 +30,12 @@
 #include "utils/variant.h"
 #include "flatbuffers/flatbuffers.h"
 #include "flatbuffers/reflection.h"
+#include "flatbuffers/reflection_generated.h"
 
 namespace libtextclassifier3 {
 
 class ReflectiveFlatBuffer;
 class RepeatedField;
-template <typename T>
-class TypedRepeatedField;
 
 // Loads and interprets the buffer as 'FlatbufferMessage' and verifies its
 // integrity.
@@ -104,6 +103,41 @@
                      builder.GetSize());
 }
 
+class ReflectiveFlatbuffer;
+
+// Returns true if the C++ type `T` is an acceptable value type for a
+// flatbuffer field whose reflection base type is `type`. String fields accept
+// std::string, StringPiece and const char*; any base type not listed below
+// matches nothing.
+template <typename T>
+bool IsMatchingType(const reflection::BaseType type) {
+  switch (type) {
+    case reflection::Bool:
+      return std::is_same_v<T, bool>;
+    case reflection::Byte:
+      return std::is_same_v<T, int8>;
+    case reflection::UByte:
+      return std::is_same_v<T, uint8>;
+    case reflection::Int:
+      return std::is_same_v<T, int32>;
+    case reflection::UInt:
+      return std::is_same_v<T, uint32>;
+    case reflection::Long:
+      return std::is_same_v<T, int64>;
+    case reflection::ULong:
+      return std::is_same_v<T, uint64>;
+    case reflection::Float:
+      return std::is_same_v<T, float>;
+    case reflection::Double:
+      return std::is_same_v<T, double>;
+    case reflection::String:
+      return std::is_same_v<T, std::string> ||
+             std::is_same_v<T, StringPiece> || std::is_same_v<T, const char*>;
+    case reflection::Obj:
+      return std::is_same_v<T, ReflectiveFlatbuffer>;
+    default:
+      return false;
+  }
+}
+
 // A flatbuffer that can be built using flatbuffer reflection data of the
 // schema.
 // Normally, field information is hard-coded in code generated from a flatbuffer
@@ -122,119 +156,58 @@
   // field was not defined.
   const reflection::Field* GetFieldOrNull(const StringPiece field_name) const;
   const reflection::Field* GetFieldOrNull(const FlatbufferField* field) const;
-  const reflection::Field* GetFieldByOffsetOrNull(const int field_offset) const;
+  const reflection::Field* GetFieldOrNull(const int field_offset) const;
 
   // Gets a nested field and the message it is defined on.
   bool GetFieldWithParent(const FlatbufferFieldPath* field_path,
                           ReflectiveFlatbuffer** parent,
                           reflection::Field const** field);
 
-  // Checks whether a variant value type agrees with a field type.
-  template <typename T>
-  bool IsMatchingType(const reflection::BaseType type) const {
-    switch (type) {
-      case reflection::Bool:
-        return std::is_same<T, bool>::value;
-      case reflection::Byte:
-        return std::is_same<T, int8>::value;
-      case reflection::UByte:
-        return std::is_same<T, uint8>::value;
-      case reflection::Int:
-        return std::is_same<T, int32>::value;
-      case reflection::UInt:
-        return std::is_same<T, uint32>::value;
-      case reflection::Long:
-        return std::is_same<T, int64>::value;
-      case reflection::ULong:
-        return std::is_same<T, uint64>::value;
-      case reflection::Float:
-        return std::is_same<T, float>::value;
-      case reflection::Double:
-        return std::is_same<T, double>::value;
-      case reflection::String:
-        return std::is_same<T, std::string>::value ||
-               std::is_same<T, StringPiece>::value ||
-               std::is_same<T, const char*>::value;
-      case reflection::Obj:
-        return std::is_same<T, ReflectiveFlatbuffer>::value;
-      default:
-        return false;
-    }
-  }
-
-  // Sets a (primitive) field to a specific value.
+  // Sets a field to a specific value.
   // Returns true if successful, and false if the field was not found or the
   // expected type doesn't match.
   template <typename T>
-  bool Set(StringPiece field_name, T value) {
-    if (const reflection::Field* field = GetFieldOrNull(field_name)) {
-      return Set<T>(field, value);
-    }
-    return false;
-  }
+  bool Set(StringPiece field_name, T value);
 
-  // Sets a (primitive) field to a specific value.
+  // Sets a field to a specific value.
   // Returns true if successful, and false if the expected type doesn't match.
   // Expects `field` to be non-null.
   template <typename T>
-  bool Set(const reflection::Field* field, T value) {
-    if (field == nullptr) {
-      TC3_LOG(ERROR) << "Expected non-null field.";
-      return false;
-    }
-    Variant variant_value(value);
-    if (!IsMatchingType<T>(field->type()->base_type())) {
-      TC3_LOG(ERROR) << "Type mismatch for field `" << field->name()->str()
-                     << "`, expected: " << field->type()->base_type()
-                     << ", got: " << variant_value.GetType();
-      return false;
-    }
-    fields_[field] = variant_value;
-    return true;
-  }
+  bool Set(const reflection::Field* field, T value);
 
+  // Sets a field to a specific value. Field is specified by path.
   template <typename T>
-  bool Set(const FlatbufferFieldPath* path, T value) {
-    ReflectiveFlatbuffer* parent;
-    const reflection::Field* field;
-    if (!GetFieldWithParent(path, &parent, &field)) {
-      return false;
-    }
-    return parent->Set<T>(field, value);
-  }
+  bool Set(const FlatbufferFieldPath* path, T value);
 
-  // Sets a (primitive) field to a specific value.
-  // Parses the string value according to the field type.
-  bool ParseAndSet(const reflection::Field* field, const std::string& value);
-  bool ParseAndSet(const FlatbufferFieldPath* path, const std::string& value);
-
-  // Gets the reflective flatbuffer for a table field.
+  // Sets sub-message field (if not set yet), and returns a pointer to it.
   // Returns nullptr if the field was not found, or the field type was not a
   // table.
   ReflectiveFlatbuffer* Mutable(StringPiece field_name);
   ReflectiveFlatbuffer* Mutable(const reflection::Field* field);
 
+  // Parses the value (according to the type) and sets a primitive field to the
+  // parsed value.
+  bool ParseAndSet(const reflection::Field* field, const std::string& value);
+  bool ParseAndSet(const FlatbufferFieldPath* path, const std::string& value);
+
+  // Adds a primitive value to the repeated field.
+  template <typename T>
+  bool Add(StringPiece field_name, T value);
+
+  // Adds a sub-message to the repeated field and returns a pointer to it.
+  // Returns nullptr if the field was not found or is not a vector.
+  ReflectiveFlatbuffer* Add(StringPiece field_name);
+
+  template <typename T>
+  bool Add(const reflection::Field* field, T value);
+
+  ReflectiveFlatbuffer* Add(const reflection::Field* field);
+
   // Gets the reflective flatbuffer for a repeated field.
   // Returns nullptr if the field was not found, or the field type was not a
   // vector.
   RepeatedField* Repeated(StringPiece field_name);
   RepeatedField* Repeated(const reflection::Field* field);
 
-  template <typename T>
-  TypedRepeatedField<T>* Repeated(const reflection::Field* field) {
-    if (!IsMatchingType<T>(field->type()->element())) {
-      TC3_LOG(ERROR) << "Type mismatch for field `" << field->name()->str()
-                     << "`";
-      return nullptr;
-    }
-    return static_cast<TypedRepeatedField<T>*>(Repeated(field));
-  }
-
-  template <typename T>
-  TypedRepeatedField<T>* Repeated(StringPiece field_name) {
-    return static_cast<TypedRepeatedField<T>*>(Repeated(field_name));
-  }
-
   // Serializes the flatbuffer.
   flatbuffers::uoffset_t Serialize(
       flatbuffers::FlatBufferBuilder* builder) const;
@@ -274,14 +247,15 @@
   const reflection::Object* const type_;
 
   // Cached primitive fields (scalars and strings).
-  std::map<const reflection::Field*, Variant> fields_;
+  std::unordered_map<const reflection::Field*, Variant> fields_;
 
   // Cached sub-messages.
-  std::map<const reflection::Field*, std::unique_ptr<ReflectiveFlatbuffer>>
+  std::unordered_map<const reflection::Field*,
+                     std::unique_ptr<ReflectiveFlatbuffer>>
       children_;
 
   // Cached repeated fields.
-  std::map<const reflection::Field*, std::unique_ptr<RepeatedField>>
+  std::unordered_map<const reflection::Field*, std::unique_ptr<RepeatedField>>
       repeated_fields_;
 
   // Flattens the flatbuffer as a flat map.
@@ -316,77 +290,132 @@
 // Serves as a common base class for repeated fields.
 class RepeatedField {
  public:
-  virtual ~RepeatedField() {}
+  RepeatedField(const reflection::Schema* const schema,
+                const reflection::Field* field)
+      : schema_(schema),
+        field_(field),
+        is_primitive_(field->type()->element() != reflection::BaseType::Obj) {}
 
-  virtual flatbuffers::uoffset_t Serialize(
-      flatbuffers::FlatBufferBuilder* builder) const = 0;
-};
+  template <typename T>
+  bool Add(const T value);
 
-// Represents a repeated field of particular type.
-template <typename T>
-class TypedRepeatedField : public RepeatedField {
- public:
-  void Add(const T value) { items_.push_back(value); }
+  ReflectiveFlatbuffer* Add();
 
-  flatbuffers::uoffset_t Serialize(
-      flatbuffers::FlatBufferBuilder* builder) const override {
-    return builder->CreateVector(items_).o;
+  template <typename T>
+  T Get(int index) const {
+    return items_.at(index).Value<T>();
   }
 
- private:
-  std::vector<T> items_;
-};
-
-// Specialization for strings.
-template <>
-class TypedRepeatedField<std::string> : public RepeatedField {
- public:
-  void Add(const std::string& value) { items_.push_back(value); }
-
-  flatbuffers::uoffset_t Serialize(
-      flatbuffers::FlatBufferBuilder* builder) const override {
-    std::vector<flatbuffers::Offset<flatbuffers::String>> offsets(
-        items_.size());
-    for (int i = 0; i < items_.size(); i++) {
-      offsets[i] = builder->CreateString(items_[i]);
+  template <>
+  ReflectiveFlatbuffer* Get(int index) const {
+    if (is_primitive_) {  // Sub-messages are only kept in `object_items_`.
+      TC3_LOG(ERROR) << "Trying to get sub-message out of primitive "
+                        "repeated field.";
+      return nullptr;
     }
-    return builder->CreateVector(offsets).o;
+    return object_items_.at(index).get();
   }
 
- private:
-  std::vector<std::string> items_;
-};
-
-// Specialization for repeated sub-messages.
-template <>
-class TypedRepeatedField<ReflectiveFlatbuffer> : public RepeatedField {
- public:
-  TypedRepeatedField<ReflectiveFlatbuffer>(
-      const reflection::Schema* const schema,
-      const reflection::Type* const type)
-      : schema_(schema), type_(type) {}
-
-  ReflectiveFlatbuffer* Add() {
-    items_.emplace_back(new ReflectiveFlatbuffer(
-        schema_, schema_->objects()->Get(type_->index())));
-    return items_.back().get();
+  int Size() const {
+    if (is_primitive_) {
+      return items_.size();
+    } else {
+      return object_items_.size();
+    }
   }
 
   flatbuffers::uoffset_t Serialize(
-      flatbuffers::FlatBufferBuilder* builder) const override {
-    std::vector<flatbuffers::Offset<void>> offsets(items_.size());
-    for (int i = 0; i < items_.size(); i++) {
-      offsets[i] = items_[i]->Serialize(builder);
-    }
-    return builder->CreateVector(offsets).o;
-  }
+      flatbuffers::FlatBufferBuilder* builder) const;
 
  private:
+  flatbuffers::uoffset_t SerializeString(
+      flatbuffers::FlatBufferBuilder* builder) const;
+  flatbuffers::uoffset_t SerializeObject(
+      flatbuffers::FlatBufferBuilder* builder) const;
+
   const reflection::Schema* const schema_;
-  const reflection::Type* const type_;
-  std::vector<std::unique_ptr<ReflectiveFlatbuffer>> items_;
+  const reflection::Field* field_;
+  bool is_primitive_;
+
+  std::vector<Variant> items_;
+  std::vector<std::unique_ptr<ReflectiveFlatbuffer>> object_items_;
 };
 
+template <typename T>
+bool ReflectiveFlatbuffer::Set(StringPiece field_name, T value) {
+  const reflection::Field* target = GetFieldOrNull(field_name);
+  if (target == nullptr) {
+    TC3_LOG(ERROR) << "Couldn't find a field: " << field_name;
+    return false;
+  }
+  if (target->type()->base_type() == reflection::BaseType::Vector ||
+      target->type()->base_type() == reflection::BaseType::Obj) {
+    TC3_LOG(ERROR) << "Trying to set a primitive value on a non-scalar field.";
+    return false;
+  }
+  return Set<T>(target, value);  // Field-based overload does the type check.
+}
+
+template <typename T>
+bool ReflectiveFlatbuffer::Set(const reflection::Field* field, T value) {
+  if (field == nullptr) {
+    TC3_LOG(ERROR) << "Expected non-null field.";
+    return false;
+  }
+  Variant variant_value(value);
+  if (IsMatchingType<T>(field->type()->base_type())) {
+    fields_[field] = variant_value;  // Overwrites any previously set value.
+    return true;
+  }
+  TC3_LOG(ERROR) << "Type mismatch for field `" << field->name()->str()
+                 << "`, expected: " << field->type()->base_type()
+                 << ", got: " << variant_value.GetType();
+  return false;
+}
+
+template <typename T>
+bool ReflectiveFlatbuffer::Set(const FlatbufferFieldPath* path, T value) {
+  // Looks up the field addressed by `path` together with its parent message
+  // and sets the value on that parent; fails if the lookup fails.
+  ReflectiveFlatbuffer* owner;
+  const reflection::Field* leaf;
+  return GetFieldWithParent(path, &owner, &leaf) &&
+         owner->Set<T>(leaf, value);
+}
+
+// Adds `value` to the repeated field named `field_name`; returns false if the
+// name cannot be resolved or the field is not a vector.
+template <typename T>
+bool ReflectiveFlatbuffer::Add(StringPiece field_name, T value) {
+  const reflection::Field* target = GetFieldOrNull(field_name);
+  const bool is_vector =
+      target != nullptr &&
+      target->type()->base_type() == reflection::BaseType::Vector;
+  if (!is_vector) {
+    return false;
+  }
+  return Add<T>(target, value);
+}
+
+template <typename T>
+bool ReflectiveFlatbuffer::Add(const reflection::Field* field, T value) {
+  if (field == nullptr) {
+    return false;
+  }
+  RepeatedField* repeated = Repeated(field);  // nullptr if not a vector.
+  return repeated != nullptr && repeated->Add(value);  // Propagate mismatch.
+}
+
+template <typename T>
+bool RepeatedField::Add(const T value) {
+  if (is_primitive_ && IsMatchingType<T>(field_->type()->element())) {
+    items_.push_back(Variant{value});  // Primitive items are kept as variants.
+    return true;
+  }
+  TC3_LOG(ERROR) << "Trying to add value of unmatching type.";
+  return false;
+}
+
 // Resolves field lookups by name to the concrete field offsets.
 bool SwapFieldNamesForOffsetsInPath(const reflection::Schema* schema,
                                     FlatbufferFieldPathT* path);
@@ -400,7 +429,7 @@
     return false;
   }
 
-  TypedRepeatedField<T>* to_repeated = Repeated<T>(field);
+  RepeatedField* to_repeated = Repeated(field);
   for (const T element : *from_vector) {
     to_repeated->Add(element);
   }

diff --git a/native/utils/flatbuffers_test_extended.fbs b/native/utils/flatbuffers_test_extended.fbs
deleted file mode 100644
index ca679dc..0000000
--- a/native/utils/flatbuffers_test_extended.fbs
+++ /dev/null

@@ -1,50 +0,0 @@
-//
-// Copyright (C) 2018 The Android Open Source Project
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-namespace libtextclassifier3.test;
-
-table FlightNumberInfo {
-  carrier_code: string;
-  flight_code: int;
-}
-
-table ContactInfo {
-  first_name: string;
-  last_name: string;
-  phone_number: string;
-  score: float;
-}
-
-table Reminder {
-  title: string;
-  notes: [string];
-}
-
-table EntityData {
-  an_int_field: int;
-  a_long_field: int64;
-  a_bool_field: bool;
-  a_float_field: float;
-  a_double_field: double;
-  flight_number: FlightNumberInfo;
-  contact_info: ContactInfo;
-  reminders: [Reminder];
-  numbers: [int];
-  strings: [string];
-  mystic: string;  // Extra field.
-}
-
-root_type libtextclassifier3.test.EntityData;

diff --git a/native/utils/grammar/callback-delegate.cc b/native/utils/grammar/callback-delegate.cc
deleted file mode 100644
index c5a1ac5..0000000
--- a/native/utils/grammar/callback-delegate.cc
+++ /dev/null

@@ -1,50 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/grammar/callback-delegate.h"
-
-#include "utils/grammar/matcher.h"
-
-namespace libtextclassifier3::grammar {
-
-void CallbackDelegate::HandleCapturingMatch(const Match* match,
-                                            const uint64 match_id,
-                                            Matcher* matcher) const {
-  // Allocate match on matcher arena and initialize.
-  // Will be deallocated by the arena.
-  CapturingMatch* capturing_match =
-      matcher->AllocateAndInitMatch<CapturingMatch>(*match);
-  capturing_match->type = Match::kCapturingMatch;
-  capturing_match->id = static_cast<uint16>(match_id);
-
-  // Queue the match.
-  matcher->AddMatch(capturing_match);
-}
-
-void CallbackDelegate::HandleAssertion(const grammar::Match* match,
-                                       bool negative, Matcher* matcher) const {
-  // Allocate match on matcher arena and initialize.
-  // Will be deallocated by the arena.
-  AssertionMatch* assertion_match =
-      matcher->AllocateAndInitMatch<AssertionMatch>(*match);
-  assertion_match->type = Match::kAssertionMatch;
-  assertion_match->negative = negative;
-
-  // Queue the match.
-  matcher->AddMatch(assertion_match);
-}
-
-}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/callback-delegate.h b/native/utils/grammar/callback-delegate.h
index f46c232..a5424dd 100644
--- a/native/utils/grammar/callback-delegate.h
+++ b/native/utils/grammar/callback-delegate.h

@@ -36,17 +36,6 @@
   // which a  callback is attached.
   virtual void MatchFound(const Match* match, const CallbackId callback_id,
                           const int64 callback_param, Matcher* matcher) {}
-
- protected:
-  // Handles capturing matches.
-  // This associates sub-matches with rule defined ids so that individual parts
-  // of an action rule match can later be retrieved easily.
-  void HandleCapturingMatch(const Match* match, uint64 match_id,
-                            Matcher* matcher) const;
-
-  // Handles assertion matches.
-  void HandleAssertion(const Match* match, bool negative,
-                       Matcher* matcher) const;
 };
 
 }  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/lexer.cc b/native/utils/grammar/lexer.cc
index 80d81e7..3a2d0d3 100644
--- a/native/utils/grammar/lexer.cc
+++ b/native/utils/grammar/lexer.cc

@@ -18,6 +18,10 @@
 
 #include <unordered_map>
 
+#include "annotator/types.h"
+#include "utils/zlib/zlib.h"
+#include "utils/zlib/zlib_regex.h"
+
 namespace libtextclassifier3::grammar {
 namespace {
 
@@ -46,8 +50,43 @@
   }
 }
 
+int MapCodepointToTokenPaddingIfPresent(
+    const std::unordered_map<CodepointIndex, CodepointIndex>& token_alignment,
+    const int start) {
+  // Returns the position recorded for `start` in `token_alignment`, or
+  // `start` itself when the map has no entry for it.
+  const auto entry = token_alignment.find(start);
+  const bool found = entry != token_alignment.end();
+  return found ? entry->second : start;
+}
+
 }  // namespace
 
+Lexer::Lexer(const UniLib* unilib, const RulesSet* rules)
+    : unilib_(*unilib),
+      rules_(rules),
+      regex_annotators_(BuildRegexAnnotator(unilib_, rules)) {}
+
+std::vector<Lexer::RegexAnnotator> Lexer::BuildRegexAnnotator(
+    const UniLib& unilib, const RulesSet* rules) const {
+  std::vector<Lexer::RegexAnnotator> result;  // Empty if none are defined.
+  if (rules->regex_annotator() != nullptr) {
+    std::unique_ptr<ZlibDecompressor> decompressor =
+        ZlibDecompressor::Instance();  // One instance reused for all patterns.
+    result.reserve(rules->regex_annotator()->size());
+    for (const RulesSet_::RegexAnnotator* regex_annotator :
+         *rules->regex_annotator()) {
+      result.push_back(  // Uses the `unilib` argument, not the member.
+          {UncompressMakeRegexPattern(unilib, regex_annotator->pattern(),
+                                      regex_annotator->compressed_pattern(),
+                                      rules->lazy_regex_compilation(),
+                                      decompressor.get()),
+           regex_annotator->nonterminal()});
+    }
+  }
+  return result;
+}
+
 void Lexer::Emit(const Symbol& symbol, const RulesSet_::Nonterminals* nonterms,
                  Matcher* matcher) const {
   switch (symbol.type) {
@@ -144,21 +183,22 @@
   }
 }
 
-void Lexer::Process(const std::vector<Token>& tokens,
-                    const std::vector<Match*>& matches,
+void Lexer::Process(const UnicodeText& text, const std::vector<Token>& tokens,
+                    const std::vector<AnnotatedSpan>* annotations,
                     Matcher* matcher) const {
-  return Process(tokens.begin(), tokens.end(), matches, matcher);
+  return Process(text, tokens.begin(), tokens.end(), annotations, matcher);
 }
 
-void Lexer::Process(const std::vector<Token>::const_iterator& begin,
+void Lexer::Process(const UnicodeText& text,
+                    const std::vector<Token>::const_iterator& begin,
                     const std::vector<Token>::const_iterator& end,
-                    const std::vector<Match*>& matches,
+                    const std::vector<AnnotatedSpan>* annotations,
                     Matcher* matcher) const {
   if (begin == end) {
     return;
   }
 
-  const RulesSet_::Nonterminals* nonterminals = matcher->nonterminals();
+  const RulesSet_::Nonterminals* nonterminals = rules_->nonterminals();
 
   // Initialize processing of new text.
   CodepointIndex prev_token_end = 0;
@@ -219,14 +259,45 @@
     symbols.push_back(Symbol(match));
   }
 
-  // Add predefined matches.
-  for (Match* match : matches) {
-    // Decrease match offset to include preceding whitespace.
-    auto token_match_start_it = token_match_start.find(match->match_offset);
-    if (token_match_start_it != token_match_start.end()) {
-      match->match_offset = token_match_start_it->second;
+  // Add matches based on annotations.
+  auto annotation_nonterminals = nonterminals->annotation_nt();
+  if (annotation_nonterminals != nullptr && annotations != nullptr) {
+    for (const AnnotatedSpan& annotated_span : *annotations) {
+      const ClassificationResult& classification =
+          annotated_span.classification.front();
+      if (auto entry = annotation_nonterminals->LookupByKey(
+              classification.collection.c_str())) {
+        AnnotationMatch* match = matcher->AllocateAndInitMatch<AnnotationMatch>(
+            entry->value(), annotated_span.span,
+            /*match_offset=*/
+            MapCodepointToTokenPaddingIfPresent(token_match_start,
+                                                annotated_span.span.first),
+            Match::kAnnotationMatch);
+        match->annotation = &classification;
+        symbols.push_back(Symbol(match));
+      }
     }
-    symbols.push_back(Symbol(match));
+  }
+
+  // Add regex annotator matches for the range covered by the tokens.
+  for (const RegexAnnotator& regex_annotator : regex_annotators_) {
+    std::unique_ptr<UniLib::RegexMatcher> regex_matcher =
+        regex_annotator.pattern->Matcher(UnicodeText::Substring(
+            text, begin->start, prev_token_end, /*do_copy=*/false));
+    int status = UniLib::RegexMatcher::kNoError;
+    while (regex_matcher->Find(&status) &&
+           status == UniLib::RegexMatcher::kNoError) {
+      const CodepointSpan span = {
+          regex_matcher->Start(0, &status) + begin->start,
+          regex_matcher->End(0, &status) + begin->start};
+      if (Match* match =
+              CheckedAddMatch(regex_annotator.nonterm, span, /*match_offset=*/
+                              MapCodepointToTokenPaddingIfPresent(
+                                  token_match_start, span.first),
+                              Match::kUnknownType, matcher)) {
+        symbols.push_back(Symbol(match));
+      }
+    }
   }
 
   std::sort(symbols.begin(), symbols.end(),
@@ -242,6 +313,9 @@
   for (const Symbol& symbol : symbols) {
     Emit(symbol, nonterminals, matcher);
   }
+
+  // Finish the matching.
+  matcher->Finish();
 }
 
 }  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/lexer.h b/native/utils/grammar/lexer.h
index 4a0d0ef..ca31c25 100644
--- a/native/utils/grammar/lexer.h
+++ b/native/utils/grammar/lexer.h

@@ -75,16 +75,20 @@
 
 class Lexer {
  public:
-  explicit Lexer(const UniLib& unilib) : unilib_(unilib) {}
+  explicit Lexer(const UniLib* unilib, const RulesSet* rules);
 
   // Processes a tokenized text. Classifies the tokens and feeds them to the
-  // matcher. Predefined existing matches `matches` will be fed to the matcher
-  // alongside the tokens.
-  void Process(const std::vector<Token>& tokens,
-               const std::vector<Match*>& matches, Matcher* matcher) const;
-  void Process(const std::vector<Token>::const_iterator& begin,
+  // matcher.
+  // The provided annotations will be fed to the matcher alongside the tokens.
+  // NOTE: The `annotations` need to outlive any dependent processing.
+  void Process(const UnicodeText& text, const std::vector<Token>& tokens,
+               const std::vector<AnnotatedSpan>* annotations,
+               Matcher* matcher) const;
+  void Process(const UnicodeText& text,
+               const std::vector<Token>::const_iterator& begin,
                const std::vector<Token>::const_iterator& end,
-               const std::vector<Match*>& matches, Matcher* matcher) const;
+               const std::vector<AnnotatedSpan>* annotations,
+               Matcher* matcher) const;
 
  private:
   // A lexical symbol with an identified meaning that represents raw tokens,
@@ -153,7 +157,18 @@
   Symbol::Type GetSymbolType(const UnicodeText::const_iterator& it) const;
 
  private:
+  struct RegexAnnotator {
+    std::unique_ptr<UniLib::RegexPattern> pattern;
+    Nonterm nonterm;
+  };
+
+  // Uncompress and build the defined regex annotators.
+  std::vector<RegexAnnotator> BuildRegexAnnotator(const UniLib& unilib,
+                                                  const RulesSet* rules) const;
+
   const UniLib& unilib_;
+  const RulesSet* rules_;
+  std::vector<RegexAnnotator> regex_annotators_;
 };
 
 }  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/match.h b/native/utils/grammar/match.h
index c1c7022..97edac9 100644
--- a/native/utils/grammar/match.h
+++ b/native/utils/grammar/match.h

@@ -30,12 +30,14 @@
 // Instances should be created by calling Matcher::AllocateMatch().
 // This uses an arena to allocate matches (and subclasses thereof).
 struct Match {
-  static const int16 kUnknownType = 0;
-  static const int16 kTokenType = -1;
-  static const int16 kDigitsType = -2;
-  static const int16 kBreakType = -3;
-  static const int16 kCapturingMatch = -4;
-  static const int16 kAssertionMatch = -5;
+  static constexpr int16 kUnknownType = 0;
+  static constexpr int16 kTokenType = -1;
+  static constexpr int16 kDigitsType = -2;
+  static constexpr int16 kBreakType = -3;
+  static constexpr int16 kAssertionMatch = -4;
+  static constexpr int16 kMappingMatch = -5;
+  static constexpr int16 kExclusionMatch = -6;
+  static constexpr int16 kAnnotationMatch = -7;
 
   void Init(const Nonterm arg_lhs, const CodepointSpan arg_codepoint_span,
             const int arg_match_offset, const int arg_type = kUnknownType) {
@@ -92,12 +94,10 @@
   const Match* rhs2 = nullptr;
 };
 
-// Match type to keep track of capturing parts of rules.
-struct CapturingMatch : public Match {
-  // The id of the capturing match.
-  // This id allows to individual rules to access sub-matches in a similar way
-  // as e.g. capturing group ids in regular expressions allow.
-  uint16 id;
+// Match type to keep track of associated values.
+struct MappingMatch : public Match {
+  // The associated id or value.
+  int64 id;
 };
 
 // Match type to keep track of assertions.
@@ -107,6 +107,19 @@
   bool negative;
 };
 
+// Match type to define exclusions.
+struct ExclusionMatch : public Match {
+  // The nonterminal that denotes matches to exclude from a successful match.
+  // So the match is only valid if there is no match of `exclusion_nonterm`
+  // spanning the same text range.
+  Nonterm exclusion_nonterm;
+};
+
+// Match to represent an annotator annotated span in the grammar.
+struct AnnotationMatch : public Match {
+  const ClassificationResult* annotation;
+};
+
 // Utility functions for parse tree traversal.
 
 // Does a preorder traversal, calling `node_fn` on each node.
@@ -135,9 +148,23 @@
 
 // Retrieves the first child node of a given type.
 template <typename T>
-const T* SelectFirstOfType(const Match* match, const int64 type) {
+const T* SelectFirstOfType(const Match* root, const int16 type) {
   return static_cast<const T*>(SelectFirst(
-      match, [type](const Match* node) { return (node->type == type); }));
+      root, [type](const Match* node) { return node->type == type; }));
+}
+
+// Retrieves all nodes of a given type.
+template <typename T>
+const std::vector<const T*> SelectAllOfType(const Match* root,
+                                            const int16 type) {
+  std::vector<const T*> result;
+  Traverse(root, [&result, type](const Match* node) {
+    if (node->type == type) {
+      result.push_back(static_cast<const T*>(node));
+    }
+    return true;
+  });
+  return result;
 }
 
 }  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/matcher.cc b/native/utils/grammar/matcher.cc
index 6386951..a8ebba5 100644
--- a/native/utils/grammar/matcher.cc
+++ b/native/utils/grammar/matcher.cc

@@ -47,8 +47,8 @@
 
 // Iterator that lowercases a utf8 string on the fly and enumerates the bytes.
 struct LowercasingByteIterator {
-  LowercasingByteIterator(const UniLib& unilib, StringPiece text)
-      : unilib(unilib),
+  LowercasingByteIterator(const UniLib* unilib, StringPiece text)
+      : unilib(*unilib),
         data(text.data()),
         end(text.data() + text.size()),
         buffer_pos(0),
@@ -240,13 +240,37 @@
   state_ = STATE_DEFAULT;
   arena_.Reset();
   pending_items_ = nullptr;
+  pending_exclusion_items_ = nullptr;
   std::fill(chart_.begin(), chart_.end(), nullptr);
   last_end_ = std::numeric_limits<int>().lowest();
 }
 
+void Matcher::Finish() {
+  // Check any pending items.
+  ProcessPendingExclusionMatches();
+}
+
+void Matcher::QueueForProcessing(Match* item) {
+  // Push element to the front.
+  item->next = pending_items_;
+  pending_items_ = item;
+}
+
+void Matcher::QueueForPostCheck(ExclusionMatch* item) {
+  // Push element to the front.
+  item->next = pending_exclusion_items_;
+  pending_exclusion_items_ = item;
+}
+
 void Matcher::AddTerminal(const CodepointSpan codepoint_span,
                           const int match_offset, StringPiece terminal) {
   TC3_CHECK_GE(codepoint_span.second, last_end_);
+
+  // Finish any pending post-checks.
+  if (codepoint_span.second > last_end_) {
+    ProcessPendingExclusionMatches();
+  }
+
   last_end_ = codepoint_span.second;
   for (const RulesSet_::Rules* shard : rules_shards_) {
     // Try case-sensitive matches.
@@ -267,7 +291,7 @@
 
     // Try case-insensitive matches.
     if (const RulesSet_::LhsSet* lhs_set = FindTerminalMatches(
-            LowercasingByteIterator(unilib_, terminal), rules_,
+            LowercasingByteIterator(&unilib_, terminal), rules_,
             shard->lowercase_terminal_rules(), &terminal)) {
       // `terminal` points now into the rules string pool, providing a
       // stable reference.
@@ -286,6 +310,12 @@
 
 void Matcher::AddMatch(Match* match) {
   TC3_CHECK_GE(match->codepoint_span.second, last_end_);
+
+  // Finish any pending post-checks.
+  if (match->codepoint_span.second > last_end_) {
+    ProcessPendingExclusionMatches();
+  }
+
   last_end_ = match->codepoint_span.second;
   QueueForProcessing(match);
   ProcessPendingSet();
@@ -313,6 +343,48 @@
       continue;
     }
 
+    // Handle default callbacks.
+    switch (static_cast<DefaultCallback>(callback_id)) {
+      case DefaultCallback::kSetType: {
+        Match* typed_match = AllocateAndInitMatch<Match>(lhs, codepoint_span,
+                                                         match_offset_bytes);
+        initializer(typed_match);
+        typed_match->type = callback_param;
+        QueueForProcessing(typed_match);
+        continue;
+      }
+      case DefaultCallback::kAssertion: {
+        AssertionMatch* assertion_match = AllocateAndInitMatch<AssertionMatch>(
+            lhs, codepoint_span, match_offset_bytes);
+        initializer(assertion_match);
+        assertion_match->type = Match::kAssertionMatch;
+        assertion_match->negative = (callback_param != 0);
+        QueueForProcessing(assertion_match);
+        continue;
+      }
+      case DefaultCallback::kMapping: {
+        MappingMatch* mapping_match = AllocateAndInitMatch<MappingMatch>(
+            lhs, codepoint_span, match_offset_bytes);
+        initializer(mapping_match);
+        mapping_match->type = Match::kMappingMatch;
+        mapping_match->id = callback_param;
+        QueueForProcessing(mapping_match);
+        continue;
+      }
+      case DefaultCallback::kExclusion: {
+        // We can only check the exclusion once all matches up to this position
+        // have been processed. Schedule and post check later.
+        ExclusionMatch* exclusion_match = AllocateAndInitMatch<ExclusionMatch>(
+            lhs, codepoint_span, match_offset_bytes);
+        initializer(exclusion_match);
+        exclusion_match->exclusion_nonterm = callback_param;
+        QueueForPostCheck(exclusion_match);
+        continue;
+      }
+      default:
+        break;
+    }
+
     if (callback_id != kNoCallback && rules_->callback() != nullptr) {
       const RulesSet_::CallbackEntry* callback_info =
           rules_->callback()->LookupByKey(callback_id);
@@ -408,4 +480,33 @@
   state_ = STATE_DEFAULT;
 }
 
+void Matcher::ProcessPendingExclusionMatches() {
+  while (pending_exclusion_items_) {
+    ExclusionMatch* item = pending_exclusion_items_;
+    pending_exclusion_items_ = static_cast<ExclusionMatch*>(item->next);
+    // The item is unlinked before `AddMatch`, which may re-enter this method.
+    // Only add the item if no match of `exclusion_nonterm` covers its span.
+    if (!ContainsMatch(item->exclusion_nonterm, item->codepoint_span)) {
+      AddMatch(item);
+    }
+  }
+}
+
+bool Matcher::ContainsMatch(const Nonterm nonterm,
+                            const CodepointSpan& span) const {
+  // The bucket is selected by the span end; its chain of items is in
+  // decreasing `end` order, so skip larger ends, then scan the equal ones.
+  Match* item = chart_[span.second & kChartHashTableBitmask];
+  for (; item != nullptr && item->codepoint_span.second > span.second;
+       item = item->next) {
+  }
+  for (; item != nullptr && item->codepoint_span.second == span.second;
+       item = item->next) {
+    if (item->lhs == nonterm && item->codepoint_span.first == span.first) {
+      return true;
+    }
+  }
+  return false;
+}
+
 }  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/matcher.h b/native/utils/grammar/matcher.h
index 646e016..47bac43 100644
--- a/native/utils/grammar/matcher.h
+++ b/native/utils/grammar/matcher.h

@@ -91,6 +91,7 @@
 #include <functional>
 #include <vector>
 
+#include "annotator/types.h"
 #include "utils/base/arena.h"
 #include "utils/grammar/callback-delegate.h"
 #include "utils/grammar/match.h"
@@ -102,11 +103,11 @@
 
 class Matcher {
  public:
-  Matcher(const UniLib& unilib, const RulesSet* rules,
-          const std::vector<const RulesSet_::Rules*> rules_shards,
-          CallbackDelegate* delegate)
+  explicit Matcher(const UniLib* unilib, const RulesSet* rules,
+                   const std::vector<const RulesSet_::Rules*> rules_shards,
+                   CallbackDelegate* delegate)
       : state_(STATE_DEFAULT),
-        unilib_(unilib),
+        unilib_(*unilib),
         arena_(kBlocksize),
         rules_(rules),
         rules_shards_(rules_shards),
@@ -114,8 +115,8 @@
     TC3_CHECK(rules_ != nullptr);
     Reset();
   }
-  Matcher(const UniLib& unilib, const RulesSet* rules,
-          CallbackDelegate* delegate)
+  explicit Matcher(const UniLib* unilib, const RulesSet* rules,
+                   CallbackDelegate* delegate)
       : Matcher(unilib, rules, {}, delegate) {
     rules_shards_.reserve(rules->rules()->size());
     rules_shards_.insert(rules_shards_.end(), rules->rules()->begin(),
@@ -125,6 +126,9 @@
   // Resets the matcher.
   void Reset();
 
+  // Finish the matching.
+  void Finish();
+
   // Tells the matcher that the given terminal was found occupying position
   // range [begin, end) in the input.
   // The matcher may invoke callback functions before returning, if this
@@ -165,10 +169,6 @@
   // Returns the current number of bytes allocated for all match objects.
   size_t ArenaSize() const { return arena_.status().bytes_allocated(); }
 
-  const RulesSet_::Nonterminals* nonterminals() const {
-    return rules_->nonterminals();
-  }
-
  private:
   static constexpr int kBlocksize = 16 << 10;
 
@@ -190,16 +190,26 @@
                      CallbackDelegate* delegate);
 
   // Queues a newly created match item.
-  void QueueForProcessing(Match* item) {
-    // Push element to front.
-    item->next = pending_items_;
-    pending_items_ = item;
-  }
+  void QueueForProcessing(Match* item);
+
+  // Queues a match item for later post checking of the exclusion condition.
+  // For exclusions we need to check that the `item->exclusion_nonterm`
+  // doesn't match the same span. As we cannot know which matches have already
+  // been added, we queue the item for later post checking - once all matches
+  // up to `item->codepoint_span.second` have been added.
+  void QueueForPostCheck(ExclusionMatch* item);
 
   // Adds pending items to the chart, possibly generating new matches as a
   // result.
   void ProcessPendingSet();
 
+  // Returns whether the chart contains a match for a given nonterminal.
+  bool ContainsMatch(const Nonterm nonterm, const CodepointSpan& span) const;
+
+  // Checks the exclusion condition of all pending exclusion matches and adds
+  // those matches for which it is fulfilled.
+  void ProcessPendingExclusionMatches();
+
   UniLib unilib_;
 
   // Memory arena for match allocation.
@@ -215,6 +225,9 @@
   // The set of items pending to be added to the chart as a singly-linked list.
   Match* pending_items_;
 
+  // The set of items pending to be post-checked as a singly-linked list.
+  ExclusionMatch* pending_exclusion_items_;
+
   // The chart data structure: a hashtable containing all matches, indexed by
   // their end positions.
   static constexpr int kChartHashTableNumBuckets = 1 << 8;

diff --git a/native/utils/grammar/rules-utils.cc b/native/utils/grammar/rules-utils.cc
index 7358349..56c928a 100644
--- a/native/utils/grammar/rules-utils.cc
+++ b/native/utils/grammar/rules-utils.cc

@@ -54,12 +54,12 @@
   return shards;
 }
 
-std::vector<RuleMatch> DeduplicateMatches(
-    const std::vector<RuleMatch>& matches) {
-  std::vector<RuleMatch> sorted_candidates = matches;
+std::vector<Derivation> DeduplicateDerivations(
+    const std::vector<Derivation>& derivations) {
+  std::vector<Derivation> sorted_candidates = derivations;
   std::stable_sort(
       sorted_candidates.begin(), sorted_candidates.end(),
-      [](const RuleMatch& a, const RuleMatch& b) {
+      [](const Derivation& a, const Derivation& b) {
         // Sort by id.
         if (a.rule_id != b.rule_id) {
           return a.rule_id < b.rule_id;
@@ -75,9 +75,9 @@
       });
 
   // Deduplicate by overlap.
-  std::vector<RuleMatch> result;
+  std::vector<Derivation> result;
   for (int i = 0; i < sorted_candidates.size(); i++) {
-    const RuleMatch& candidate = sorted_candidates[i];
+    const Derivation& candidate = sorted_candidates[i];
     bool eliminated = false;
 
     // Due to the sorting above, the candidate can only be completely
@@ -120,31 +120,4 @@
   return result;
 }
 
-std::unordered_map<uint16, const CapturingMatch*> GatherCapturingMatches(
-    const Match* match) {
-  // Gather active capturing matches.
-  std::unordered_map<uint16, const CapturingMatch*> capturing_matches;
-  grammar::Traverse(match, [&capturing_matches](const grammar::Match* node) {
-    switch (node->type) {
-      case Match::kCapturingMatch: {
-        const CapturingMatch* capturing_match =
-            static_cast<const CapturingMatch*>(node);
-        capturing_matches[capturing_match->id] = capturing_match;
-        return true;
-      }
-
-      // Don't traverse assertion nodes.
-      case Match::kAssertionMatch: {
-        return false;
-      }
-
-      default: {
-        // Expand node.
-        return true;
-      }
-    }
-  });
-  return capturing_matches;
-}
-
 }  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/rules-utils.h b/native/utils/grammar/rules-utils.h
index 6ed5f9d..e6ac541 100644
--- a/native/utils/grammar/rules-utils.h
+++ b/native/utils/grammar/rules-utils.h

@@ -37,27 +37,22 @@
     const std::vector<std::vector<Locale>>& shard_locales,
     const std::vector<Locale>& locales);
 
-// Deduplicates rule matches by containing overlap.
+// Deduplicates rule derivations by containing overlap.
 // The grammar system can output multiple candidates for optional parts.
 // For example if a rule has an optional suffix, we
-// will get two rule matches when the suffix is present: one with and one
+// will get two rule derivations when the suffix is present: one with and one
 // without the suffix. We therefore deduplicate by containing overlap, viz. from
 // two candidates we keep the longer one if it completely contains the shorter.
-struct RuleMatch {
+struct Derivation {
   const Match* match;
   int64 rule_id;
 };
-std::vector<RuleMatch> DeduplicateMatches(
-    const std::vector<RuleMatch>& matches);
+std::vector<Derivation> DeduplicateDerivations(
+    const std::vector<Derivation>& derivations);
 
 // Checks that all assertions of a match tree are fulfilled.
 bool VerifyAssertions(const Match* match);
 
-// Gathers active capturing matches in a match.
-// Returns a map from the id (CapturingMatch::id) to the capturing match.
-std::unordered_map<uint16, const CapturingMatch*> GatherCapturingMatches(
-    const Match* match);
-
 }  // namespace libtextclassifier3::grammar
 
 #endif  // LIBTEXTCLASSIFIER_UTILS_GRAMMAR_RULES_UTILS_H_

diff --git a/native/utils/grammar/rules-utils_test.cc b/native/utils/grammar/rules-utils_test.cc
new file mode 100644
index 0000000..6391be1
--- /dev/null
+++ b/native/utils/grammar/rules-utils_test.cc

@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/grammar/rules-utils.h"
+
+#include <vector>
+
+#include "utils/grammar/match.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3::grammar {
+namespace {
+
+using testing::ElementsAre;
+using testing::Value;
+
+// Creates a test match object.
+Match CreateMatch(const CodepointIndex begin, const CodepointIndex end) {
+  Match match;
+  match.Init(0, CodepointSpan{begin, end},
+             /*arg_match_offset=*/begin);
+  return match;
+}
+
+MATCHER_P(IsDerivation, candidate, "") {
+  return Value(arg.rule_id, candidate.rule_id) &&
+         Value(arg.match, candidate.match);
+}
+
+TEST(UtilsTest, DeduplicatesMatches) {
+  // Overlapping matches from the same rule.
+  Match matches[] = {CreateMatch(0, 1), CreateMatch(1, 2), CreateMatch(0, 2)};
+  const std::vector<Derivation> candidates = {{&matches[0], /*rule_id=*/0},
+                                              {&matches[1], /*rule_id=*/0},
+                                              {&matches[2], /*rule_id=*/0}};
+
+  // Keep longest.
+  EXPECT_THAT(DeduplicateDerivations(candidates),
+              ElementsAre(IsDerivation(candidates[2])));
+}
+
+TEST(UtilsTest, DeduplicatesMatchesPerRule) {
+  // Overlapping matches from different rules.
+  Match matches[] = {CreateMatch(0, 1), CreateMatch(1, 2), CreateMatch(0, 2)};
+  const std::vector<Derivation> candidates = {{&matches[0], /*rule_id=*/0},
+                                              {&matches[1], /*rule_id=*/0},
+                                              {&matches[2], /*rule_id=*/0},
+                                              {&matches[0], /*rule_id=*/1}};
+
+  // Keep longest for rule 0, but also keep match from rule 1.
+  EXPECT_THAT(
+      DeduplicateDerivations(candidates),
+      ElementsAre(IsDerivation(candidates[2]), IsDerivation(candidates[3])));
+}
+
+TEST(UtilsTest, KeepNonoverlapping) {
+  // Non-overlapping matches.
+  Match matches[] = {CreateMatch(0, 1), CreateMatch(1, 2), CreateMatch(2, 3)};
+  const std::vector<Derivation> candidates = {{&matches[0], /*rule_id=*/0},
+                                              {&matches[1], /*rule_id=*/0},
+                                              {&matches[2], /*rule_id=*/0}};
+
+  // Keep all matches.
+  EXPECT_THAT(
+      DeduplicateDerivations(candidates),
+      ElementsAre(IsDerivation(candidates[0]), IsDerivation(candidates[1]),
+                  IsDerivation(candidates[2])));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/rules.fbs b/native/utils/grammar/rules.fbs
index 84404fd..8052c11 100755
--- a/native/utils/grammar/rules.fbs
+++ b/native/utils/grammar/rules.fbs

@@ -15,6 +15,7 @@
 //
 
 include "utils/i18n/language-tag.fbs";
+include "utils/zlib/buffer.fbs";
 
 // The terminal rules map as sorted strings table.
 // The sorted terminal strings table is represented as offsets into the
@@ -109,6 +110,12 @@
   max_whitespace_gap:byte;
 }
 
+namespace libtextclassifier3.grammar.RulesSet_.Nonterminals_;
+table AnnotationNtEntry {
+  key:string (key, shared);
+  value:int;
+}
+
 // Usage of pre-defined non-terminals that the lexer can generate if used by
 // the grammar.
 namespace libtextclassifier3.grammar.RulesSet_;
@@ -134,6 +141,10 @@
 
   // Id of the nonterminal indicating an uppercase token.
   uppercase_token_nt:int;
+
+  // Predefined nonterminals for annotations.
+  // Maps annotation/collection names to non-terminal ids.
+  annotation_nt:[Nonterminals_.AnnotationNtEntry];
 }
 
 // Callback information.
@@ -162,6 +173,18 @@
   nonterminal_names:[DebugInformation_.NonterminalNamesEntry];
 }
 
+// Regex annotators.
+namespace libtextclassifier3.grammar.RulesSet_;
+table RegexAnnotator {
+  // The pattern to run.
+  pattern:string (shared);
+
+  compressed_pattern:CompressedBuffer;
+
+  // The nonterminal to trigger.
+  nonterminal:uint;
+}
+
 // Context free grammar rules representation.
 // Rules are represented in (mostly) Chomsky Normal Form, where all rules are
 // of the following form, either:
@@ -184,5 +207,9 @@
   nonterminals:RulesSet_.Nonterminals;
   callback:[RulesSet_.CallbackEntry];
   debug_information:RulesSet_.DebugInformation;
+  regex_annotator:[RulesSet_.RegexAnnotator];
+
+  // If true, will compile the regexes only on first use.
+  lazy_regex_compilation:bool;
 }
 

diff --git a/native/utils/grammar/types.h b/native/utils/grammar/types.h
index 8d7ef97..a79532b 100644
--- a/native/utils/grammar/types.h
+++ b/native/utils/grammar/types.h

@@ -32,6 +32,13 @@
 const Nonterm kUnassignedNonterm = 0;
 
 typedef int32 CallbackId;  // `kNoCallback` is reserved for "no callback"
+enum class DefaultCallback : CallbackId {
+  kSetType = -1,
+  kAssertion = -2,
+  kMapping = -3,
+  kExclusion = -4,
+  kRootRule = 1,
+};
 
 // Special CallbackId indicating that there's no callback associated with a
 // rule.
@@ -52,7 +59,7 @@
 struct BinaryRuleHasher {
   inline uint64 operator()(const TwoNonterms& x) const {
     // the hash_int32 maps a int to a random int, then treat two ints as a
-    // rational number, then use cantor pairing function to caculate the
+    // rational number, then use cantor pairing function to calculate the
     // order of rational number.
     uint32 t1 = hash_int32(x.first);
     uint32 t2 = hash_int32(x.second);

diff --git a/native/utils/grammar/utils/ir.cc b/native/utils/grammar/utils/ir.cc
index 69b8273..ce074b8 100644
--- a/native/utils/grammar/utils/ir.cc
+++ b/native/utils/grammar/utils/ir.cc

@@ -18,104 +18,25 @@
 
 #include "utils/strings/append.h"
 #include "utils/strings/stringpiece.h"
+#include "utils/zlib/zlib.h"
 
 namespace libtextclassifier3::grammar {
-
-const size_t kMaxHashTableSize = 100;
-
-Nonterm Ir::AddToSet(const Lhs& lhs, LhsSet* lhs_set) {
-  const int lhs_set_size = lhs_set->size();
-  Nonterm shareable_nonterm = lhs.nonterminal;
-  for (int i = 0; i < lhs_set_size; i++) {
-    Lhs* candidate = &lhs_set->at(i);
-
-    // Exact match, just reuse rule.
-    if (lhs == *candidate) {
-      return candidate->nonterminal;
-    }
-
-    // Cannot reuse unshareable ids.
-    if (nonshareable_.find(candidate->nonterminal) != nonshareable_.end() ||
-        nonshareable_.find(lhs.nonterminal) != nonshareable_.end()) {
-      continue;
-    }
-
-    // Cannot reuse id if the preconditions are different.
-    if (!(lhs.preconditions == candidate->preconditions)) {
-      continue;
-    }
-
-    // If either callback is a filter, we can't share as we must always run
-    // both filters.
-    if ((lhs.callback.id != kNoCallback &&
-         filters_.find(lhs.callback.id) != filters_.end()) ||
-        (candidate->callback.id != kNoCallback &&
-         filters_.find(candidate->callback.id) != filters_.end())) {
-      continue;
-    }
-
-    // If the nonterminal is already defined, it must match for sharing.
-    if (lhs.nonterminal != kUnassignedNonterm &&
-        lhs.nonterminal != candidate->nonterminal) {
-      continue;
-    }
-
-    // Check whether the callbacks match.
-    if (lhs.callback == candidate->callback) {
-      return candidate->nonterminal;
-    }
-
-    // We can reuse if one of the output callbacks is not used.
-    if (lhs.callback.id == kNoCallback) {
-      return candidate->nonterminal;
-    } else if (candidate->callback.id == kNoCallback) {
-      // Old entry has no output callback, which is redundant now.
-      candidate->callback = lhs.callback;
-      return candidate->nonterminal;
-    }
-
-    // We can share the nonterminal, but we need to
-    // add a new output callback. Defer this as we might find a shareable
-    // nonterminal first.
-    shareable_nonterm = candidate->nonterminal;
-  }
-
-  // We didn't find a redundant entry, so create a new one.
-  shareable_nonterm = DefineNonterminal(shareable_nonterm);
-  lhs_set->push_back(Lhs{shareable_nonterm, lhs.callback, lhs.preconditions});
-  return shareable_nonterm;
-}
-
-Nonterm Ir::Add(const Lhs& lhs, const std::string& terminal,
-                const bool case_sensitive, const int shard) {
-  TC3_CHECK_LT(shard, shards_.size());
-  if (case_sensitive) {
-    return AddRule(lhs, terminal, &shards_[shard].terminal_rules);
-  } else {
-    return AddRule(lhs, terminal, &shards_[shard].lowercase_terminal_rules);
-  }
-}
-
-Nonterm Ir::Add(const Lhs& lhs, const std::vector<Nonterm>& rhs,
-                const int shard) {
-  TC3_CHECK(!rhs.empty()) << "Rhs cannot be empty.";
-
-  // Add a new unary rule.
-  if (rhs.size() == 1) {
-    return Add(lhs, rhs.front(), shard);
-  }
-
-  // Add a chain of (rhs.size() - 1) binary rules.
-  Nonterm prev = rhs.front();
-  for (int i = 1; i < rhs.size() - 1; i++) {
-    prev = Add(kUnassignedNonterm, prev, rhs[i], shard);
-  }
-  return Add(lhs, prev, rhs.back(), shard);
-}
-
-// Utilities for baking rules in the IR to the inference format.
 namespace {
 
+constexpr size_t kMaxHashTableSize = 100;
+
+template <typename T>
+void SortForBinarySearchLookup(T* entries) {
+  std::sort(entries->begin(), entries->end(),
+            [](const auto& a, const auto& b) { return a->key < b->key; });
+}
+
+template <typename T>
+void SortStructsForBinarySearchLookup(T* entries) {
+  std::sort(entries->begin(), entries->end(),
+            [](const auto& a, const auto& b) { return a.key() < b.key(); });
+}
+
 bool IsSameLhs(const Ir::Lhs& lhs, const RulesSet_::Lhs& other) {
   return (lhs.nonterminal == other.nonterminal() &&
           lhs.callback.id == other.callback_id() &&
@@ -216,14 +137,11 @@
 void SerializeUnaryRulesShard(
     const std::unordered_map<Nonterm, Ir::LhsSet>& unary_rules,
     RulesSetT* rules_set, RulesSet_::RulesT* rules) {
-  for (const auto it : unary_rules) {
+  for (const auto& it : unary_rules) {
     rules->unary_rules.push_back(RulesSet_::Rules_::UnaryRulesEntry(
         it.first, AddLhsSet(it.second, rules_set)));
   }
-
-  // Sort for binary search lookup.
-  std::sort(rules->unary_rules.begin(), rules->unary_rules.end(),
-            [](const auto& a, const auto& b) { return a.key() < b.key(); });
+  SortStructsForBinarySearchLookup(&rules->unary_rules);
 }
 
 // // Serializes a binary rules table.
@@ -239,7 +157,7 @@
 
   // Serialize the table.
   BinaryRuleHasher hash;
-  for (const auto it : binary_rules) {
+  for (const auto& it : binary_rules) {
     const TwoNonterms key = it.first;
     uint32 bucket_index = hash(key) % num_buckets;
 
@@ -252,6 +170,104 @@
 
 }  // namespace
 
+Nonterm Ir::AddToSet(const Lhs& lhs, LhsSet* lhs_set) {
+  const int lhs_set_size = lhs_set->size();
+  Nonterm shareable_nonterm = lhs.nonterminal;
+  for (int i = 0; i < lhs_set_size; i++) {
+    Lhs* candidate = &lhs_set->at(i);
+
+    // Exact match, just reuse rule.
+    if (lhs == *candidate) {
+      return candidate->nonterminal;
+    }
+
+    // Cannot reuse unshareable ids.
+    if (nonshareable_.find(candidate->nonterminal) != nonshareable_.end() ||
+        nonshareable_.find(lhs.nonterminal) != nonshareable_.end()) {
+      continue;
+    }
+
+    // Cannot reuse id if the preconditions are different.
+    if (!(lhs.preconditions == candidate->preconditions)) {
+      continue;
+    }
+
+    // If either callback is a filter, we can't share as we must always run
+    // both filters.
+    if ((lhs.callback.id != kNoCallback &&
+         filters_.find(lhs.callback.id) != filters_.end()) ||
+        (candidate->callback.id != kNoCallback &&
+         filters_.find(candidate->callback.id) != filters_.end())) {
+      continue;
+    }
+
+    // If the nonterminal is already defined, it must match for sharing.
+    if (lhs.nonterminal != kUnassignedNonterm &&
+        lhs.nonterminal != candidate->nonterminal) {
+      continue;
+    }
+
+    // Check whether the callbacks match.
+    if (lhs.callback == candidate->callback) {
+      return candidate->nonterminal;
+    }
+
+    // We can reuse if one of the output callbacks is not used.
+    if (lhs.callback.id == kNoCallback) {
+      return candidate->nonterminal;
+    } else if (candidate->callback.id == kNoCallback) {
+      // Old entry has no output callback, which is redundant now.
+      candidate->callback = lhs.callback;
+      return candidate->nonterminal;
+    }
+
+    // We can share the nonterminal, but we need to
+    // add a new output callback. Defer this as we might find a shareable
+    // nonterminal first.
+    shareable_nonterm = candidate->nonterminal;
+  }
+
+  // We didn't find a redundant entry, so create a new one.
+  shareable_nonterm = DefineNonterminal(shareable_nonterm);
+  lhs_set->push_back(Lhs{shareable_nonterm, lhs.callback, lhs.preconditions});
+  return shareable_nonterm;
+}
+
+Nonterm Ir::Add(const Lhs& lhs, const std::string& terminal,
+                const bool case_sensitive, const int shard) {
+  TC3_CHECK_LT(shard, shards_.size());
+  if (case_sensitive) {
+    return AddRule(lhs, terminal, &shards_[shard].terminal_rules);
+  } else {
+    return AddRule(lhs, terminal, &shards_[shard].lowercase_terminal_rules);
+  }
+}
+
+Nonterm Ir::Add(const Lhs& lhs, const std::vector<Nonterm>& rhs,
+                const int shard) {
+  // Add a new unary rule.
+  if (rhs.size() == 1) {
+    return Add(lhs, rhs.front(), shard);
+  }
+
+  // Add a chain of (rhs.size() - 1) binary rules.
+  Nonterm prev = rhs.front();
+  for (int i = 1; i < rhs.size() - 1; i++) {
+    prev = Add(kUnassignedNonterm, prev, rhs[i], shard);
+  }
+  return Add(lhs, prev, rhs.back(), shard);
+}
+
+Nonterm Ir::AddRegex(Nonterm lhs, const std::string& regex_pattern) {
+  lhs = DefineNonterminal(lhs);
+  regex_rules_.emplace_back(regex_pattern, lhs);
+  return lhs;
+}
+
+void Ir::AddAnnotation(const Nonterm lhs, const std::string& annotation) {
+  annotations_.emplace_back(annotation, lhs);
+}
+
 // Serializes the terminal rules table.
 void Ir::SerializeTerminalRules(
     RulesSetT* rules_set,
@@ -395,12 +411,7 @@
     output->callback.push_back(RulesSet_::CallbackEntry(
         filter_callback_id, RulesSet_::Callback(/*is_filter=*/true)));
   }
-  // Sort for binary search.
-  std::sort(
-      output->callback.begin(), output->callback.end(),
-      [](const RulesSet_::CallbackEntry& a, const RulesSet_::CallbackEntry& b) {
-        return a.key() < b.key();
-      });
+  SortStructsForBinarySearchLookup(&output->callback);
 
   // Add information about predefined nonterminal classes.
   output->nonterminals.reset(new RulesSet_::NonterminalsT);
@@ -418,20 +429,35 @@
       output->nonterminals->n_digits_nt[i - 1] = n_digits_nt;
     }
   }
+  for (const auto& [annotation, annotation_nt] : annotations_) {
+    output->nonterminals->annotation_nt.emplace_back(
+        new RulesSet_::Nonterminals_::AnnotationNtEntryT);
+    output->nonterminals->annotation_nt.back()->key = annotation;
+    output->nonterminals->annotation_nt.back()->value = annotation_nt;
+  }
+  SortForBinarySearchLookup(&output->nonterminals->annotation_nt);
 
   if (include_debug_information) {
     output->debug_information.reset(new RulesSet_::DebugInformationT);
     // Keep original non-terminal names.
-    for (auto it : nonterminal_names_) {
+    for (const auto& it : nonterminal_names_) {
       output->debug_information->nonterminal_names.emplace_back(
           new RulesSet_::DebugInformation_::NonterminalNamesEntryT);
       output->debug_information->nonterminal_names.back()->key = it.first;
       output->debug_information->nonterminal_names.back()->value = it.second;
     }
-    // Sort for binary search lookup.
-    std::sort(output->debug_information->nonterminal_names.begin(),
-              output->debug_information->nonterminal_names.end(),
-              [](const auto& a, const auto& b) { return a->key < b->key; });
+    SortForBinarySearchLookup(&output->debug_information->nonterminal_names);
+  }
+
+  // Add regex rules.
+  std::unique_ptr<ZlibCompressor> compressor = ZlibCompressor::Instance();
+  for (auto [pattern, lhs] : regex_rules_) {
+    output->regex_annotator.emplace_back(new RulesSet_::RegexAnnotatorT);
+    output->regex_annotator.back()->compressed_pattern.reset(
+        new CompressedBufferT);
+    compressor->Compress(
+        pattern, output->regex_annotator.back()->compressed_pattern.get());
+    output->regex_annotator.back()->nonterminal = lhs;
   }
 
   // Serialize the unary and binary rules.

diff --git a/native/utils/grammar/utils/ir.h b/native/utils/grammar/utils/ir.h
index ba6de72..b05b87f 100644
--- a/native/utils/grammar/utils/ir.h
+++ b/native/utils/grammar/utils/ir.h

@@ -83,6 +83,19 @@
   };
   using LhsSet = std::vector<Lhs>;
 
+  // A rules shard.
+  struct RulesShard {
+    // Terminal rules.
+    std::unordered_map<std::string, LhsSet> terminal_rules;
+    std::unordered_map<std::string, LhsSet> lowercase_terminal_rules;
+
+    // Unary rules.
+    std::unordered_map<Nonterm, LhsSet> unary_rules;
+
+    // Binary rules.
+    std::unordered_map<TwoNonterms, LhsSet, BinaryRuleHasher> binary_rules;
+  };
+
   explicit Ir(const std::unordered_set<CallbackId>& filters = {},
               const int num_shards = 1)
       : num_nonterminals_(0), filters_(filters), shards_(num_shards) {}
@@ -112,7 +125,7 @@
 
   // Gets the non-terminal for a given name, if it was previously defined.
   Nonterm GetNonterminalForName(const std::string& name) const {
-    const auto& it = nonterminal_ids_.find(name);
+    const auto it = nonterminal_ids_.find(name);
     if (it == nonterminal_ids_.end()) {
       return kUnassignedNonterm;
     }
@@ -121,27 +134,25 @@
 
   // Adds a terminal rule <lhs> ::= terminal.
   Nonterm Add(const Lhs& lhs, const std::string& terminal,
-              const bool case_sensitive = false, const int shard = 0);
+              bool case_sensitive = false, int shard = 0);
   Nonterm Add(const Nonterm lhs, const std::string& terminal,
-              const bool case_sensitive = false, const int shard = 0) {
+              bool case_sensitive = false, int shard = 0) {
     return Add(Lhs{lhs}, terminal, case_sensitive, shard);
   }
 
   // Adds a unary rule <lhs> ::= <rhs>.
-  Nonterm Add(const Lhs& lhs, const Nonterm rhs, const int shard = 0) {
+  Nonterm Add(const Lhs& lhs, Nonterm rhs, int shard = 0) {
     return AddRule(lhs, rhs, &shards_[shard].unary_rules);
   }
-  Nonterm Add(const Nonterm lhs, const Nonterm rhs, const int shard = 0) {
+  Nonterm Add(Nonterm lhs, Nonterm rhs, int shard = 0) {
     return Add(Lhs{lhs}, rhs, shard);
   }
 
   // Adds a binary rule <lhs> ::= <rhs_1> <rhs_2>.
-  Nonterm Add(const Lhs& lhs, const Nonterm rhs_1, const Nonterm rhs_2,
-              const int shard = 0) {
+  Nonterm Add(const Lhs& lhs, Nonterm rhs_1, Nonterm rhs_2, int shard = 0) {
     return AddRule(lhs, {rhs_1, rhs_2}, &shards_[shard].binary_rules);
   }
-  Nonterm Add(const Nonterm lhs, const Nonterm rhs_1, const Nonterm rhs_2,
-              const int shard = 0) {
+  Nonterm Add(Nonterm lhs, Nonterm rhs_1, Nonterm rhs_2, int shard = 0) {
     return Add(Lhs{lhs}, rhs_1, rhs_2, shard);
   }
 
@@ -153,34 +164,27 @@
   //     <temp_2> ::= <temp_1> <RHS_3>
   //     ...
   //     <LHS> ::= <temp_(k-1)> <RHS_k>
-  Nonterm Add(const Lhs& lhs, const std::vector<Nonterm>& rhs,
-              const int shard = 0);
-  Nonterm Add(const Nonterm lhs, const std::vector<Nonterm>& rhs,
-              const int shard = 0) {
+  Nonterm Add(const Lhs& lhs, const std::vector<Nonterm>& rhs, int shard = 0);
+  Nonterm Add(Nonterm lhs, const std::vector<Nonterm>& rhs, int shard = 0) {
     return Add(Lhs{lhs}, rhs, shard);
   }
 
+  // Adds a regex rule <lhs> ::= <regex_pattern>.
+  Nonterm AddRegex(Nonterm lhs, const std::string& regex_pattern);
+
+  // Adds a definition for a nonterminal provided by a text annotation.
+  void AddAnnotation(Nonterm lhs, const std::string& annotation);
+
   // Serializes a rule set in the intermediate representation into the
   // memory mappable inference format.
-  void Serialize(const bool include_debug_information, RulesSetT* output) const;
+  void Serialize(bool include_debug_information, RulesSetT* output) const;
 
   std::string SerializeAsFlatbuffer(
-      const bool include_debug_information = false) const;
+      bool include_debug_information = false) const;
+
+  const std::vector<RulesShard>& shards() const { return shards_; }
 
  private:
-  // A rules shard.
-  struct RulesShard {
-    // Terminal rules.
-    std::unordered_map<std::string, LhsSet> terminal_rules;
-    std::unordered_map<std::string, LhsSet> lowercase_terminal_rules;
-
-    // Unary rules.
-    std::unordered_map<Nonterm, LhsSet> unary_rules;
-
-    // Binary rules.
-    std::unordered_map<TwoNonterms, LhsSet, BinaryRuleHasher> binary_rules;
-  };
-
   template <typename R, typename H>
   Nonterm AddRule(const Lhs& lhs, const R& rhs,
                   std::unordered_map<R, LhsSet, H>* rules) {
@@ -216,6 +220,12 @@
   // The sharded rules.
   std::vector<RulesShard> shards_;
 
+  // The regex rules.
+  std::vector<std::pair<std::string, Nonterm>> regex_rules_;
+
+  // Mapping from annotation name to nonterminal.
+  std::vector<std::pair<std::string, Nonterm>> annotations_;
+
   // Debug information.
   std::unordered_map<Nonterm, std::string> nonterminal_names_;
   std::unordered_map<std::string, Nonterm> nonterminal_ids_;

diff --git a/native/utils/grammar/utils/ir_test.cc b/native/utils/grammar/utils/ir_test.cc
new file mode 100644
index 0000000..d2438dd
--- /dev/null
+++ b/native/utils/grammar/utils/ir_test.cc

@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/grammar/utils/ir.h"
+
+#include "utils/grammar/rules_generated.h"
+#include "utils/grammar/types.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3::grammar {
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+TEST(IrTest, HandlesSharingWithTerminalRules) {
+  Ir ir;
+
+  // <t1> ::= the
+  const Nonterm t1 = ir.Add(kUnassignedNonterm, "the");
+
+  // <t2> ::= quick
+  const Nonterm t2 = ir.Add(kUnassignedNonterm, "quick");
+
+  // <t3> ::= quick    -- should share with <t2>
+  const Nonterm t3 = ir.Add(kUnassignedNonterm, "quick");
+
+  // <t4> ::= quick    -- specify unshareable <t4>
+  // <t4> ::= brown
+  const Nonterm t4_unshareable = ir.AddUnshareableNonterminal();
+  ir.Add(t4_unshareable, "quick");
+  ir.Add(t4_unshareable, "brown");
+
+  // <t5> ::= brown    -- should not be shared with <t4>
+  const Nonterm t5 = ir.Add(kUnassignedNonterm, "brown");
+
+  // <t6> ::= brown    -- specify unshareable <t6>
+  const Nonterm t6_unshareable = ir.AddUnshareableNonterminal();
+  ir.Add(t6_unshareable, "brown");
+
+  // <t7> ::= brown    -- should share with <t5>
+  const Nonterm t7 = ir.Add(kUnassignedNonterm, "brown");
+
+  EXPECT_THAT(t1, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t2, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t1, Ne(t2));
+  EXPECT_THAT(t2, Eq(t3));
+  EXPECT_THAT(t4_unshareable, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t4_unshareable, Ne(t3));
+  EXPECT_THAT(t4_unshareable, Ne(t5));
+  EXPECT_THAT(t6_unshareable, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t6_unshareable, Ne(t4_unshareable));
+  EXPECT_THAT(t6_unshareable, Ne(t5));
+  EXPECT_THAT(t7, Eq(t5));
+}
+
+TEST(IrTest, HandlesSharingWithNonterminalRules) {
+  Ir ir;
+
+  // Set up a few terminal rules.
+  const std::vector<Nonterm> rhs = {
+      ir.Add(kUnassignedNonterm, "the"), ir.Add(kUnassignedNonterm, "quick"),
+      ir.Add(kUnassignedNonterm, "brown"), ir.Add(kUnassignedNonterm, "fox")};
+
+  // Check for proper sharing using nonterminal rules.
+  for (int rhs_length = 1; rhs_length <= rhs.size(); rhs_length++) {
+    std::vector<Nonterm> rhs_truncated = rhs;
+    rhs_truncated.resize(rhs_length);
+    const Nonterm nt_u = ir.AddUnshareableNonterminal();
+    ir.Add(nt_u, rhs_truncated);
+    const Nonterm nt_1 = ir.Add(kUnassignedNonterm, rhs_truncated);
+    const Nonterm nt_2 = ir.Add(kUnassignedNonterm, rhs_truncated);
+
+    EXPECT_THAT(nt_1, Eq(nt_2));
+    EXPECT_THAT(nt_1, Ne(nt_u));
+  }
+}
+
+TEST(IrTest, HandlesSharingWithCallbacksWithSameParameters) {
+  // Test sharing in the presence of callbacks with the same parameters.
+  constexpr CallbackId kOutput1 = 1;
+  constexpr CallbackId kOutput2 = 2;
+  constexpr CallbackId kFilter1 = 3;
+  constexpr CallbackId kFilter2 = 4;
+  Ir ir(/*filters=*/{kFilter1, kFilter2});
+
+  const Nonterm x1 = ir.Add(kUnassignedNonterm, "hello");
+  const Nonterm x2 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput1, 0}}, "hello");
+  const Nonterm x3 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter1, 0}}, "hello");
+  const Nonterm x4 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput2, 0}}, "hello");
+  const Nonterm x5 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter2, 0}}, "hello");
+
+  // Duplicate entry.
+  const Nonterm x6 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput2, 0}}, "hello");
+
+  EXPECT_THAT(x2, Eq(x1));
+  EXPECT_THAT(x3, Ne(x1));
+  EXPECT_THAT(x4, Eq(x1));
+  EXPECT_THAT(x5, Ne(x1));
+  EXPECT_THAT(x5, Ne(x3));
+  EXPECT_THAT(x6, Ne(x3));
+}
+
+TEST(IrTest, HandlesSharingWithCallbacksWithDifferentParameters) {
+  // Test sharing in the presence of callbacks with different parameters.
+  constexpr CallbackId kOutput = 1;
+  constexpr CallbackId kFilter = 2;
+  Ir ir(/*filters=*/{kFilter});
+
+  const Nonterm x1 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput, 0}}, "world");
+  const Nonterm x2 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput, 1}}, "world");
+  const Nonterm x3 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter, 0}}, "world");
+  const Nonterm x4 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter, 1}}, "world");
+
+  EXPECT_THAT(x2, Eq(x1));
+  EXPECT_THAT(x3, Ne(x1));
+  EXPECT_THAT(x4, Ne(x1));
+  EXPECT_THAT(x4, Ne(x3));
+}
+
+TEST(IrTest, SerializesRulesToFlatbufferFormat) {
+  constexpr CallbackId kOutput = 1;
+  Ir ir;
+  const Nonterm verb = ir.AddUnshareableNonterminal();
+  ir.Add(verb, "buy");
+  ir.Add(Ir::Lhs{verb, {kOutput}}, "bring");
+  ir.Add(verb, "upbring");
+  ir.Add(verb, "remind");
+  const Nonterm set_reminder = ir.AddUnshareableNonterminal();
+  ir.Add(set_reminder,
+         std::vector<Nonterm>{ir.Add(kUnassignedNonterm, "remind"),
+                              ir.Add(kUnassignedNonterm, "me"),
+                              ir.Add(kUnassignedNonterm, "to"), verb});
+  const Nonterm action = ir.AddUnshareableNonterminal();
+  ir.Add(action, set_reminder);
+  RulesSetT rules;
+  ir.Serialize(/*include_debug_information=*/false, &rules);
+
+  EXPECT_THAT(rules.rules, SizeIs(1));
+
+  // Only one rule uses a callback, the rest will be encoded directly.
+  EXPECT_THAT(rules.lhs, SizeIs(1));
+  EXPECT_THAT(rules.lhs.front().callback_id(), kOutput);
+
+  // 6 distinct terminals: "buy", "upbring", "bring", "remind", "me" and "to".
+  EXPECT_THAT(rules.rules.front()->lowercase_terminal_rules->terminal_offsets,
+              SizeIs(6));
+  EXPECT_THAT(rules.rules.front()->terminal_rules->terminal_offsets, IsEmpty());
+
+  // As "bring" is a suffix of "upbring", it is expected to be suffix-merged
+  // in the string pool.
+  EXPECT_THAT(rules.terminals,
+              Eq(std::string("buy\0me\0remind\0to\0upbring\0", 25)));
+
+  EXPECT_THAT(rules.rules.front()->binary_rules, SizeIs(3));
+
+  // One unary rule: <action> ::= <set_reminder>
+  EXPECT_THAT(rules.rules.front()->unary_rules, SizeIs(1));
+}
+
+TEST(IrTest, HandlesRulesSharding) {
+  Ir ir(/*filters=*/{}, /*num_shards=*/2);
+  const Nonterm verb = ir.AddUnshareableNonterminal();
+  const Nonterm set_reminder = ir.AddUnshareableNonterminal();
+
+  // Shard 0: en
+  ir.Add(verb, "buy");
+  ir.Add(verb, "bring");
+  ir.Add(verb, "remind");
+  ir.Add(set_reminder,
+         std::vector<Nonterm>{ir.Add(kUnassignedNonterm, "remind"),
+                              ir.Add(kUnassignedNonterm, "me"),
+                              ir.Add(kUnassignedNonterm, "to"), verb});
+
+  // Shard 1: de
+  ir.Add(verb, "kaufen", /*case_sensitive=*/false, /*shard=*/1);
+  ir.Add(verb, "bringen", /*case_sensitive=*/false, /*shard=*/1);
+  ir.Add(verb, "erinnern", /*case_sensitive=*/false, /*shard=*/1);
+  ir.Add(set_reminder,
+         std::vector<Nonterm>{ir.Add(kUnassignedNonterm, "erinnere",
+                                     /*case_sensitive=*/false, /*shard=*/1),
+                              ir.Add(kUnassignedNonterm, "mich",
+                                     /*case_sensitive=*/false, /*shard=*/1),
+                              ir.Add(kUnassignedNonterm, "zu",
+                                     /*case_sensitive=*/false, /*shard=*/1),
+                              verb},
+         /*shard=*/1);
+
+  // Test that terminal strings are correctly merged into the shared
+  // string pool.
+  RulesSetT rules;
+  ir.Serialize(/*include_debug_information=*/false, &rules);
+
+  EXPECT_THAT(rules.rules, SizeIs(2));
+
+  // 5 distinct terminals: "buy", "bring", "remind", "me" and "to".
+  EXPECT_THAT(rules.rules[0]->lowercase_terminal_rules->terminal_offsets,
+              SizeIs(5));
+  EXPECT_THAT(rules.rules[0]->terminal_rules->terminal_offsets, IsEmpty());
+
+  // 6 distinct terminals: "kaufen", "bringen", "erinnern", "erinnere", "mich"
+  // and "zu".
+  EXPECT_THAT(rules.rules[1]->lowercase_terminal_rules->terminal_offsets,
+              SizeIs(6));
+  EXPECT_THAT(rules.rules[1]->terminal_rules->terminal_offsets, IsEmpty());
+
+  EXPECT_THAT(rules.terminals,
+              Eq(std::string("bring\0bringen\0buy\0erinnere\0erinnern\0kaufen\0"
+                             "me\0mich\0remind\0to\0zu\0",
+                             64)));
+
+  EXPECT_THAT(rules.rules[0]->binary_rules, SizeIs(3));
+  EXPECT_THAT(rules.rules[1]->binary_rules, SizeIs(3));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/utils/rules.cc b/native/utils/grammar/utils/rules.cc
index 4dbab61..d6e4b76 100644
--- a/native/utils/grammar/utils/rules.cc
+++ b/native/utils/grammar/utils/rules.cc

@@ -20,6 +20,7 @@
 
 #include "utils/grammar/utils/ir.h"
 #include "utils/strings/append.h"
+#include "utils/strings/stringpiece.h"
 
 namespace libtextclassifier3::grammar {
 namespace {
@@ -64,17 +65,35 @@
       return false;
     }
   }
+
+  // Check that all parts of an exclusion are defined.
+  if (rule.callback == static_cast<CallbackId>(DefaultCallback::kExclusion)) {
+    if (GetAssignedIdForNonterminal(rule.callback_param, nonterminals) ==
+        kUnassignedNonterm) {
+      return false;
+    }
+  }
+
   return true;
 }
 
 // Lowers a single high-level rule down into the intermediate representation.
 void LowerRule(const int lhs_index, const Rules::Rule& rule,
                std::unordered_map<int, Nonterm>* nonterminals, Ir* ir) {
+  const CallbackId callback = rule.callback;
+  int64 callback_param = rule.callback_param;
+
+  // Resolve id of excluded nonterminal in exclusion rules.
+  if (callback == static_cast<CallbackId>(DefaultCallback::kExclusion)) {
+    callback_param = GetAssignedIdForNonterminal(callback_param, *nonterminals);
+    TC3_CHECK_NE(callback_param, kUnassignedNonterm);
+  }
+
   // Special case for terminal rules.
   if (rule.rhs.size() == 1 && rule.rhs.front().is_terminal) {
     (*nonterminals)[lhs_index] =
         ir->Add(Ir::Lhs{GetAssignedIdForNonterminal(lhs_index, *nonterminals),
-                        /*callback=*/{rule.callback, rule.callback_param},
+                        /*callback=*/{callback, callback_param},
                         /*preconditions=*/{rule.max_whitespace_gap}},
                 rule.rhs.front().terminal, rule.case_sensitive, rule.shard);
     return;
@@ -96,11 +115,10 @@
   }
   (*nonterminals)[lhs_index] =
       ir->Add(Ir::Lhs{GetAssignedIdForNonterminal(lhs_index, *nonterminals),
-                      /*callback=*/{rule.callback, rule.callback_param},
+                      /*callback=*/{callback, callback_param},
                       /*preconditions=*/{rule.max_whitespace_gap}},
               rhs_nonterms, rule.shard);
 }
-
 // Check whether this component is a non-terminal.
 bool IsNonterminal(StringPiece rhs_component) {
   return rhs_component[0] == '<' &&
@@ -119,18 +137,61 @@
 
 }  // namespace
 
-int Rules::AddNonterminal(StringPiece nonterminal_name) {
-  const std::string key = nonterminal_name.ToString();
+// Returns the index of the nonterminal registered under `nonterminal_name`,
+// creating it if it does not exist yet. If the name is a registered alias,
+// the alias target is looked up (and, if necessary, created) instead.
+int Rules::AddNonterminal(const std::string& nonterminal_name) {
+  std::string key = nonterminal_name;
+  // Aliases are resolved one level deep: the alias target itself is not
+  // looked up in the alias table again.
+  auto alias_it = nonterminal_alias_.find(key);
+  if (alias_it != nonterminal_alias_.end()) {
+    key = alias_it->second;
+  }
   auto it = nonterminal_names_.find(key);
   if (it != nonterminal_names_.end()) {
     return it->second;
   }
   const int index = nonterminals_.size();
-  nonterminals_.push_back(NontermInfo{nonterminal_name.ToString()});
+  nonterminals_.push_back(NontermInfo{key});
   nonterminal_names_.insert(it, {key, index});
   return index;
 }
 
+// Appends a fresh nonterminal with default-initialized info (empty name) and
+// returns its index. The nonterminal is not entered into the name lookup map.
+int Rules::AddNewNonterminal() {
+  nonterminals_.emplace_back();
+  return static_cast<int>(nonterminals_.size()) - 1;
+}
+
+// Registers `alias` as an alternative name for `nonterminal_name`.
+//
+// Registering the same alias for the same nonterminal again is a no-op.
+// Pointing an already registered alias at a different nonterminal is a fatal
+// error.
+void Rules::AddAlias(const std::string& nonterminal_name,
+                     const std::string& alias) {
+  // `emplace` leaves an existing mapping untouched, so the check below sees
+  // the previously registered target. (`insert_or_assign` would overwrite the
+  // entry first, which makes the redefinition check impossible to trigger.)
+  const auto existing = nonterminal_alias_.emplace(alias, nonterminal_name).first;
+  TC3_CHECK_EQ(existing->second, nonterminal_name)
+      << "Cannot redefine alias: " << alias;
+}
+
+// Returns the nonterminal index associated with the externally provided
+// annotation `annotation_name`. The first call for a given annotation creates
+// a new unnamed nonterminal; later calls return the same index.
+// NOTE(review): the created entry keeps `from_annotation` at its default
+// value; confirm whether it is meant to be flagged here.
+int Rules::AddAnnotation(const std::string& annotation_name) {
+  const auto existing = annotation_nonterminals_.find(annotation_name);
+  if (existing != annotation_nonterminals_.end()) {
+    return existing->second;
+  }
+  const int index = nonterminals_.size();
+  annotation_nonterminals_.insert({annotation_name, index});
+  nonterminals_.push_back(NontermInfo{});
+  return index;
+}
+
+// Associates the annotation `annotation_name` with the nonterminal named
+// `nonterminal_name` (created on demand). Fails fatally if the annotation is
+// already associated with a nonterminal.
+void Rules::BindAnnotation(const std::string& nonterminal_name,
+                           const std::string& annotation_name) {
+  const int nonterminal = AddNonterminal(nonterminal_name);
+  const bool inserted =
+      annotation_nonterminals_.insert({annotation_name, nonterminal}).second;
+  TC3_CHECK(inserted);
+}
+
+// Returns true iff `element` is a nonterminal whose registered name equals
+// `nonterminal`. Terminals never match.
+bool Rules::IsNonterminalOfName(const RhsElement& element,
+                                const std::string& nonterminal) const {
+  return !element.is_terminal &&
+         nonterminals_[element.nonterminal].name == nonterminal;
+}
+
 // Note: For k optional components this creates 2^k rules, but it would be
 // possible to be smarter about this and only use 2k rules instead.
 // However that might be slower as it requires an extra rule firing at match
@@ -176,41 +237,155 @@
                   optional_element_indices_end, omit_these);
 }
 
-void Rules::Add(StringPiece lhs, const std::vector<std::string>& rhs,
+// Returns a copy of `rhs` with a leading `kStartNonterm kFiller` pair and a
+// trailing `kFiller kEndNonterm` pair removed, if present.
+//
+// Right-hand sides with at most two elements are returned unchanged.
+std::vector<Rules::RhsElement> Rules::ResolveAnchors(
+    const std::vector<RhsElement>& rhs) const {
+  if (rhs.size() <= 2) {
+    return rhs;
+  }
+  auto begin = rhs.begin();
+  auto end = rhs.end();
+  if (IsNonterminalOfName(rhs.front(), kStartNonterm) &&
+      IsNonterminalOfName(rhs[1], kFiller)) {
+    // Skip start anchor and filler.
+    begin += 2;
+  }
+  // Only strip the trailing pair if both of its elements are still inside the
+  // remaining range. For a three element rhs of the form
+  // `kStartNonterm kFiller kEndNonterm` the filler belongs to both patterns;
+  // stripping both would move `end` in front of `begin` and hand an invalid
+  // iterator range to the vector constructor below.
+  if (end - begin >= 2 && IsNonterminalOfName(rhs.back(), kEndNonterm) &&
+      IsNonterminalOfName(rhs[rhs.size() - 2], kFiller)) {
+    // Skip filler and end anchor.
+    end -= 2;
+  }
+  return std::vector<Rules::RhsElement>(begin, end);
+}
+
+// Returns a copy of `rhs` in which every pair "non-optional, non-filler
+// element directly followed by a filler" is replaced by a single helper
+// nonterminal. All other elements are copied through unchanged.
+//
+// As a side effect, the rules defining each helper nonterminal are added to
+// this object via `Add`.
+std::vector<Rules::RhsElement> Rules::ResolveFillers(
+    const std::vector<RhsElement>& rhs) {
+  std::vector<RhsElement> result;
+  for (int i = 0; i < rhs.size();) {
+    // Copy the element as is when it is the last one, is itself a filler, is
+    // optional, or is not followed by a filler.
+    if (i == rhs.size() - 1 || IsNonterminalOfName(rhs[i], kFiller) ||
+        rhs[i].is_optional || !IsNonterminalOfName(rhs[i + 1], kFiller)) {
+      result.push_back(rhs[i]);
+      i++;
+      continue;
+    }
+
+    // We have the case:
+    // <a> <filler>
+    // rewrite as:
+    // <a_with_tokens> ::= <a>
+    // <a_with_tokens> ::= <a_with_tokens> <token>
+    const int with_tokens_nonterminal = AddNewNonterminal();
+    const RhsElement token(AddNonterminal(kTokenNonterm),
+                           /*is_optional=*/false);
+    // An optional filler may match no token at all; a required filler needs
+    // at least one token after <a>.
+    if (rhs[i + 1].is_optional) {
+      // <a_with_tokens> ::= <a>
+      Add(with_tokens_nonterminal, {rhs[i]});
+    } else {
+      // <a_with_tokens> ::= <a> <token>
+      Add(with_tokens_nonterminal, {rhs[i], token});
+    }
+    // <a_with_tokens> ::= <a_with_tokens> <token>
+    const RhsElement with_tokens(with_tokens_nonterminal,
+                                 /*is_optional=*/false);
+    Add(with_tokens_nonterminal, {with_tokens, token});
+    result.push_back(with_tokens);
+    // Both <a> and the filler have been consumed.
+    i += 2;
+  }
+  return result;
+}
+
+// Rewrites `rhs` in two steps: anchors are resolved first, then fillers are
+// resolved on the result.
+std::vector<Rules::RhsElement> Rules::OptimizeRhs(
+    const std::vector<RhsElement>& rhs) {
+  const std::vector<RhsElement> without_anchors = ResolveAnchors(rhs);
+  return ResolveFillers(without_anchors);
+}
+
+// Adds the rule `lhs ::= rhs` with the given callback and preconditions.
+//
+// The right-hand side is first rewritten by `OptimizeRhs`; the rewritten form
+// is then expanded into one rule per combination of present/omitted optional
+// elements. The rewritten right-hand side must contain at least one
+// non-optional element, otherwise this fails fatally.
+void Rules::Add(const int lhs, const std::vector<RhsElement>& rhs,
+                const CallbackId callback, const int64 callback_param,
+                const int8 max_whitespace_gap, const bool case_sensitive,
+                const int shard) {
+  // Resolve anchors and fillers.
+  const std::vector<RhsElement> optimized_rhs = OptimizeRhs(rhs);
+
+  // Collect the positions of all optional elements.
+  std::vector<int> optional_element_indices;
+  for (int i = 0; i < optimized_rhs.size(); i++) {
+    if (optimized_rhs[i].is_optional) {
+      optional_element_indices.push_back(i);
+    }
+  }
+
+  // This has to run after the indices were collected: checked against an
+  // empty index list it would only reject an empty rhs and would let through
+  // rules whose elements are all optional.
+  TC3_CHECK_LT(optional_element_indices.size(), optimized_rhs.size())
+      << "Rhs must contain at least one non-optional element.";
+
+  std::vector<bool> omit_these(optimized_rhs.size(), false);
+  ExpandOptionals(lhs, optimized_rhs, callback, callback_param,
+                  max_whitespace_gap, case_sensitive, shard,
+                  optional_element_indices.begin(),
+                  optional_element_indices.end(), &omit_these);
+}
+
+// Adds the rule `lhs ::= rhs` given in string form.
+//
+// Each rhs component is parsed into an `RhsElement`: a trailing `?` marks the
+// component as optional, and components recognized by `IsNonterminal` are
+// registered as nonterminals; everything else is treated as a terminal. The
+// parsed rule is then forwarded to the index based `Add` overload.
+// Fails fatally if `rhs` is empty or `lhs` is a predefined nonterminal.
+// NOTE(review): assumes every rhs component is non-empty (the last character
+// is inspected unconditionally) -- confirm against callers.
+void Rules::Add(const std::string& lhs, const std::vector<std::string>& rhs,
                 const CallbackId callback, const int64 callback_param,
                 const int8 max_whitespace_gap, const bool case_sensitive,
                 const int shard) {
   TC3_CHECK(!rhs.empty()) << "Rhs cannot be empty (Lhs=" << lhs << ")";
-  TC3_CHECK(!IsPredefinedNonterminal(lhs.ToString()));
-
+  TC3_CHECK(!IsPredefinedNonterminal(lhs));
   std::vector<RhsElement> rhs_elements;
-  std::vector<int> optional_element_indices;
+  rhs_elements.reserve(rhs.size());
   for (StringPiece rhs_component : rhs) {
     // Check whether this component is optional.
+    bool is_optional = false;
     if (rhs_component[rhs_component.size() - 1] == '?') {
-      optional_element_indices.push_back(rhs_elements.size());
       rhs_component.RemoveSuffix(1);
+      is_optional = true;
     }
-
     // Check whether this component is a non-terminal.
     if (IsNonterminal(rhs_component)) {
-      rhs_elements.push_back(RhsElement(AddNonterminal(rhs_component)));
+      rhs_elements.push_back(
+          RhsElement(AddNonterminal(rhs_component.ToString()), is_optional));
     } else {
       // A terminal.
       // Sanity check for common typos -- '<' or '>' in a terminal.
       ValidateTerminal(rhs_component);
-      rhs_elements.push_back(RhsElement(rhs_component.ToString()));
+      rhs_elements.push_back(RhsElement(rhs_component.ToString(), is_optional));
     }
   }
+  Add(AddNonterminal(lhs), rhs_elements, callback, callback_param,
+      max_whitespace_gap, case_sensitive, shard);
+}
 
-  TC3_CHECK_LT(optional_element_indices.size(), rhs_elements.size())
-      << "Rhs must contain at least one non-optional element.";
+// Adds `lhs ::= rhs` with the default exclusion callback. The index of
+// `excluded_nonterminal` (registered on demand) is passed as the callback
+// parameter.
+void Rules::AddWithExclusion(const std::string& lhs,
+                             const std::vector<std::string>& rhs,
+                             const std::string& excluded_nonterminal,
+                             const int8 max_whitespace_gap,
+                             const bool case_sensitive, const int shard) {
+  const CallbackId exclusion_callback =
+      static_cast<CallbackId>(DefaultCallback::kExclusion);
+  const int64 excluded_index = AddNonterminal(excluded_nonterminal);
+  Add(lhs, rhs, exclusion_callback, excluded_index, max_whitespace_gap,
+      case_sensitive, shard);
+}
 
-  std::vector<bool> omit_these(rhs_elements.size(), false);
-  ExpandOptionals(AddNonterminal(lhs), rhs_elements, callback, callback_param,
-                  max_whitespace_gap, case_sensitive, shard,
-                  optional_element_indices.begin(),
-                  optional_element_indices.end(), &omit_these);
+// Adds `lhs ::= rhs` with the default assertion callback; `negative` is
+// passed on as the callback parameter.
+void Rules::AddAssertion(const std::string& lhs,
+                         const std::vector<std::string>& rhs,
+                         const bool negative, const int8 max_whitespace_gap,
+                         const bool case_sensitive, const int shard) {
+  const CallbackId assertion_callback =
+      static_cast<CallbackId>(DefaultCallback::kAssertion);
+  const int64 negative_param = negative;
+  Add(lhs, rhs, assertion_callback, negative_param, max_whitespace_gap,
+      case_sensitive, shard);
+}
+
+// Adds `lhs ::= rhs` with the default mapping callback; `value` is passed on
+// as the callback parameter.
+void Rules::AddValueMapping(const std::string& lhs,
+                            const std::vector<std::string>& rhs,
+                            const int64 value, const int8 max_whitespace_gap,
+                            const bool case_sensitive, const int shard) {
+  const CallbackId mapping_callback =
+      static_cast<CallbackId>(DefaultCallback::kMapping);
+  Add(lhs, rhs, mapping_callback, value, max_whitespace_gap, case_sensitive,
+      shard);
+}
+
+// Adds a regex rule for the nonterminal named `lhs`, registering the
+// nonterminal first if it does not exist yet.
+void Rules::AddRegex(const std::string& lhs, const std::string& regex_pattern) {
+  const int lhs_index = AddNonterminal(lhs);
+  AddRegex(lhs_index, regex_pattern);
+}
+
+// Stores `regex_pattern` in the list of regex rules and records its position
+// in that list on the nonterminal with index `lhs`.
+void Rules::AddRegex(int lhs, const std::string& regex_pattern) {
+  const int regex_index = regex_rules_.size();
+  regex_rules_.push_back(regex_pattern);
+  nonterminals_[lhs].regex_rules.push_back(regex_index);
 }
 
 Ir Rules::Finalize(const std::set<std::string>& predefined_nonterminals) const {
@@ -221,7 +396,7 @@
   std::set<std::pair<int, int>> scheduled_rules;
 
   // Define all used predefined nonterminals.
-  for (const auto it : nonterminal_names_) {
+  for (const auto& it : nonterminal_names_) {
     if (IsPredefinedNonterminal(it.first) ||
         predefined_nonterminals.find(it.first) !=
             predefined_nonterminals.end()) {
@@ -233,7 +408,9 @@
   // multiple rules or that have a filter callback on some rule.
   for (int i = 0; i < nonterminals_.size(); i++) {
     const NontermInfo& nonterminal = nonterminals_[i];
-    bool unmergeable = (nonterminal.rules.size() > 1);
+    bool unmergeable =
+        (nonterminal.from_annotation || nonterminal.rules.size() > 1 ||
+         !nonterminal.regex_rules.empty());
     for (const int rule_index : nonterminal.rules) {
       const Rule& rule = rules_[rule_index];
 
@@ -245,12 +422,23 @@
         unmergeable = true;
       }
     }
+
     if (unmergeable) {
       // Define unique nonterminal id.
       nonterminal_ids[i] = rules.AddUnshareableNonterminal(nonterminal.name);
     } else {
       nonterminal_ids[i] = rules.AddNonterminal(nonterminal.name);
     }
+
+    // Define regex rules.
+    for (const int regex_rule : nonterminal.regex_rules) {
+      rules.AddRegex(nonterminal_ids[i], regex_rules_[regex_rule]);
+    }
+  }
+
+  // Define annotations.
+  for (const auto& [annotation, nonterminal] : annotation_nonterminals_) {
+    rules.AddAnnotation(nonterminal_ids[nonterminal], annotation);
   }
 
   // Now, keep adding eligible rules (rules whose rhs is completely assigned)

diff --git a/native/utils/grammar/utils/rules.h b/native/utils/grammar/utils/rules.h
index 42fa7cd..5a2cbc2 100644
--- a/native/utils/grammar/utils/rules.h
+++ b/native/utils/grammar/utils/rules.h

@@ -14,6 +14,9 @@
  * limitations under the License.
  */
 
+// Utility functions for pre-processing, creating and testing context free
+// grammars.
+
 #ifndef LIBTEXTCLASSIFIER_UTILS_GRAMMAR_UTILS_RULES_H_
 #define LIBTEXTCLASSIFIER_UTILS_GRAMMAR_UTILS_RULES_H_
 
@@ -22,13 +25,12 @@
 
 #include "utils/grammar/types.h"
 #include "utils/grammar/utils/ir.h"
-#include "utils/strings/stringpiece.h"
 
 namespace libtextclassifier3::grammar {
 
-// Utility functions for pre-processing, creating and testing context free
-// grammars.
-//
+// Special nonterminals.
+constexpr const char* kFiller = "<filler>";
+
 // All rules for a grammar will be collected in a rules object.
 //
 //    Rules r;
@@ -63,14 +65,16 @@
   // Represents one item in a right-hand side, a single terminal or nonterminal.
   struct RhsElement {
     RhsElement() {}
-    explicit RhsElement(const std::string& terminal)
-        : is_terminal(true), terminal(terminal) {}
-    explicit RhsElement(const int nonterminal)
-        : is_terminal(false), nonterminal(nonterminal) {}
-
+    explicit RhsElement(const std::string& terminal, const bool is_optional)
+        : is_terminal(true), terminal(terminal), is_optional(is_optional) {}
+    explicit RhsElement(const int nonterminal, const bool is_optional)
+        : is_terminal(false),
+          nonterminal(nonterminal),
+          is_optional(is_optional) {}
     bool is_terminal;
     std::string terminal;
     int nonterminal;
+    bool is_optional;
   };
 
   // Represents the right-hand side, and possibly callback, of one rule.
@@ -87,8 +91,14 @@
     // The name of the non-terminal, if defined.
     std::string name;
 
+    // Whether the nonterminal is provided via an annotation.
+    bool from_annotation = false;
+
     // Rules that have this non-terminal as the lhs.
     std::vector<int> rules;
+
+    // Regex rules that have this non-terminal as the lhs.
+    std::vector<int> regex_rules;
   };
 
   // Adds a rule `lhs ::= rhs` with the given callback id and parameter.
@@ -98,13 +108,58 @@
   //  * A terminal
   // optionally followed by a `?` which indicates that the component is
   // optional. The `rhs` must contain at least one non-optional component.
-  void Add(StringPiece lhs, const std::vector<std::string>& rhs,
+  void Add(const std::string& lhs, const std::vector<std::string>& rhs,
            const CallbackId callback = kNoCallback,
-           const int64 callback_param = 0, const int8 max_whitespace_gap = -1,
-           const bool case_sensitive = false, const int shard = 0);
+           const int64 callback_param = 0, int8 max_whitespace_gap = -1,
+           bool case_sensitive = false, int shard = 0);
+
+  // Adds a rule `lhs ::= rhs` with the given callback id and parameter.
+  // The `rhs` must contain at least one non-optional component.
+  void Add(int lhs, const std::vector<RhsElement>& rhs,
+           CallbackId callback = kNoCallback, int64 callback_param = 0,
+           int8 max_whitespace_gap = -1, bool case_sensitive = false,
+           int shard = 0);
+
+  // Adds a rule `lhs ::= rhs` with exclusion.
+  // The rule only matches, if `excluded_nonterminal` doesn't match the same
+  // span.
+  void AddWithExclusion(const std::string& lhs,
+                        const std::vector<std::string>& rhs,
+                        const std::string& excluded_nonterminal,
+                        int8 max_whitespace_gap = -1,
+                        bool case_sensitive = false, int shard = 0);
+
+  // Adds an assertion callback.
+  void AddAssertion(const std::string& lhs, const std::vector<std::string>& rhs,
+                    bool negative = true, int8 max_whitespace_gap = -1,
+                    bool case_sensitive = false, int shard = 0);
+
+  // Adds a mapping callback.
+  void AddValueMapping(const std::string& lhs,
+                       const std::vector<std::string>& rhs, int64 value,
+                       int8 max_whitespace_gap = -1,
+                       bool case_sensitive = false, int shard = 0);
+
+  // Adds a regex rule.
+  void AddRegex(const std::string& lhs, const std::string& regex_pattern);
+  void AddRegex(int lhs, const std::string& regex_pattern);
 
   // Creates a nonterminal with the given name, if one doesn't already exist.
-  int AddNonterminal(StringPiece nonterminal_name);
+  int AddNonterminal(const std::string& nonterminal_name);
+
+  // Creates a new nonterminal.
+  int AddNewNonterminal();
+
+  // Defines a nonterminal for an externally provided annotation.
+  int AddAnnotation(const std::string& annotation_name);
+
+  // Binds an externally provided annotation to the nonterminal with the given
+  // name, creating the nonterminal if it does not exist yet.
+  void BindAnnotation(const std::string& nonterminal_name,
+                      const std::string& annotation_name);
+
+  // Adds an alias for a nonterminal. This is a separate name for the same
+  // nonterminal.
+  void AddAlias(const std::string& nonterminal_name, const std::string& alias);
 
   // Defines a new filter id.
   void DefineFilter(const CallbackId filter_id) { filters_.insert(filter_id); }
@@ -117,23 +172,49 @@
   Ir Finalize(const std::set<std::string>& predefined_nonterminals = {}) const;
 
  private:
-  // Expands optional components in rules.
   void ExpandOptionals(
-      const int lhs, const std::vector<RhsElement>& rhs,
-      const CallbackId callback, const int64 callback_param,
-      const int8 max_whitespace_gap, const bool case_sensitive, const int shard,
-      std::vector<int>::const_iterator optional_element_indices,
+      int lhs, const std::vector<RhsElement>& rhs, CallbackId callback,
+      int64 callback_param, int8 max_whitespace_gap, bool case_sensitive,
+      int shard, std::vector<int>::const_iterator optional_element_indices,
       std::vector<int>::const_iterator optional_element_indices_end,
       std::vector<bool>* omit_these);
 
+  // Applies optimizations to the right hand side of a rule.
+  std::vector<RhsElement> OptimizeRhs(const std::vector<RhsElement>& rhs);
+
+  // Removes start and end anchors in case they are followed (respectively
+  // preceded) by unbounded filler.
+  std::vector<RhsElement> ResolveAnchors(
+      const std::vector<RhsElement>& rhs) const;
+
+  // Rewrites fillers in a rule.
+  // Fillers in a rule such as `lhs ::= <a> <filler> <b>` could be lowered as
+  // <tokens> ::= <token>
+  // <tokens> ::= <tokens> <token>
+  // This has the disadvantage that it will produce a match for each possible
+  // span in the text, which is quadratic in the number of tokens.
+  // It can be more efficiently written as:
+  // `lhs ::= <a_with_tokens> <b>` with
+  // `<a_with_tokens> ::= <a>`
+  // `<a_with_tokens> ::= <a_with_tokens> <token>`
+  // In this each occurrence of `<a>` can start a sequence of tokens.
+  std::vector<RhsElement> ResolveFillers(const std::vector<RhsElement>& rhs);
+
+  // Checks whether an element denotes a specific nonterminal.
+  bool IsNonterminalOfName(const RhsElement& element,
+                           const std::string& nonterminal) const;
+
   const int num_shards_;
 
   // Non-terminal to id map.
   std::unordered_map<std::string, int> nonterminal_names_;
   std::vector<NontermInfo> nonterminals_;
+  std::unordered_map<std::string, std::string> nonterminal_alias_;
+  std::unordered_map<std::string, int> annotation_nonterminals_;
 
   // Rules.
   std::vector<Rule> rules_;
+  std::vector<std::string> regex_rules_;
 
   // Ids of callbacks that should be treated as filters.
   std::unordered_set<CallbackId> filters_;

diff --git a/native/utils/grammar/utils/rules_test.cc b/native/utils/grammar/utils/rules_test.cc
new file mode 100644
index 0000000..6761118
--- /dev/null
+++ b/native/utils/grammar/utils/rules_test.cc

@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/grammar/utils/rules.h"
+
+#include "utils/grammar/rules_generated.h"
+#include "utils/grammar/utils/ir.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3::grammar {
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+// Serializes a small grammar without callbacks and checks the shape of the
+// result: a single rules shard, no lhs entries, the expected terminal string
+// pool and the expected number of binary and unary rules.
+TEST(SerializeRulesTest, HandlesSimpleRuleSet) {
+  Rules rules;
+
+  rules.Add("<verb>", {"buy"});
+  rules.Add("<verb>", {"bring"});
+  rules.Add("<verb>", {"remind"});
+  rules.Add("<reminder>", {"remind", "me", "to", "<verb>"});
+  rules.Add("<action>", {"<reminder>"});
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
+  // The explicit length is needed because the literal contains embedded NULs.
+  EXPECT_EQ(frozen_rules.terminals,
+            std::string("bring\0buy\0me\0remind\0to\0", 23));
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
+}
+
+// Serializes a grammar in which some rules carry an output callback and one
+// rule carries a callback registered as a filter, and checks the number of
+// serialized lhs entries alongside the terminal pool and rule counts.
+TEST(SerializeRulesTest, HandlesRulesSetWithCallbacks) {
+  Rules rules;
+  const CallbackId output = 1;
+  const CallbackId filter = 2;
+  rules.DefineFilter(filter);
+
+  rules.Add("<verb>", {"buy"});
+  rules.Add("<verb>", {"bring"}, output, 0);
+  rules.Add("<verb>", {"remind"}, output, 0);
+  rules.Add("<reminder>", {"remind", "me", "to", "<verb>"});
+  rules.Add("<action>", {"<reminder>"}, filter, 0);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_EQ(frozen_rules.terminals,
+            std::string("bring\0buy\0me\0remind\0to\0", 23));
+
+  // We have two identical output calls and one filter call in the rule set
+  // definition above.
+  EXPECT_THAT(frozen_rules.lhs, SizeIs(2));
+
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
+}
+
+// Serializes a grammar in which one rule limits the whitespace gap to zero
+// and checks that exactly one lhs entry is emitted.
+TEST(SerializeRulesTest, HandlesRulesWithWhitespaceGapLimits) {
+  Rules rules;
+  rules.Add("<iata>", {"lx"});
+  rules.Add("<iata>", {"aa"});
+  rules.Add("<flight>", {"<iata>", "<4_digits>"}, kNoCallback, 0,
+            /*max_whitespace_gap=*/0);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_EQ(frozen_rules.terminals, std::string("aa\0lx\0", 6));
+  EXPECT_THAT(frozen_rules.lhs, SizeIs(1));
+}
+
+// Mixes case sensitive and case insensitive terminal rules and checks that
+// all of them end up in the serialized terminal pool with their original
+// casing.
+TEST(SerializeRulesTest, HandlesCaseSensitiveTerminals) {
+  Rules rules;
+  rules.Add("<iata>", {"LX"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/true);
+  rules.Add("<iata>", {"AA"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/true);
+  rules.Add("<iata>", {"dl"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/false);
+  rules.Add("<flight>", {"<iata>", "<4_digits>"}, kNoCallback, 0,
+            /*max_whitespace_gap=*/0);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_EQ(frozen_rules.terminals, std::string("AA\0LX\0dl\0", 9));
+  EXPECT_THAT(frozen_rules.lhs, SizeIs(1));
+}
+
+// Places rules into two different shards and checks that two rules entries
+// are serialized while the terminal pool is shared between them.
+TEST(SerializeRulesTest, HandlesMultipleShards) {
+  Rules rules(/*num_shards=*/2);
+  rules.Add("<iata>", {"LX"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/true, /*shard=*/0);
+  rules.Add("<iata>", {"aa"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/false, /*shard=*/1);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(2));
+  EXPECT_EQ(frozen_rules.terminals, std::string("LX\0aa\0", 6));
+}
+
+// Adds two regex rules and checks that both show up in the serialized
+// regex annotator list.
+TEST(SerializeRulesTest, HandlesRegexRules) {
+  Rules rules;
+  rules.AddRegex("<code>", "[A-Z]+");
+  rules.AddRegex("<numbers>", "\\d+");
+  RulesSetT frozen_rules;
+  rules.Finalize().Serialize(/*include_debug_information=*/false,
+                             &frozen_rules);
+  EXPECT_THAT(frozen_rules.regex_annotator, SizeIs(2));
+}
+
+// Registers an alias after the rules were added and checks that the alias
+// alone does not produce additional unary rules or lhs entries.
+TEST(SerializeRulesTest, HandlesAlias) {
+  Rules rules;
+  rules.Add("<iata>", {"lx"});
+  rules.Add("<iata>", {"aa"});
+  rules.Add("<flight>", {"<iata>", "<4_digits>"});
+  // Per the AddAlias declaration the first argument is the nonterminal name
+  // and the second one is its alias.
+  rules.AddAlias("<flight_number>", "<flight>");
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_EQ(frozen_rules.terminals, std::string("aa\0lx\0", 6));
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(1));
+
+  // Only alias, no rule.
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, IsEmpty());
+
+  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
+}
+
+// Adds a rule that is wrapped in start/end anchors with adjacent fillers and
+// checks that the serialized grammar only contains the inner terminal
+// sequence.
+TEST(SerializeRulesTest, ResolvesAnchorsAndFillers) {
+  Rules rules;
+  rules.Add("<code>",
+            {"<^>", "<filler>", "this", "is", "a", "test", "<filler>", "<$>"});
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  // NOTE(review): "is" has no entry of its own in the expected pool;
+  // presumably it is shared with the tail of "this" -- confirm against the
+  // string pool serialization.
+  EXPECT_EQ(frozen_rules.terminals, std::string("a\0test\0this\0", 12));
+
+  // Expect removal of anchors and fillers in this case.
+  // The rule above is equivalent to: <code> ::= this is a test, binarized into
+  // <tmp_0> ::= this is
+  // <tmp_1> ::= <tmp_0> a
+  // <code>  ::= <tmp_1> test
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
+
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, IsEmpty());
+  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
+}
+
+// Registers three annotation nonterminals and checks that all of them are
+// serialized. The expected entries are in lexicographic key order, which
+// differs from the order in which they were added.
+TEST(SerializeRulesTest, HandlesAnnotations) {
+  Rules rules;
+  rules.AddAnnotation("phone");
+  rules.AddAnnotation("url");
+  rules.AddAnnotation("tracking_number");
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_THAT(frozen_rules.nonterminals->annotation_nt, SizeIs(3));
+  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[0]->key, "phone");
+  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[1]->key,
+            "tracking_number");
+  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[2]->key, "url");
+}
+
+}  // namespace
+}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/i18n/locale_test.cc b/native/utils/i18n/locale_test.cc
new file mode 100644
index 0000000..faea4f6
--- /dev/null
+++ b/native/utils/i18n/locale_test.cc

@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/i18n/locale.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// The locale returned by Locale::Invalid() must report itself as not valid.
+// Note that no parsing is involved here despite the test name.
+TEST(LocaleTest, ParseUnknown) {
+  Locale locale = Locale::Invalid();
+  EXPECT_FALSE(locale.IsValid());
+}
+
+// A language-region tag yields the language and region and an empty script.
+TEST(LocaleTest, ParseSwissEnglish) {
+  Locale locale = Locale::FromBCP47("en-CH");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "en");
+  EXPECT_EQ(locale.Script(), "");
+  EXPECT_EQ(locale.Region(), "CH");
+}
+
+// Same as above for "zh-CN": language and region are set, script is empty.
+TEST(LocaleTest, ParseChineseChina) {
+  Locale locale = Locale::FromBCP47("zh-CN");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "zh");
+  EXPECT_EQ(locale.Script(), "");
+  EXPECT_EQ(locale.Region(), "CN");
+}
+
+// A full language-script-region tag populates all three components.
+TEST(LocaleTest, ParseChineseTaiwan) {
+  Locale locale = Locale::FromBCP47("zh-Hant-TW");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "zh");
+  EXPECT_EQ(locale.Script(), "Hant");
+  EXPECT_EQ(locale.Region(), "TW");
+}
+
+// A bare language tag is valid and leaves script and region empty.
+TEST(LocaleTest, ParseEnglish) {
+  Locale locale = Locale::FromBCP47("en");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "en");
+  EXPECT_EQ(locale.Script(), "");
+  EXPECT_EQ(locale.Region(), "");
+}
+
+// A language-script tag without region populates language and script only.
+// NOTE(review): the test name misspells "Chinese"; renaming it would change
+// the name used by test filters, so it is left as is here.
+TEST(LocaleTest, ParseCineseTraditional) {
+  Locale locale = Locale::FromBCP47("zh-Hant");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "zh");
+  EXPECT_EQ(locale.Script(), "Hant");
+  EXPECT_EQ(locale.Region(), "");
+}
+
+// One of the requested locales ("en-UK") is expected to be covered by the
+// supported language-only locale "en".
+TEST(LocaleTest, IsAnyLocaleSupportedMatch) {
+  std::vector<Locale> locales = {Locale::FromBCP47("zh-HK"),
+                                 Locale::FromBCP47("en-UK")};
+  std::vector<Locale> supported_locales = {Locale::FromBCP47("en")};
+
+  EXPECT_TRUE(Locale::IsAnyLocaleSupported(locales, supported_locales,
+                                           /*default_value=*/false));
+}
+
+// None of the requested locales is expected to be covered by the supported
+// locales.
+TEST(LocaleTest, IsAnyLocaleSupportedNotMatch) {
+  std::vector<Locale> locales = {Locale::FromBCP47("zh-tw")};
+  std::vector<Locale> supported_locales = {Locale::FromBCP47("en"),
+                                           Locale::FromBCP47("fr")};
+
+  EXPECT_FALSE(Locale::IsAnyLocaleSupported(locales, supported_locales,
+                                            /*default_value=*/false));
+}
+
+// The wildcard supported locale "*" is expected to cover any requested
+// locale.
+TEST(LocaleTest, IsAnyLocaleSupportedAnyLocale) {
+  std::vector<Locale> locales = {Locale::FromBCP47("zh-tw")};
+  std::vector<Locale> supported_locales = {Locale::FromBCP47("*")};
+
+  EXPECT_TRUE(Locale::IsAnyLocaleSupported(locales, supported_locales,
+                                           /*default_value=*/false));
+}
+
+// With an empty list of requested locales the call is expected to return
+// true when `default_value` is true.
+TEST(LocaleTest, IsAnyLocaleSupportedEmptyLocales) {
+  std::vector<Locale> supported_locales = {Locale::FromBCP47("en")};
+
+  EXPECT_TRUE(Locale::IsAnyLocaleSupported({}, supported_locales,
+                                           /*default_value=*/true));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/intents/intent-config.fbs b/native/utils/intents/intent-config.fbs
index 76a0ddc..672eb9d 100755
--- a/native/utils/intents/intent-config.fbs
+++ b/native/utils/intents/intent-config.fbs

@@ -132,7 +132,6 @@
   type:AndroidSimpleIntentGeneratorExtraType;
 
   string_:string (shared);
-
   bool_:bool;
   int32_:int;
 }
@@ -141,9 +140,7 @@
 namespace libtextclassifier3;
 table AndroidSimpleIntentGeneratorCondition {
   type:AndroidSimpleIntentGeneratorConditionType;
-
   string_:string (shared);
-
   int32_:int;
   int64_:long;
 }

diff --git a/native/utils/intents/jni.cc b/native/utils/intents/jni.cc
index ca4cc28..051d078 100644
--- a/native/utils/intents/jni.cc
+++ b/native/utils/intents/jni.cc

@@ -175,40 +175,41 @@
     case Variant::TYPE_INT_VALUE:
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_int_, name.get(),
-                                  value.IntValue());
+                                  value.Value<int>());
 
     case Variant::TYPE_INT64_VALUE:
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_long_, name.get(),
-                                  value.Int64Value());
+                                  value.Value<int64>());
 
     case Variant::TYPE_FLOAT_VALUE:
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_float_, name.get(),
-                                  value.FloatValue());
+                                  value.Value<float>());
 
     case Variant::TYPE_DOUBLE_VALUE:
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_double_, name.get(),
-                                  value.DoubleValue());
+                                  value.Value<double>());
 
     case Variant::TYPE_BOOL_VALUE:
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_bool_, name.get(),
-                                  value.BoolValue());
+                                  value.Value<bool>());
 
     case Variant::TYPE_STRING_VALUE: {
       TC3_ASSIGN_OR_RETURN(
           ScopedLocalRef<jstring> value_jstring,
-          jni_cache_->ConvertToJavaString(value.StringValue()));
+          jni_cache_->ConvertToJavaString(value.ConstRefValue<std::string>()));
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_string_, name.get(),
                                   value_jstring.get());
     }
 
     case Variant::TYPE_STRING_VECTOR_VALUE: {
-      TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jobjectArray> value_jstring_array,
-                           AsStringArray(value.StringVectorValue()));
+      TC3_ASSIGN_OR_RETURN(
+          ScopedLocalRef<jobjectArray> value_jstring_array,
+          AsStringArray(value.ConstRefValue<std::vector<std::string>>()));
 
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_string_array_, name.get(),
@@ -216,8 +217,9 @@
     }
 
     case Variant::TYPE_FLOAT_VECTOR_VALUE: {
-      TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jfloatArray> value_jfloat_array,
-                           AsFloatArray(value.FloatVectorValue()));
+      TC3_ASSIGN_OR_RETURN(
+          ScopedLocalRef<jfloatArray> value_jfloat_array,
+          AsFloatArray(value.ConstRefValue<std::vector<float>>()));
 
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_float_array_, name.get(),
@@ -226,7 +228,7 @@
 
     case Variant::TYPE_INT_VECTOR_VALUE: {
       TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jintArray> value_jint_array,
-                           AsIntArray(value.IntVectorValue()));
+                           AsIntArray(value.ConstRefValue<std::vector<int>>()));
 
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_int_array_, name.get(),
@@ -234,8 +236,10 @@
     }
 
     case Variant::TYPE_STRING_VARIANT_MAP_VALUE: {
-      TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jobjectArray> value_jobect_array,
-                           AsNamedVariantArray(value.StringVariantMapValue()));
+      TC3_ASSIGN_OR_RETURN(
+          ScopedLocalRef<jobjectArray> value_jobect_array,
+          AsNamedVariantArray(
+              value.ConstRefValue<std::map<std::string, Variant>>()));
       return JniHelper::NewObject(env, named_variant_class_.get(),
                                   named_variant_from_named_variant_array_,
                                   name.get(), value_jobect_array.get());
@@ -263,7 +267,7 @@
       JniHelper::NewObjectArray(jni_cache_->GetEnv(), values.size(),
                                 named_variant_class_.get(), nullptr));
   int element_index = 0;
-  for (auto key_value_pair : values) {
+  for (const auto& key_value_pair : values) {
     if (!key_value_pair.second.HasValue()) {
       element_index++;
       continue;

diff --git a/native/utils/java/jni-helper.cc b/native/utils/java/jni-helper.cc
index de53bbe..d1677e4 100644
--- a/native/utils/java/jni-helper.cc
+++ b/native/utils/java/jni-helper.cc

@@ -139,6 +139,14 @@
   return result;
 }
 
+Status JniHelper::SetObjectArrayElement(JNIEnv* env, jobjectArray array,
+                                        jsize index, jobject val) {
+  TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
+  env->SetObjectArrayElement(array, index, val);
+  TC3_NO_EXCEPTION_OR_RETURN;
+  return Status::OK;
+}
+
 StatusOr<ScopedLocalRef<jobjectArray>> JniHelper::NewObjectArray(
     JNIEnv* env, jsize length, jclass element_class, jobject initial_element) {
   TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
@@ -149,6 +157,14 @@
   return result;
 }
 
+StatusOr<jsize> JniHelper::GetArrayLength(JNIEnv* env,
+                                          jarray array) {
+  TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
+  jsize result = env->GetArrayLength(array);  // Works for any jarray type.
+  TC3_NO_EXCEPTION_OR_RETURN;
+  return result;
+}
+
 StatusOr<ScopedLocalRef<jstring>> JniHelper::NewStringUTF(JNIEnv* env,
                                                           const char* bytes) {
   TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;

diff --git a/native/utils/java/jni-helper.h b/native/utils/java/jni-helper.h
index aa26326..55d4696 100644
--- a/native/utils/java/jni-helper.h
+++ b/native/utils/java/jni-helper.h

@@ -100,6 +100,11 @@
   static StatusOr<ScopedLocalRef<jfloatArray>> NewFloatArray(JNIEnv* env,
                                                              jsize length);
 
+  static StatusOr<jsize> GetArrayLength(JNIEnv* env, jarray array);
+
+  static Status SetObjectArrayElement(JNIEnv* env, jobjectArray array,
+                                      jsize index, jobject val);
+
   // Call* methods.
   TC3_DEFINE_VARIADIC_SCOPED_LOCAL_REF_ENV_METHOD(CallObjectMethod, jobject,
                                                   jobject, TC3_JNI_NO_CHECK);

diff --git a/native/utils/lua-utils.cc b/native/utils/lua-utils.cc
index fa19923..d6fe2c4 100644
--- a/native/utils/lua-utils.cc
+++ b/native/utils/lua-utils.cc

@@ -223,6 +223,11 @@
 
 int LuaEnvironment::ReadFlatbuffer(const int index,
                                    ReflectiveFlatbuffer* buffer) const {
+  if (buffer == nullptr) {
+    TC3_LOG(ERROR) << "Called ReadFlatbuffer with null buffer: " << index;
+    lua_error(state_);
+    return LUA_ERRRUN;
+  }
   if (lua_type(state_, /*idx=*/index) != LUA_TTABLE) {
     TC3_LOG(ERROR) << "Expected table, got: "
                    << lua_type(state_, /*idx=*/kIndexStackTop);
@@ -278,48 +283,48 @@
         // Read repeated field.
         switch (field->type()->element()) {
           case reflection::Bool:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<bool>(field));
+            ReadRepeatedField<bool>(/*index=*/kIndexStackTop,
+                                    buffer->Repeated(field));
             break;
           case reflection::Byte:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<int8>(field));
+            ReadRepeatedField<int8>(/*index=*/kIndexStackTop,
+                                    buffer->Repeated(field));
             break;
           case reflection::UByte:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<uint8>(field));
+            ReadRepeatedField<uint8>(/*index=*/kIndexStackTop,
+                                     buffer->Repeated(field));
             break;
           case reflection::Int:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<int32>(field));
+            ReadRepeatedField<int32>(/*index=*/kIndexStackTop,
+                                     buffer->Repeated(field));
             break;
           case reflection::UInt:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<uint32>(field));
+            ReadRepeatedField<uint32>(/*index=*/kIndexStackTop,
+                                      buffer->Repeated(field));
             break;
           case reflection::Long:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<int64>(field));
+            ReadRepeatedField<int64>(/*index=*/kIndexStackTop,
+                                     buffer->Repeated(field));
             break;
           case reflection::ULong:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<uint64>(field));
+            ReadRepeatedField<uint64>(/*index=*/kIndexStackTop,
+                                      buffer->Repeated(field));
             break;
           case reflection::Float:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<float>(field));
+            ReadRepeatedField<float>(/*index=*/kIndexStackTop,
+                                     buffer->Repeated(field));
             break;
           case reflection::Double:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<double>(field));
+            ReadRepeatedField<double>(/*index=*/kIndexStackTop,
+                                      buffer->Repeated(field));
             break;
           case reflection::String:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<std::string>(field));
+            ReadRepeatedField<std::string>(/*index=*/kIndexStackTop,
+                                           buffer->Repeated(field));
             break;
           case reflection::Obj:
-            ReadRepeatedField(/*index=*/kIndexStackTop,
-                              buffer->Repeated<ReflectiveFlatbuffer>(field));
+            ReadRepeatedField<ReflectiveFlatbuffer>(/*index=*/kIndexStackTop,
+                                                    buffer->Repeated(field));
             break;
           default:
             TC3_LOG(ERROR) << "Unsupported repeated field type: "

diff --git a/native/utils/lua-utils.h b/native/utils/lua-utils.h
index f602aa0..b01471a 100644
--- a/native/utils/lua-utils.h
+++ b/native/utils/lua-utils.h

@@ -506,15 +506,15 @@
 
   // Reads a repeated field from lua.
   template <typename T>
-  void ReadRepeatedField(const int index, TypedRepeatedField<T>* result) const {
+  void ReadRepeatedField(const int index, RepeatedField* result) const {
     for (const auto& element : ReadVector<T>(index)) {
       result->Add(element);
     }
   }
 
   template <>
-  void ReadRepeatedField<ReflectiveFlatbuffer>(
-      const int index, TypedRepeatedField<ReflectiveFlatbuffer>* result) const {
+  void ReadRepeatedField<ReflectiveFlatbuffer>(const int index,
+                                               RepeatedField* result) const {
     lua_pushnil(state_);
     while (Next(index - 1)) {
       ReadFlatbuffer(index, result->Add());

diff --git a/native/utils/lua-utils_test.cc b/native/utils/lua-utils_test.cc
new file mode 100644
index 0000000..8c9f8de
--- /dev/null
+++ b/native/utils/lua-utils_test.cc

@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/lua-utils.h"
+
+#include <string>
+
+#include "utils/flatbuffers.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+using testing::Eq;
+using testing::FloatEq;
+
+std::string TestFlatbufferSchema() {
+  // Creates a test schema for flatbuffer passing tests.
+  // Cannot use the object oriented API here as that is not available for the
+  // reflection schema.
+  flatbuffers::FlatBufferBuilder schema_builder;
+  std::vector<flatbuffers::Offset<reflection::Field>> fields = {
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("float_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Float),
+          /*id=*/0,
+          /*offset=*/4),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("nested_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Obj,
+                                 /*element=*/reflection::None,
+                                 /*index=*/0 /* self */),
+          /*id=*/1,
+          /*offset=*/6),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("repeated_nested_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Vector,
+                                 /*element=*/reflection::Obj,
+                                 /*index=*/0 /* self */),
+          /*id=*/2,
+          /*offset=*/8),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("repeated_string_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Vector,
+                                 /*element=*/reflection::String),
+          /*id=*/3,
+          /*offset=*/10),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("string_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/4,
+          /*offset=*/12)};
+
+  std::vector<flatbuffers::Offset<reflection::Enum>> enums;
+  std::vector<flatbuffers::Offset<reflection::Object>> objects = {
+      reflection::CreateObject(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("TestData"),
+          /*fields=*/
+          schema_builder.CreateVectorOfSortedTables(&fields))};
+  schema_builder.Finish(reflection::CreateSchema(
+      schema_builder, schema_builder.CreateVectorOfSortedTables(&objects),
+      schema_builder.CreateVectorOfSortedTables(&enums),
+      /*(unused) file_ident=*/0,
+      /*(unused) file_ext=*/0,
+      /*root_table=*/ objects[0]));
+  return std::string(
+      reinterpret_cast<const char*>(schema_builder.GetBufferPointer()),
+      schema_builder.GetSize());
+}
+
+class LuaUtilsTest : public testing::Test, protected LuaEnvironment {
+ protected:
+  LuaUtilsTest()
+      : serialized_flatbuffer_schema_(TestFlatbufferSchema()),
+        schema_(flatbuffers::GetRoot<reflection::Schema>(
+            serialized_flatbuffer_schema_.data())),
+        flatbuffer_builder_(schema_) {
+    EXPECT_THAT(RunProtected([this] {
+                  LoadDefaultLibraries();
+                  return LUA_OK;
+                }),
+                Eq(LUA_OK));
+  }
+
+  void RunScript(StringPiece script) {
+    EXPECT_THAT(luaL_loadbuffer(state_, script.data(), script.size(),
+                                /*name=*/nullptr),
+                Eq(LUA_OK));
+    EXPECT_THAT(
+        lua_pcall(state_, /*nargs=*/0, /*num_results=*/1, /*errfunc=*/0),
+        Eq(LUA_OK));
+  }
+
+  const std::string serialized_flatbuffer_schema_;
+  const reflection::Schema* schema_;
+  ReflectiveFlatbufferBuilder flatbuffer_builder_;
+};
+
+TEST_F(LuaUtilsTest, HandlesVectors) {
+  {
+    PushVector(std::vector<int64>{1, 2, 3, 4, 5});
+    EXPECT_THAT(ReadVector<int64>(), ElementsAre(1, 2, 3, 4, 5));
+  }
+  {
+    PushVector(std::vector<std::string>{"hello", "there"});
+    EXPECT_THAT(ReadVector<std::string>(), ElementsAre("hello", "there"));
+  }
+  {
+    PushVector(std::vector<bool>{true, true, false});
+    EXPECT_THAT(ReadVector<bool>(), ElementsAre(true, true, false));
+  }
+}
+
+TEST_F(LuaUtilsTest, HandlesVectorIterators) {
+  {
+    const std::vector<int64> elements = {1, 2, 3, 4, 5};
+    PushVectorIterator(&elements);
+    EXPECT_THAT(ReadVector<int64>(), ElementsAre(1, 2, 3, 4, 5));
+  }
+  {
+    const std::vector<std::string> elements = {"hello", "there"};
+    PushVectorIterator(&elements);
+    EXPECT_THAT(ReadVector<std::string>(), ElementsAre("hello", "there"));
+  }
+  {
+    const std::vector<bool> elements = {true, true, false};
+    PushVectorIterator(&elements);
+    EXPECT_THAT(ReadVector<bool>(), ElementsAre(true, true, false));
+  }
+}
+
+TEST_F(LuaUtilsTest, ReadsFlatbufferResults) {
+  // Run a script that returns a table shaped like the test schema.
+  RunScript(R"lua(
+    return {
+        float_field = 42.1,
+        string_field = "hello there",
+
+        -- Nested field.
+        nested_field = {
+          float_field = 64,
+          string_field = "hello nested",
+        },
+
+        -- Repeated fields.
+        repeated_string_field = { "a", "bold", "one" },
+        repeated_nested_field = {
+          { string_field = "a" },
+          { string_field = "b" },
+          { repeated_string_field = { "nested", "nested2" } },
+        },
+    }
+  )lua");
+
+  // Read the flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  ReadFlatbuffer(/*index=*/-1, buffer.get());
+  const std::string serialized_buffer = buffer->Serialize();
+
+  // Check fields. As there is no flatbuffer-compiler generated code for the
+  // ad hoc test schema, the fields are read manually using the offsets
+  // declared in TestFlatbufferSchema().
+  const flatbuffers::Table* flatbuffer_data =
+      flatbuffers::GetRoot<flatbuffers::Table>(serialized_buffer.data());
+  EXPECT_THAT(flatbuffer_data->GetField<float>(/*field=*/4, /*defaultval=*/0),
+              FloatEq(42.1));
+  EXPECT_THAT(
+      flatbuffer_data->GetPointer<const flatbuffers::String*>(/*field=*/12)
+          ->str(),
+      "hello there");
+
+  // Read the nested field.
+  const flatbuffers::Table* nested_field =
+      flatbuffer_data->GetPointer<const flatbuffers::Table*>(/*field=*/6);
+  EXPECT_THAT(nested_field->GetField<float>(/*field=*/4, /*defaultval=*/0),
+              FloatEq(64));
+  EXPECT_THAT(
+      nested_field->GetPointer<const flatbuffers::String*>(/*field=*/12)->str(),
+      "hello nested");
+
+  // Read the repeated string field.
+  auto repeated_strings = flatbuffer_data->GetPointer<
+      flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>*>(
+      /*field=*/10);
+  EXPECT_THAT(repeated_strings->size(), Eq(3));
+  EXPECT_THAT(repeated_strings->GetAsString(0)->str(), Eq("a"));
+  EXPECT_THAT(repeated_strings->GetAsString(1)->str(), Eq("bold"));
+  EXPECT_THAT(repeated_strings->GetAsString(2)->str(), Eq("one"));
+
+  // Read the repeated nested field (only the first two entries are checked).
+  auto repeated_nested_fields = flatbuffer_data->GetPointer<
+      flatbuffers::Vector<flatbuffers::Offset<flatbuffers::Table>>*>(
+      /*field=*/8);
+  EXPECT_THAT(repeated_nested_fields->size(), Eq(3));
+  EXPECT_THAT(repeated_nested_fields->Get(0)
+                  ->GetPointer<const flatbuffers::String*>(/*field=*/12)
+                  ->str(),
+              "a");
+  EXPECT_THAT(repeated_nested_fields->Get(1)
+                  ->GetPointer<const flatbuffers::String*>(/*field=*/12)
+                  ->str(),
+              "b");
+}
+
+TEST_F(LuaUtilsTest, HandlesSimpleFlatbufferFields) {
+  // Create a test flatbuffer and expose it to Lua as the global `arg`.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  buffer->Set("float_field", 42.f);
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  lua_setglobal(state_, "arg");
+
+  // Run a script that returns the float field of the global `arg`.
+  RunScript(R"lua(
+    return arg.float_field
+  )lua");
+
+  EXPECT_THAT(Read<float>(), FloatEq(42));
+}
+
+TEST_F(LuaUtilsTest, HandlesRepeatedFlatbufferFields) {
+  // Create test flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  RepeatedField* repeated_field = buffer->Repeated("repeated_string_field");
+  repeated_field->Add("this");
+  repeated_field->Add("is");
+  repeated_field->Add("a");
+  repeated_field->Add("test");
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  lua_setglobal(state_, "arg");
+
+  // Return flatbuffer repeated field as vector.
+  RunScript(R"lua(
+    return arg.repeated_string_field
+  )lua");
+
+  EXPECT_THAT(ReadVector<std::string>(),
+              ElementsAre("this", "is", "a", "test"));
+}
+
+TEST_F(LuaUtilsTest, HandlesRepeatedNestedFlatbufferFields) {
+  // Create test flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  RepeatedField* repeated_field = buffer->Repeated("repeated_nested_field");
+  repeated_field->Add()->Set("string_field", "hello");
+  repeated_field->Add()->Set("string_field", "my");
+  ReflectiveFlatbuffer* nested = repeated_field->Add();
+  nested->Set("string_field", "old");
+  RepeatedField* nested_repeated = nested->Repeated("repeated_string_field");
+  nested_repeated->Add("friend");
+  nested_repeated->Add("how");
+  nested_repeated->Add("are");
+  repeated_field->Add()->Set("string_field", "you?");
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  lua_setglobal(state_, "arg");
+
+  RunScript(R"lua(
+    result = {}
+    for _, nested in pairs(arg.repeated_nested_field) do
+      result[#result + 1] = nested.string_field
+      for _, nested_string in pairs(nested.repeated_string_field) do
+        result[#result + 1] = nested_string
+      end
+    end
+    return result
+  )lua");
+
+  EXPECT_THAT(
+      ReadVector<std::string>(),
+      ElementsAre("hello", "my", "old", "friend", "how", "are", "you?"));
+}
+
+TEST_F(LuaUtilsTest, CorrectlyReadsTwoFlatbuffersSimultaneously) {
+  // The first flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  buffer->Set("string_field", "first");
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  lua_setglobal(state_, "arg");
+  // The second flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer2 = flatbuffer_builder_.NewRoot();
+  buffer2->Set("string_field", "second");
+  const std::string serialized_buffer2 = buffer2->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer2.data()));
+  lua_setglobal(state_, "arg2");
+
+  RunScript(R"lua(
+    return {arg.string_field, arg2.string_field}
+  )lua");
+
+  EXPECT_THAT(ReadVector<std::string>(), ElementsAre("first", "second"));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/math/fastexp.h b/native/utils/math/fastexp.h
index f690c73..8128627 100644
--- a/native/utils/math/fastexp.h
+++ b/native/utils/math/fastexp.h

@@ -31,9 +31,9 @@
 
 class FastMathClass {
  private:
-  static const int kBits = 7;
-  static const int kMask1 = (1 << kBits) - 1;
-  static const int kMask2 = 0xFF << kBits;
+  static constexpr int kBits = 7;
+  static constexpr int kMask1 = (1 << kBits) - 1;
+  static constexpr int kMask2 = 0xFF << kBits;
   static constexpr float kLogBase2OfE = 1.44269504088896340736f;
 
   struct Table {

diff --git a/native/utils/normalization.cc b/native/utils/normalization.cc
index fd64dbb..f9623f7 100644
--- a/native/utils/normalization.cc
+++ b/native/utils/normalization.cc

@@ -21,14 +21,14 @@
 
 namespace libtextclassifier3 {
 
-UnicodeText NormalizeText(const UniLib* unilib,
+UnicodeText NormalizeText(const UniLib& unilib,
                           const NormalizationOptions* normalization_options,
                           const UnicodeText& text) {
   return NormalizeTextCodepointWise(
       unilib, normalization_options->codepointwise_normalization(), text);
 }
 
-UnicodeText NormalizeTextCodepointWise(const UniLib* unilib,
+UnicodeText NormalizeTextCodepointWise(const UniLib& unilib,
                                        const uint32 codepointwise_ops,
                                        const UnicodeText& text) {
   // Sanity check.
@@ -42,7 +42,7 @@
     // Skip whitespace.
     if ((codepointwise_ops &
          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE) &&
-        unilib->IsWhitespace(codepoint)) {
+        unilib.IsWhitespace(codepoint)) {
       continue;
     }
 
@@ -50,7 +50,7 @@
     if ((codepointwise_ops &
          NormalizationOptions_::
              CodepointwiseNormalizationOp_DROP_PUNCTUATION) &&
-        unilib->IsPunctuation(codepoint)) {
+        unilib.IsPunctuation(codepoint)) {
       continue;
     }
 
@@ -59,12 +59,12 @@
     // Lower case.
     if (codepointwise_ops &
         NormalizationOptions_::CodepointwiseNormalizationOp_LOWERCASE) {
-      normalized_codepoint = unilib->ToLower(normalized_codepoint);
+      normalized_codepoint = unilib.ToLower(normalized_codepoint);
 
       // Upper case.
     } else if (codepointwise_ops &
                NormalizationOptions_::CodepointwiseNormalizationOp_UPPERCASE) {
-      normalized_codepoint = unilib->ToUpper(normalized_codepoint);
+      normalized_codepoint = unilib.ToUpper(normalized_codepoint);
     }
 
     result.push_back(normalized_codepoint);

diff --git a/native/utils/normalization.h b/native/utils/normalization.h
index 0ded163..ff00783 100644
--- a/native/utils/normalization.h
+++ b/native/utils/normalization.h

@@ -27,14 +27,14 @@
 namespace libtextclassifier3 {
 
 // Normalizes a text according to the options.
-UnicodeText NormalizeText(const UniLib* unilib,
+UnicodeText NormalizeText(const UniLib& unilib,
                           const NormalizationOptions* normalization_options,
                           const UnicodeText& text);
 
 // Normalizes a text codepoint wise by applying each codepoint wise op in
 // `codepointwise_ops` that is interpreted as a set of
 // `CodepointwiseNormalizationOp`.
-UnicodeText NormalizeTextCodepointWise(const UniLib* unilib,
+UnicodeText NormalizeTextCodepointWise(const UniLib& unilib,
                                        const uint32 codepointwise_ops,
                                        const UnicodeText& text);
 

diff --git a/native/utils/normalization_test.cc b/native/utils/normalization_test.cc
new file mode 100644
index 0000000..1f731c7
--- /dev/null
+++ b/native/utils/normalization_test.cc

@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/normalization.h"
+
+#include <string>
+
+#include "utils/base/integral_types.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::Eq;
+
+class NormalizationTest : public testing::Test {
+ protected:
+  NormalizationTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+
+  std::string NormalizeTextCodepointWise(const std::string& text,
+                                         const uint32 codepointwise_ops) {
+    return libtextclassifier3::NormalizeTextCodepointWise(
+               unilib_, codepointwise_ops,
+               UTF8ToUnicodeText(text, /*do_copy=*/false))
+        .ToUTF8String();
+  }
+
+  UniLib unilib_;  // Passed to every normalization call made by the helper.
+};
+
+TEST_F(NormalizationTest, ReturnsIdenticalStringWhenNoNormalization) {
+  EXPECT_THAT(NormalizeTextCodepointWise(
+                  "Never gonna let you down.",
+                  NormalizationOptions_::CodepointwiseNormalizationOp_NONE),
+              Eq("Never gonna let you down."));
+}
+
+#if !defined(TC3_UNILIB_DUMMY)
+TEST_F(NormalizationTest, DropsWhitespace) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never gonna let you down.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE),
+      Eq("Nevergonnaletyoudown."));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never\tgonna\t\tlet\tyou\tdown.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE),
+      Eq("Nevergonnaletyoudown."));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never\u2003gonna\u2003let\u2003you\u2003down.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE),
+      Eq("Nevergonnaletyoudown."));
+}
+
+TEST_F(NormalizationTest, DropsPunctuation) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never gonna let you down.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_PUNCTUATION),
+      Eq("Never gonna let you down"));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "αʹ. Σημεῖόν ἐστιν, οὗ μέρος οὐθέν.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_PUNCTUATION),
+      Eq("αʹ Σημεῖόν ἐστιν οὗ μέρος οὐθέν"));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "978—3—16—148410—0",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_PUNCTUATION),
+      Eq("9783161484100"));
+}
+
+TEST_F(NormalizationTest, LowercasesUnicodeText) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "αʹ. Σημεῖόν ἐστιν, οὗ μέρος οὐθέν.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_LOWERCASE),
+      Eq("αʹ. σημεῖόν ἐστιν, οὗ μέρος οὐθέν."));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "αʹ. Σημεῖόν ἐστιν, οὗ μέρος οὐθέν.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE |
+              NormalizationOptions_::CodepointwiseNormalizationOp_LOWERCASE),
+      Eq("αʹ.σημεῖόνἐστιν,οὗμέροςοὐθέν."));
+}
+
+TEST_F(NormalizationTest, UppercasesUnicodeText) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Κανένας άνθρωπος δεν ξέρει",
+          NormalizationOptions_::CodepointwiseNormalizationOp_UPPERCASE),
+      Eq("ΚΑΝΈΝΑΣ ΆΝΘΡΩΠΟΣ ΔΕΝ ΞΈΡΕΙ"));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Κανένας άνθρωπος δεν ξέρει",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE |
+              NormalizationOptions_::CodepointwiseNormalizationOp_UPPERCASE),
+      Eq("ΚΑΝΈΝΑΣΆΝΘΡΩΠΟΣΔΕΝΞΈΡΕΙ"));
+}
+#endif
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/regex-match_test.cc b/native/utils/regex-match_test.cc
new file mode 100644
index 0000000..c45fb29
--- /dev/null
+++ b/native/utils/regex-match_test.cc

@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/regex-match.h"
+
+#include <memory>
+
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Test fixture that owns the UniLib instance the tests below use to create
+// regex patterns.
+class RegexMatchTest : public testing::Test {
+ protected:
+  // INIT_UNILIB_FOR_TESTING is a macro defined outside this file; presumably
+  // it expands to the member initializer for unilib_.
+  RegexMatchTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+#ifdef TC3_UNILIB_ICU
+#ifndef TC3_DISABLE_LUA
+// A verifier that unconditionally returns true is accepted, even with an
+// empty context and no matcher.
+TEST_F(RegexMatchTest, HandlesSimpleVerification) {
+  const std::string always_true_verifier = "return true;";
+  EXPECT_TRUE(VerifyMatch(/*context=*/"", /*matcher=*/nullptr,
+                          always_true_verifier));
+}
+#endif  // TC3_DISABLE_LUA
+
+#ifndef TC3_DISABLE_LUA
+// Verifies a regex match with a custom Lua verifier: the 16-digit group
+// captured from the message has to pass a Luhn checksum.
+TEST_F(RegexMatchTest, HandlesCustomVerification) {
+  UnicodeText pattern = UTF8ToUnicodeText("(\\d{16})",
+                                          /*do_copy=*/true);
+  UnicodeText message = UTF8ToUnicodeText("cc: 4012888888881881",
+                                          /*do_copy=*/true);
+  // Lua snippet handed to VerifyMatch; it reads the captured digits from
+  // match[1].text. `d` is declared local so that the verifier does not create
+  // an accidental global in the Lua state.
+  const std::string verifier = R"(
+function luhn(candidate)
+    local sum = 0
+    local num_digits = string.len(candidate)
+    local parity = num_digits % 2
+    for pos = 1,num_digits do
+      local d = tonumber(string.sub(candidate, pos, pos))
+      if pos % 2 ~= parity then
+        d = d * 2
+      end
+      if d > 9 then
+        d = d - 9
+      end
+      sum = sum + d
+    end
+    return (sum % 10) == 0
+end
+return luhn(match[1].text);
+  )";
+  const std::unique_ptr<UniLib::RegexPattern> regex_pattern =
+      unilib_.CreateRegexPattern(pattern);
+  ASSERT_TRUE(regex_pattern != nullptr);
+  const std::unique_ptr<UniLib::RegexMatcher> matcher =
+      regex_pattern->Matcher(message);
+  ASSERT_TRUE(matcher != nullptr);
+  // The verifier needs a current match, so one must be found without error
+  // before VerifyMatch is called.
+  int status = UniLib::RegexMatcher::kNoError;
+  ASSERT_TRUE(matcher->Find(&status) &&
+              status == UniLib::RegexMatcher::kNoError);
+
+  EXPECT_TRUE(VerifyMatch(message.ToUTF8String(), matcher.get(), verifier));
+}
+#endif  // TC3_DISABLE_LUA
+
+// Walks over both matches of an alternation and checks which capturing groups
+// carry text in each of them.
+TEST_F(RegexMatchTest, RetrievesMatchGroupTest) {
+  UnicodeText pattern =
+      UTF8ToUnicodeText("never gonna (?:give (you) up|let (you) down)",
+                        /*do_copy=*/true);
+  const std::unique_ptr<UniLib::RegexPattern> regex_pattern =
+      unilib_.CreateRegexPattern(pattern);
+  ASSERT_TRUE(regex_pattern != nullptr);
+  UnicodeText message =
+      UTF8ToUnicodeText("never gonna give you up - never gonna let you down");
+  const std::unique_ptr<UniLib::RegexMatcher> matcher =
+      regex_pattern->Matcher(message);
+  ASSERT_TRUE(matcher != nullptr);
+
+  // Advances to the next match; true only if one was found without error.
+  int status = UniLib::RegexMatcher::kNoError;
+  const auto find_next = [&matcher, &status]() {
+    return matcher->Find(&status) &&
+           status == UniLib::RegexMatcher::kNoError;
+  };
+  // Returns the optional text of capturing group `group` in the current match.
+  const auto group_text = [&matcher](int group) {
+    return GetCapturingGroupText(matcher.get(), group);
+  };
+
+  // First match takes the "give (you) up" branch: group 1 is set, 2 is not.
+  ASSERT_TRUE(find_next());
+  EXPECT_THAT(group_text(0).value(), testing::Eq("never gonna give you up"));
+  EXPECT_THAT(group_text(1).value(), testing::Eq("you"));
+  EXPECT_FALSE(group_text(2).has_value());
+
+  // Second match takes the "let (you) down" branch: group 2 is set, 1 is not.
+  ASSERT_TRUE(find_next());
+  EXPECT_THAT(group_text(0).value(), testing::Eq("never gonna let you down"));
+  EXPECT_FALSE(group_text(1).has_value());
+  EXPECT_THAT(group_text(2).value(), testing::Eq("you"));
+}
+#endif
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/resources_test.cc b/native/utils/resources_test.cc
new file mode 100644
index 0000000..c385f39
--- /dev/null
+++ b/native/utils/resources_test.cc

@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/resources.h"
+#include "utils/i18n/locale.h"
+#include "utils/resources_generated.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Fixture for Resources lookups, parameterized on
+// (compress resources, build compression dictionary).
+class ResourcesTest
+    : public testing::TestWithParam<testing::tuple<bool, bool>> {
+ protected:
+  ResourcesTest() {}
+
+  // Builds a ResourcePool with the single entry "A" and returns it as a
+  // serialized flatbuffer. Resources refer to locales by their index in
+  // `test_resources.locale`, so the order of the locale entries below is
+  // significant. If `add_default_language` is true, an empty language tag is
+  // appended as locale 9 and attached to the en-US resource.
+  std::string BuildTestResources(bool add_default_language = true) const {
+    ResourcePoolT test_resources;
+
+    // Test locales.
+    // 0: en-US
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "en";
+    test_resources.locale.back()->region = "US";
+    // 1: en-GB
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "en";
+    test_resources.locale.back()->region = "GB";
+    // 2: de-DE
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "de";
+    test_resources.locale.back()->region = "DE";
+    // 3: fr-FR
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "fr";
+    test_resources.locale.back()->region = "FR";
+    // 4: pt-PT
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "pt";
+    test_resources.locale.back()->region = "PT";
+    // 5: pt
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "pt";
+    // 6: zh-Hans-CN
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "zh";
+    test_resources.locale.back()->script = "Hans";
+    test_resources.locale.back()->region = "CN";
+    // 7: zh
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "zh";
+    // 8: fr-CA. The region belongs in its own field; writing "fr-CA" into
+    // `language` would leave this entry without a usable language or region.
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "fr";
+    test_resources.locale.back()->region = "CA";
+    // 9: default (empty language tag), only present on request.
+    if (add_default_language) {
+      test_resources.locale.emplace_back(new LanguageTagT);
+    }
+
+    // Test entries.
+    test_resources.resource_entry.emplace_back(new ResourceEntryT);
+    test_resources.resource_entry.back()->name = /*resource_name=*/"A";
+
+    // en-US, default
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "localize";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(0);
+    if (add_default_language) {
+      test_resources.resource_entry.back()->resource.back()->locale.push_back(
+          9);
+    }
+
+    // en-GB
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "localise";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(1);
+
+    // de-DE
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "lokalisieren";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(2);
+
+    // fr-FR, fr-CA
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "localiser";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(3);
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(8);
+
+    // pt-PT
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "localizar";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(4);
+
+    // pt
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "concentrar";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(5);
+
+    // zh-Hans-CN
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "龙";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(6);
+
+    // zh
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "龍";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(7);
+
+    // Optionally compress the pool, as selected by the test parameters.
+    if (compress()) {
+      EXPECT_TRUE(CompressResources(
+          &test_resources,
+          /*build_compression_dictionary=*/build_dictionary()));
+    }
+
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(ResourcePool::Pack(builder, &test_resources));
+
+    return std::string(
+        reinterpret_cast<const char*>(builder.GetBufferPointer()),
+        builder.GetSize());
+  }
+
+  // First test parameter: whether the resources get compressed.
+  bool compress() const { return testing::get<0>(GetParam()); }
+
+  // Second test parameter: forwarded to CompressResources as
+  // build_compression_dictionary.
+  bool build_dictionary() const { return testing::get<1>(GetParam()); }
+};
+
+// Runs every ResourcesTest case for all four combinations of the two boolean
+// parameters (compress, build_dictionary).
+INSTANTIATE_TEST_SUITE_P(Compression, ResourcesTest,
+                         testing::Combine(testing::Bool(), testing::Bool()));
+
+// Requests for locales the test pool was built for return the content
+// registered for them.
+TEST_P(ResourcesTest, CorrectlyHandlesExactMatch) {
+  const std::string serialized_pool = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(serialized_pool.data()));
+
+  struct ExactMatchCase {
+    const char* locale_tag;
+    const char* expected_content;
+  };
+  const ExactMatchCase kCases[] = {
+      {"en-US", "localize"}, {"en-GB", "localise"}, {"pt-PT", "localizar"},
+      {"zh-Hans-CN", "龙"},  {"zh", "龍"},          {"fr-CA", "localiser"},
+  };
+
+  // `content` is shared across the lookups, as each one overwrites it.
+  std::string content;
+  for (const ExactMatchCase& test_case : kCases) {
+    EXPECT_TRUE(resources.GetResourceContent(
+        {Locale::FromBCP47(test_case.locale_tag)},
+        /*resource_name=*/"A", &content));
+    EXPECT_EQ(test_case.expected_content, content);
+  }
+}
+
+// When several resources match equally well, the first one is used.
+TEST_P(ResourcesTest, CorrectlyHandlesTie) {
+  const std::string serialized_pool = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(serialized_pool.data()));
+
+  // en-CA is not in the pool; the en-US content is expected.
+  std::string content;
+  const bool found = resources.GetResourceContent(
+      {Locale::FromBCP47("en-CA")}, /*resource_name=*/"A", &content);
+  EXPECT_TRUE(found);
+  EXPECT_EQ("localize", content);
+}
+
+// A locale whose language is missing from the pool only resolves when the
+// pool contains a default entry.
+TEST_P(ResourcesTest, RequiresLanguageMatch) {
+  // No default entry: the lookup for es-US fails.
+  {
+    const std::string pool_without_default =
+        BuildTestResources(/*add_default_language=*/false);
+    Resources resources(
+        flatbuffers::GetRoot<ResourcePool>(pool_without_default.data()));
+    const bool found = resources.GetResourceContent(
+        {Locale::FromBCP47("es-US")}, /*resource_name=*/"A",
+        /*result=*/nullptr);
+    EXPECT_FALSE(found);
+  }
+  // With a default entry: the same lookup yields the default content.
+  {
+    const std::string pool_with_default =
+        BuildTestResources(/*add_default_language=*/true);
+    Resources resources(
+        flatbuffers::GetRoot<ResourcePool>(pool_with_default.data()));
+    std::string content;
+    const bool found = resources.GetResourceContent(
+        {Locale::FromBCP47("es-US")}, /*resource_name=*/"A",
+        /*result=*/&content);
+    EXPECT_TRUE(found);
+    EXPECT_EQ("localize", content);
+  }
+}
+
+// Locales that are not in the pool fall back to a related locale or, failing
+// that, to the default entry.
+TEST_P(ResourcesTest, HandlesFallback) {
+  const std::string serialized_pool = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(serialized_pool.data()));
+
+  struct FallbackCase {
+    const char* locale_tag;
+    const char* expected_content;
+  };
+  const FallbackCase kCases[] = {
+      {"fr-CH", "localiser"},
+      {"zh-Hans", "龙"},
+      {"zh-Hans-ZZ", "龙"},
+      // Fallback to default, en-US.
+      {"ru", "localize"},
+  };
+
+  std::string content;
+  for (const FallbackCase& test_case : kCases) {
+    EXPECT_TRUE(resources.GetResourceContent(
+        {Locale::FromBCP47(test_case.locale_tag)},
+        /*resource_name=*/"A", &content));
+    EXPECT_EQ(test_case.expected_content, content);
+  }
+}
+
+// Lookups with an ordered list of locales: earlier locales win as soon as
+// their language matches, later ones are consulted otherwise.
+TEST_P(ResourcesTest, HandlesFallbackMultipleLocales) {
+  const std::string serialized_pool = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(serialized_pool.data()));
+
+  const auto ru = Locale::FromBCP47("ru");
+  const auto it_it = Locale::FromBCP47("it-IT");
+  const auto de = Locale::FromBCP47("de");
+  std::string content;
+
+  // Still use inexact match with primary locale if language matches,
+  // even though secondary locale would match exactly.
+  EXPECT_TRUE(resources.GetResourceContent(
+      {Locale::FromBCP47("fr-CH"), Locale::FromBCP47("en-US")},
+      /*resource_name=*/"A", &content));
+  EXPECT_EQ("localiser", content);
+
+  // Use secondary language instead of default fallback if that is an exact
+  // language match.
+  EXPECT_TRUE(resources.GetResourceContent({ru, de},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("lokalisieren", content);
+
+  // Use tertiary language.
+  EXPECT_TRUE(resources.GetResourceContent({ru, it_it, de},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("lokalisieren", content);
+
+  // Default fallback if no locale matches.
+  EXPECT_TRUE(resources.GetResourceContent(
+      {ru, it_it, Locale::FromBCP47("es")},
+      /*resource_name=*/"A", &content));
+  EXPECT_EQ("localize", content);
+}
+
+// When the requested locale is not in the pool, the generic language entry is
+// preferred over a more specific sibling.
+TEST_P(ResourcesTest, PreferGenericCallback) {
+  const std::string serialized_pool = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(serialized_pool.data()));
+
+  struct GenericCase {
+    const char* locale_tag;
+    const char* expected_content;
+  };
+  const GenericCase kCases[] = {
+      {"pt-BR", "concentrar"},  // Falls back to pt, not pt-PT.
+      {"zh-Hant", "龍"},        // Falls back to zh, not zh-Hans-CN.
+      {"zh-Hant-CN", "龍"},     // Falls back to zh, not zh-Hans-CN.
+      {"zh-CN", "龍"},          // Falls back to zh, not zh-Hans-CN.
+  };
+
+  std::string content;
+  for (const GenericCase& test_case : kCases) {
+    EXPECT_TRUE(resources.GetResourceContent(
+        {Locale::FromBCP47(test_case.locale_tag)},
+        /*resource_name=*/"A", &content));
+    EXPECT_EQ(test_case.expected_content, content);
+  }
+}
+
+// A request for the bare language gets the bare-language entry.
+TEST_P(ResourcesTest, PreferGenericWhenGeneric) {
+  const std::string serialized_pool = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(serialized_pool.data()));
+
+  std::string content;
+  const bool found = resources.GetResourceContent(
+      {Locale::FromBCP47("pt")}, /*resource_name=*/"A", &content);
+  EXPECT_TRUE(found);
+
+  // Uses pt, not pt-PT.
+  EXPECT_EQ("concentrar", content);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/sentencepiece/encoder_test.cc b/native/utils/sentencepiece/encoder_test.cc
new file mode 100644
index 0000000..740db35
--- /dev/null
+++ b/native/utils/sentencepiece/encoder_test.cc

@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/encoder.h"
+
+#include <memory>
+#include <vector>
+
+#include "utils/base/integral_types.h"
+#include "utils/container/sorted-strings-table.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+
+// Encodes a text with a four-piece vocabulary and checks that the chosen
+// segmentation follows the piece scores.
+TEST(EncoderTest, SimpleTokenization) {
+  const char pieces_table[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  const int kNumPieces = 4;
+  std::unique_ptr<StringSet> pieces(new SortedStringsTable(
+      kNumPieces, offsets, StringPiece(pieces_table, 18)));
+  const Encoder encoder(pieces.get(), kNumPieces, scores);
+
+  // Encodes `text`, expects success and returns the produced codes.
+  const auto encode = [&encoder](const char* text) {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode(text, &encoded_text));
+    return encoded_text;
+  };
+
+  EXPECT_THAT(encode("hellothere"), ElementsAre(0, 3, 5, 1));
+
+  // Make the probability of "hello" very low:
+  // "hello" is now tokenized as "hell" + "o".
+  scores[1] = -100.0;
+  EXPECT_THAT(encode("hellothere"), ElementsAre(0, 2, 4, 5, 1));
+}
+
+// Covers repeated and overlapping pieces, empty input and input that
+// contains a character outside of the vocabulary.
+TEST(EncoderTest, HandlesEdgeCases) {
+  const char pieces_table[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  const int kNumPieces = 4;
+  std::unique_ptr<StringSet> pieces(new SortedStringsTable(
+      kNumPieces, offsets, StringPiece(pieces_table, 18)));
+  const Encoder encoder(pieces.get(), kNumPieces, scores);
+
+  // Encodes `text`, expects success and returns the produced codes.
+  const auto encode = [&encoder](const char* text) {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode(text, &encoded_text));
+    return encoded_text;
+  };
+
+  EXPECT_THAT(encode("hellhello"), ElementsAre(0, 2, 3, 1));
+  EXPECT_THAT(encode("hellohell"), ElementsAre(0, 3, 2, 1));
+  // Empty input produces only the two surrounding codes.
+  EXPECT_THAT(encode(""), ElementsAre(0, 1));
+  // Same output as for empty input when the text has an unknown character.
+  EXPECT_THAT(encode("hellathere"), ElementsAre(0, 1));
+}
+
+// With an explicit unknown code configured, characters outside of the
+// vocabulary are encoded as that code.
+TEST(EncoderTest, HandlesOutOfDictionary) {
+  const char pieces_table[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  const int kNumPieces = 4;
+  std::unique_ptr<StringSet> pieces(new SortedStringsTable(
+      kNumPieces, offsets, StringPiece(pieces_table, 18)));
+  const Encoder encoder(pieces.get(), kNumPieces, scores,
+                        /*start_code=*/0, /*end_code=*/1,
+                        /*encoding_offset=*/3, /*unknown_code=*/2,
+                        /*unknown_score=*/-100.0);
+
+  // Encodes `text`, expects success and returns the produced codes.
+  const auto encode = [&encoder](const char* text) {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode(text, &encoded_text));
+    return encoded_text;
+  };
+
+  EXPECT_THAT(encode("hellhello"), ElementsAre(0, 3, 4, 1));
+  EXPECT_THAT(encode("hellohell"), ElementsAre(0, 4, 3, 1));
+  EXPECT_THAT(encode(""), ElementsAre(0, 1));
+  EXPECT_THAT(encode("hellathere"),
+              ElementsAre(0, /*hell*/ 3, /*unknown*/ 2, /*there*/ 6, 1));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/sentencepiece/test_utils.cc b/native/utils/sentencepiece/test_utils.cc
deleted file mode 100644
index f277a14..0000000
--- a/native/utils/sentencepiece/test_utils.cc
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/sentencepiece/test_utils.h"
-
-#include <memory>
-
-#include "utils/base/integral_types.h"
-#include "utils/container/double-array-trie.h"
-#include "utils/strings/stringpiece.h"
-
-namespace libtextclassifier3 {
-
-SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
-                                           bool add_dummy_prefix,
-                                           bool remove_extra_whitespaces,
-                                           bool escape_whitespaces) {
-  const uint32 trie_blob_size = reinterpret_cast<const uint32*>(spec.data())[0];
-  spec.RemovePrefix(sizeof(trie_blob_size));
-  const TrieNode* trie_blob = reinterpret_cast<const TrieNode*>(spec.data());
-  spec.RemovePrefix(trie_blob_size);
-  const int num_nodes = trie_blob_size / sizeof(TrieNode);
-  return SentencePieceNormalizer(
-      DoubleArrayTrie(trie_blob, num_nodes),
-      /*charsmap_normalized=*/StringPiece(spec.data(), spec.size()),
-      add_dummy_prefix, remove_extra_whitespaces, escape_whitespaces);
-}
-
-}  // namespace libtextclassifier3

diff --git a/native/utils/sentencepiece/test_utils.h b/native/utils/sentencepiece/test_utils.h
deleted file mode 100644
index 0c833da..0000000
--- a/native/utils/sentencepiece/test_utils.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
-#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
-
-#include <string>
-#include <vector>
-
-#include "utils/sentencepiece/normalizer.h"
-#include "utils/strings/stringpiece.h"
-
-namespace libtextclassifier3 {
-
-SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
-                                           bool add_dummy_prefix,
-                                           bool remove_extra_whitespaces,
-                                           bool escape_whitespaces);
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_

diff --git a/native/utils/strings/append_test.cc b/native/utils/strings/append_test.cc
new file mode 100644
index 0000000..8950761
--- /dev/null
+++ b/native/utils/strings/append_test.cc

@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/append.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace strings {
+
+// Appends formatted text to a string, with an explicit size limit as the
+// second argument.
+TEST(StringUtilTest, SStringAppendF) {
+  std::string out;
+
+  SStringAppendF(&out, 5, "%d %d", 0, 1);
+  EXPECT_EQ(out, "0 1");
+
+  // Output is appended to the existing content.
+  SStringAppendF(&out, 1, "%d", 9);
+  EXPECT_EQ(out, "0 19");
+
+  // With a limit of 1 only the first character of "10" is appended.
+  SStringAppendF(&out, 1, "%d", 10);
+  EXPECT_EQ(out, "0 191");
+
+  out.clear();
+
+  SStringAppendF(&out, 5, "%d", 100);
+  EXPECT_EQ(out, "100");
+}
+
+// A size argument of 0 still appends the complete formatted text.
+TEST(StringUtilTest, SStringAppendFBufCalc) {
+  std::string out;
+  SStringAppendF(&out, 0, "%d %s %d", 1, "hello", 2);
+  EXPECT_EQ(out, "1 hello 2");
+}
+
+// Joins string vectors with single-character, empty and multi-character
+// delimiters, and joins an empty vector.
+TEST(StringUtilTest, JoinStrings) {
+  const std::vector<std::string> parts = {"1", "2", "3"};
+  const std::vector<std::string> no_parts;
+
+  EXPECT_EQ("1,2,3", JoinStrings(",", parts));
+  EXPECT_EQ("123", JoinStrings("", parts));
+  EXPECT_EQ("1, 2, 3", JoinStrings(", ", parts));
+  EXPECT_EQ("", JoinStrings(",", no_parts));
+}
+
+}  // namespace strings
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/numbers_test.cc b/native/utils/strings/numbers_test.cc
new file mode 100644
index 0000000..bf2f84a
--- /dev/null
+++ b/native/utils/strings/numbers_test.cc

@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/numbers.h"
+
+#include "utils/base/integral_types.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Checks that ParseInt32(c_str) succeeds or fails as given by
+// `expected_parsing_success` and, on expected success, that it yields
+// `expected_parsed_value`. The input is added to every failure message
+// because all callers share the expectations in this helper.
+void TestParseInt32(const char *c_str, bool expected_parsing_success,
+                    int32 expected_parsed_value = 0) {
+  int32 parsed_value = 0;
+  EXPECT_EQ(expected_parsing_success, ParseInt32(c_str, &parsed_value))
+      << "input: \"" << c_str << "\"";
+  if (expected_parsing_success) {
+    EXPECT_EQ(expected_parsed_value, parsed_value)
+        << "input: \"" << c_str << "\"";
+  }
+}
+
+// Inputs with signs, leading spaces and leading zeros parse successfully.
+TEST(ParseInt32Test, Normal) {
+  const struct {
+    const char* text;
+    int32 expected_value;
+  } kCases[] = {
+      {"2", 2},      {"-357", -357}, {"7", 7},   {"+7", 7},
+      {"  +7", 7},   {"-23", -23},   {"  -23", -23},
+      {"04", 4},     {"07", 7},      {"08", 8},  {"09", 9},
+  };
+  for (const auto& test_case : kCases) {
+    TestParseInt32(test_case.text, /*expected_parsing_success=*/true,
+                   test_case.expected_value);
+  }
+}
+
+// Empty, blank, non-numeric and trailing-garbage inputs are rejected.
+TEST(ParseInt32Test, ErrorCases) {
+  for (const char* unparsable : {"", "  ", "not-a-number", "123a"}) {
+    TestParseInt32(unparsable, /*expected_parsing_success=*/false);
+  }
+}
+
+// Checks that ParseInt64(c_str) succeeds or fails as given by
+// `expected_parsing_success` and, on expected success, that it yields
+// `expected_parsed_value`. The input is added to every failure message
+// because all callers share the expectations in this helper.
+void TestParseInt64(const char *c_str, bool expected_parsing_success,
+                    int64 expected_parsed_value = 0) {
+  int64 parsed_value = 0;
+  EXPECT_EQ(expected_parsing_success, ParseInt64(c_str, &parsed_value))
+      << "input: \"" << c_str << "\"";
+  if (expected_parsing_success) {
+    EXPECT_EQ(expected_parsed_value, parsed_value)
+        << "input: \"" << c_str << "\"";
+  }
+}
+
+// Inputs with signs, leading spaces and leading zeros parse successfully.
+TEST(ParseInt64Test, Normal) {
+  const struct {
+    const char* text;
+    int64 expected_value;
+  } kCases[] = {
+      {"2", 2},    {"-357", -357}, {"7", 7},       {"+7", 7}, {"  +7", 7},
+      {"-23", -23}, {"  -23", -23}, {"07", 7},     {"08", 8},
+  };
+  for (const auto& test_case : kCases) {
+    TestParseInt64(test_case.text, /*expected_parsing_success=*/true,
+                   test_case.expected_value);
+  }
+}
+
+// Empty, blank, non-numeric and trailing-garbage inputs are rejected.
+TEST(ParseInt64Test, ErrorCases) {
+  for (const char* unparsable : {"", "  ", "not-a-number", "23z"}) {
+    TestParseInt64(unparsable, /*expected_parsing_success=*/false);
+  }
+}
+
+// Checks that ParseDouble(c_str) succeeds or fails as given by
+// `expected_parsing_success` and, on expected success, that the result is
+// within 0.00001 of `expected_parsed_value`. The input is added to every
+// failure message because all callers share the expectations in this helper.
+void TestParseDouble(const char *c_str, bool expected_parsing_success,
+                     double expected_parsed_value = 0.0) {
+  double parsed_value = 0.0;
+  EXPECT_EQ(expected_parsing_success, ParseDouble(c_str, &parsed_value))
+      << "input: \"" << c_str << "\"";
+  if (expected_parsing_success) {
+    EXPECT_NEAR(expected_parsed_value, parsed_value, 0.00001)
+        << "input: \"" << c_str << "\"";
+  }
+}
+
+// Integral and fractional inputs, with signs and leading spaces, parse
+// successfully.
+TEST(ParseDoubleTest, Normal) {
+  const struct {
+    const char* text;
+    double expected_value;
+  } kCases[] = {
+      {"2", 2.0},          {"-357.023", -357.023}, {"7.04", 7.04},
+      {"+7.2", 7.2},       {"  +7.236", 7.236},    {"-23.4", -23.4},
+      {"  -23.4", -23.4},
+  };
+  for (const auto& test_case : kCases) {
+    TestParseDouble(test_case.text, /*expected_parsing_success=*/true,
+                    test_case.expected_value);
+  }
+}
+
+// Empty, blank, non-numeric and trailing-garbage inputs are rejected.
+TEST(ParseDoubleTest, ErrorCases) {
+  for (const char* unparsable : {"", "  ", "not-a-number", "23.5a"}) {
+    TestParseDouble(unparsable, /*expected_parsing_success=*/false);
+  }
+}
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/split.h b/native/utils/strings/split.h
index b565258..98d066e 100644
--- a/native/utils/strings/split.h
+++ b/native/utils/strings/split.h

@@ -26,6 +26,9 @@
 namespace strings {
 
 std::vector<StringPiece> Split(const StringPiece &text, char delim);
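+// Deleted overload for r-value strings: the returned StringPieces would refer
+// to a temporary that no longer exists once the call expression has finished.
+// This rejects common pitfalls like:
+//   Split(GetSomeTransientString())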
+std::vector<StringPiece> Split(const std::string &&text, char delim) = delete;
 
 }  // namespace strings
 }  // namespace libtextclassifier3

diff --git a/native/utils/strings/stringpiece_test.cc b/native/utils/strings/stringpiece_test.cc
new file mode 100644
index 0000000..64808d3
--- /dev/null
+++ b/native/utils/strings/stringpiece_test.cc

@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StringPieceTest, EndsWith) {
+  EXPECT_TRUE(EndsWith("hello there!", "there!"));
+  EXPECT_TRUE(EndsWith("hello there!", "!"));
+  EXPECT_FALSE(EndsWith("hello there!", "there"));
+  EXPECT_FALSE(EndsWith("hello there!", " hello there!"));
+  EXPECT_TRUE(EndsWith("hello there!", ""));
+  EXPECT_FALSE(EndsWith("", "hello there!"));
+}
+
+TEST(StringPieceTest, StartsWith) {
+  EXPECT_TRUE(StartsWith("hello there!", "hello"));
+  EXPECT_TRUE(StartsWith("hello there!", "hello "));
+  EXPECT_FALSE(StartsWith("hello there!", "there!"));
+  EXPECT_FALSE(StartsWith("hello there!", " hello there! "));
+  EXPECT_TRUE(StartsWith("hello there!", ""));
+  EXPECT_FALSE(StartsWith("", "hello there!"));
+}
+
+TEST(StringPieceTest, ConsumePrefix) {
+  StringPiece str("hello there!");
+  EXPECT_TRUE(ConsumePrefix(&str, "hello "));
+  EXPECT_EQ(str.ToString(), "there!");
+  EXPECT_TRUE(ConsumePrefix(&str, "there"));
+  EXPECT_EQ(str.ToString(), "!");
+  EXPECT_FALSE(ConsumePrefix(&str, "!!"));  // longer than the remaining "!"
+  EXPECT_TRUE(ConsumePrefix(&str, ""));     // empty prefix matches
+  EXPECT_TRUE(ConsumePrefix(&str, "!"));    // str was left intact by the miss
+  EXPECT_EQ(str.ToString(), "");
+  EXPECT_TRUE(ConsumePrefix(&str, ""));    // ...also on an empty piece
+  EXPECT_FALSE(ConsumePrefix(&str, "!"));  // nothing left to consume
+}
+
+TEST(StringPieceTest, ConsumeSuffix) {
+  StringPiece str("hello there!");
+  EXPECT_TRUE(ConsumeSuffix(&str, "!"));
+  EXPECT_EQ(str.ToString(), "hello there");
+  EXPECT_TRUE(ConsumeSuffix(&str, " there"));
+  EXPECT_EQ(str.ToString(), "hello");
+  EXPECT_FALSE(ConsumeSuffix(&str, "!!"));    // not a suffix of "hello"
+  EXPECT_TRUE(ConsumeSuffix(&str, ""));       // empty suffix matches
+  EXPECT_TRUE(ConsumeSuffix(&str, "hello"));  // str was left intact by the miss
+  EXPECT_EQ(str.ToString(), "");
+  EXPECT_TRUE(ConsumeSuffix(&str, ""));    // ...also on an empty piece
+  EXPECT_FALSE(ConsumeSuffix(&str, "!"));  // nothing left to consume
+}
+
+TEST(StringPieceTest, Find) {
+  StringPiece str("<hello there!>");
+  EXPECT_EQ(str.find('<'), 0);
+  EXPECT_EQ(str.find('>'), str.length() - 1);
+  EXPECT_EQ(str.find('?'), StringPiece::npos);
+  EXPECT_EQ(str.find('<', str.length() - 1), StringPiece::npos);
+  EXPECT_EQ(str.find('<', 0), 0);
+  EXPECT_EQ(str.find('>', str.length() - 1), str.length() - 1);
+}
+
+TEST(StringPieceTest, FindStringPiece) {
+  StringPiece str("<foo bar baz!>");
+  EXPECT_EQ(str.find("foo"), 1);
+  EXPECT_EQ(str.find("bar"), 5);
+  EXPECT_EQ(str.find("baz"), 9);
+  EXPECT_EQ(str.find("qux"), StringPiece::npos);
+  EXPECT_EQ(str.find("?"), StringPiece::npos);
+  EXPECT_EQ(str.find(">"), str.length() - 1);
+  EXPECT_EQ(str.find("<", str.length() - 1), StringPiece::npos);
+  EXPECT_EQ(str.find("<", 0), 0);
+  EXPECT_EQ(str.find(">", str.length() - 1), str.length() - 1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/substitute_test.cc b/native/utils/strings/substitute_test.cc
new file mode 100644
index 0000000..94b37ab
--- /dev/null
+++ b/native/utils/strings/substitute_test.cc

@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/substitute.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(SubstituteTest, Substitute) {
+  EXPECT_EQ("Hello, world!",
+            strings::Substitute("$0, $1!", {"Hello", "world"}));
+
+  // Out of order.
+  EXPECT_EQ("world, Hello!",
+            strings::Substitute("$1, $0!", {"Hello", "world"}));
+  EXPECT_EQ("b, a, c, b",
+            strings::Substitute("$1, $0, $2, $1", {"a", "b", "c"}));
+
+  // Literal $
+  EXPECT_EQ("$", strings::Substitute("$$", {}));
+  EXPECT_EQ("$1", strings::Substitute("$$1", {}));
+
+  const char* null_cstring = nullptr;
+  EXPECT_EQ("Text: ''", strings::Substitute("Text: '$0'", {null_cstring}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/utf8.h b/native/utils/strings/utf8.h
index bebdaaa..e871731 100644
--- a/native/utils/strings/utf8.h
+++ b/native/utils/strings/utf8.h

@@ -23,22 +23,14 @@
 
 // Returns the length (number of bytes) of the Unicode code point starting at
 // src, based on inspecting just that one byte.  Preconditions: src != NULL,
-// *src can be read, and *src is not '\0', and src points to a well-formed UTF-8
-// std::string.
-static inline int GetNumBytesForNonZeroUTF8Char(const char *src) {
+// *src can be read.
+static inline int GetNumBytesForUTF8Char(const char *src) {
   // On most platforms, char is unsigned by default, but iOS is an exception.
   // The cast below makes sure we always interpret *src as an unsigned char.
   return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
       [(*(reinterpret_cast<const unsigned char *>(src)) & 0xFF) >> 4];
 }
 
-// Like GetNumBytesForNonZeroUTF8Char, but *src may be '\0'; returns 0 in that
-// case.
-static inline int GetNumBytesForUTF8Char(const char *src) {
-  if (*src == '\0') return 0;
-  return GetNumBytesForNonZeroUTF8Char(src);
-}
-
 // Returns true if this byte is a trailing UTF-8 byte (10xx xxxx)
 static inline bool IsTrailByte(char x) {
   // return (x & 0xC0) == 0x80;

diff --git a/native/utils/strings/utf8_test.cc b/native/utils/strings/utf8_test.cc
new file mode 100644
index 0000000..28d971b
--- /dev/null
+++ b/native/utils/strings/utf8_test.cc

@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/utf8.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(Utf8Test, ComputesUtf8LengthOfUnicodeCharacters) {
+  EXPECT_EQ(GetNumBytesForUTF8Char("\x00"), 1);  // NUL is reported as one byte
+  EXPECT_EQ(GetNumBytesForUTF8Char("h"), 1);     // ASCII
+  EXPECT_EQ(GetNumBytesForUTF8Char("😋"), 4);
+  EXPECT_EQ(GetNumBytesForUTF8Char("㍿"), 3);
+}
+
+TEST(Utf8Test, IsValidUTF8) {
+  EXPECT_TRUE(IsValidUTF8("1234😋hello", 13));
+  EXPECT_TRUE(IsValidUTF8("\u304A\u00B0\u106B", 8));
+  EXPECT_TRUE(IsValidUTF8("this is a test😋😋😋", 26));
+  EXPECT_TRUE(IsValidUTF8("\xf0\x9f\x98\x8b", 4));
+  // Too short (string is too short).
+  EXPECT_FALSE(IsValidUTF8("\xf0\x9f", 2));
+  // Too long (too many trailing bytes).
+  EXPECT_FALSE(IsValidUTF8("\xf0\x9f\x98\x8b\x8b", 5));
+  // Too short (too few trailing bytes).
+  EXPECT_FALSE(IsValidUTF8("\xf0\x9f\x98\x61\x61", 5));
+}
+
+TEST(Utf8Test, ValidUTF8CharLength) {
+  EXPECT_EQ(ValidUTF8CharLength("1234😋hello", 13), 1);
+  EXPECT_EQ(ValidUTF8CharLength("\u304A\u00B0\u106B", 8), 3);
+  EXPECT_EQ(ValidUTF8CharLength("this is a test😋😋😋", 26), 1);
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f\x98\x8b", 4), 4);
+  // Too short (string is too short).
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f", 2), -1);
+  // Too long (too many trailing bytes). First character is valid.
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f\x98\x8b\x8b", 5), 4);
+  // Too short (too few trailing bytes).
+  EXPECT_EQ(ValidUTF8CharLength("\xf0\x9f\x98\x61\x61", 5), -1);
+}
+
+TEST(Utf8Test, CorrectlyTruncatesStrings) {
+  EXPECT_EQ(SafeTruncateLength("FooBar", 3), 3);
+  EXPECT_EQ(SafeTruncateLength("früh", 3), 2);
+  EXPECT_EQ(SafeTruncateLength("مَمِمّمَّمِّ", 5), 4);
+}
+
+TEST(Utf8Test, CorrectlyConvertsFromUtf8) {
+  EXPECT_EQ(ValidCharToRune("a"), 97);
+  EXPECT_EQ(ValidCharToRune("\0"), 0);
+  EXPECT_EQ(ValidCharToRune("\u304A"), 0x304a);
+  EXPECT_EQ(ValidCharToRune("\xe3\x81\x8a"), 0x304a);
+}
+
+TEST(Utf8Test, CorrectlyConvertsToUtf8) {
+  char utf8_encoding[4];
+  EXPECT_EQ(ValidRuneToChar(97, utf8_encoding), 1);
+  EXPECT_EQ(ValidRuneToChar(0, utf8_encoding), 1);
+  EXPECT_EQ(ValidRuneToChar(0x304a, utf8_encoding), 3);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/tensor-view_test.cc b/native/utils/tensor-view_test.cc
new file mode 100644
index 0000000..9467264
--- /dev/null
+++ b/native/utils/tensor-view_test.cc

@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tensor-view.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(TensorViewTest, TestSize) {
+  std::vector<float> data{0.1, 0.2, 0.3, 0.4, 0.5, 0.6};
+  const TensorView<float> tensor(data.data(), {3, 1, 2});  // 3 * 1 * 2 == 6
+  EXPECT_TRUE(tensor.is_valid());
+  EXPECT_EQ(tensor.shape(), (std::vector<int>{3, 1, 2}));
+  EXPECT_EQ(tensor.data(), data.data());  // same pointer as the backing vector
+  EXPECT_EQ(tensor.size(), 6);
+  EXPECT_EQ(tensor.dims(), 3);
+  EXPECT_EQ(tensor.dim(0), 3);
+  EXPECT_EQ(tensor.dim(1), 1);
+  EXPECT_EQ(tensor.dim(2), 2);
+  std::vector<float> output_data(6);
+  EXPECT_TRUE(tensor.copy_to(output_data.data(), output_data.size()));
+  EXPECT_EQ(data, output_data);
+
+  // copy_to must refuse a destination smaller than the tensor (3 < 6).
+  std::vector<float> small_output_data{-1, -1, -1};
+  EXPECT_FALSE(
+      tensor.copy_to(small_output_data.data(), small_output_data.size()));
+  // The output buffer should not be changed.
+  EXPECT_EQ(small_output_data, (std::vector<float>{-1, -1, -1}));
+
+  const TensorView<float> invalid_tensor = TensorView<float>::Invalid();
+  EXPECT_FALSE(invalid_tensor.is_valid());
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/test-utils.cc b/native/utils/test-utils.cc
new file mode 100644
index 0000000..8996a4a
--- /dev/null
+++ b/native/utils/test-utils.cc

@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/test-utils.h"
+
+#include <iterator>
+
+#include "utils/codepoint-range.h"
+#include "utils/strings/utf8.h"
+#include "utils/utf8/unicodetext.h"
+
+namespace libtextclassifier3 {
+
+using libtextclassifier3::Token;
+
+std::vector<Token> TokenizeOnSpace(const std::string& text) {
+  return TokenizeOnDelimiters(text, {' '});  // ' ' is the sole delimiter
+}
+
+std::vector<Token> TokenizeOnDelimiters(
+    const std::string& text, const std::unordered_set<char32>& delimiters) {
+  const UnicodeText unicode_text = UTF8ToUnicodeText(text, /*do_copy=*/false);
+  // Each token carries its UTF-8 text and [start, end) codepoint offsets.
+  std::vector<Token> result;
+
+  int token_start_codepoint = 0;  // codepoint index where the open token starts
+  auto token_start_it = unicode_text.begin();  // iterator at that same position
+  int codepoint_idx = 0;  // codepoint index of `it`; bumped every iteration
+
+  UnicodeText::const_iterator it;  // declared here: read again after the loop
+  for (it = unicode_text.begin(); it < unicode_text.end(); it++) {
+    if (delimiters.find(*it) != delimiters.end()) {
+      // A delimiter closes the open token; emit it unless it is empty.
+      if (token_start_it != it) {
+        result.push_back(Token{UnicodeText::UTF8Substring(token_start_it, it),
+                               token_start_codepoint, codepoint_idx});
+      }
+
+      token_start_codepoint = codepoint_idx + 1;  // skip past the delimiter
+      token_start_it = it;
+      token_start_it++;
+    }
+
+    codepoint_idx++;
+  }
+  // Emit whatever follows the last delimiter, again only when non-empty.
+  if (token_start_it != it) {
+    result.push_back(Token{UnicodeText::UTF8Substring(token_start_it, it),
+                           token_start_codepoint, codepoint_idx});
+  }
+
+  return result;
+}
+
+}  // namespace  libtextclassifier3

diff --git a/native/utils/test-utils.h b/native/utils/test-utils.h
new file mode 100644
index 0000000..0e75190
--- /dev/null
+++ b/native/utils/test-utils.h

@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Utilities for tests.
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_TEST_UTILS_H_
+#define LIBTEXTCLASSIFIER_UTILS_TEST_UTILS_H_
+
+#include <string>
+
+#include "annotator/types.h"
+
+namespace libtextclassifier3 {
+
+// Returns a list of Tokens for a given input string, by tokenizing on space.
+std::vector<Token> TokenizeOnSpace(const std::string& text);
+
+// Returns a list of Tokens for a given input string, by tokenizing on the
+// given set of delimiter codepoints.
+std::vector<Token> TokenizeOnDelimiters(
+    const std::string& text, const std::unordered_set<char32>& delimiters);
+
+}  // namespace  libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_TEST_UTILS_H_

diff --git a/native/utils/test-utils_test.cc b/native/utils/test-utils_test.cc
new file mode 100644
index 0000000..bdaa285
--- /dev/null
+++ b/native/utils/test-utils_test.cc

@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/test-utils.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(TestUtilTest, TokenizeOnSpace) {
+  std::vector<Token> tokens =
+      TokenizeOnSpace("Where is Jörg Borg located? Maybe in Zürich ...");
+
+  EXPECT_EQ(tokens.size(), 9);
+
+  EXPECT_EQ(tokens[0].value, "Where");
+  EXPECT_EQ(tokens[0].start, 0);
+  EXPECT_EQ(tokens[0].end, 5);  // end offset is exclusive
+
+  EXPECT_EQ(tokens[1].value, "is");
+  EXPECT_EQ(tokens[1].start, 6);
+  EXPECT_EQ(tokens[1].end, 8);
+
+  EXPECT_EQ(tokens[2].value, "Jörg");
+  EXPECT_EQ(tokens[2].start, 9);
+  EXPECT_EQ(tokens[2].end, 13);  // 4 codepoints; "Jörg" is 5 bytes in UTF-8
+
+  EXPECT_EQ(tokens[3].value, "Borg");
+  EXPECT_EQ(tokens[3].start, 14);
+  EXPECT_EQ(tokens[3].end, 18);
+
+  EXPECT_EQ(tokens[4].value, "located?");
+  EXPECT_EQ(tokens[4].start, 19);
+  EXPECT_EQ(tokens[4].end, 27);
+
+  EXPECT_EQ(tokens[5].value, "Maybe");
+  EXPECT_EQ(tokens[5].start, 28);
+  EXPECT_EQ(tokens[5].end, 33);
+
+  EXPECT_EQ(tokens[6].value, "in");
+  EXPECT_EQ(tokens[6].start, 34);
+  EXPECT_EQ(tokens[6].end, 36);
+
+  EXPECT_EQ(tokens[7].value, "Zürich");
+  EXPECT_EQ(tokens[7].start, 37);
+  EXPECT_EQ(tokens[7].end, 43);  // offsets count codepoints, not bytes
+
+  EXPECT_EQ(tokens[8].value, "...");
+  EXPECT_EQ(tokens[8].start, 44);
+  EXPECT_EQ(tokens[8].end, 47);
+}
+
+TEST(TestUtilTest, TokenizeOnDelimiters) {
+  std::vector<Token> tokens = TokenizeOnDelimiters(
+      "This   might be čomplíčateď?!: Oder?", {' ', '?', '!'});
+
+  EXPECT_EQ(tokens.size(), 6);
+
+  EXPECT_EQ(tokens[0].value, "This");
+  EXPECT_EQ(tokens[0].start, 0);
+  EXPECT_EQ(tokens[0].end, 4);
+
+  EXPECT_EQ(tokens[1].value, "might");
+  EXPECT_EQ(tokens[1].start, 7);  // the run of three spaces adds no tokens
+  EXPECT_EQ(tokens[1].end, 12);
+
+  EXPECT_EQ(tokens[2].value, "be");
+  EXPECT_EQ(tokens[2].start, 13);
+  EXPECT_EQ(tokens[2].end, 15);
+
+  EXPECT_EQ(tokens[3].value, "čomplíčateď");
+  EXPECT_EQ(tokens[3].start, 16);
+  EXPECT_EQ(tokens[3].end, 27);
+
+  EXPECT_EQ(tokens[4].value, ":");  // ':' is not in the delimiter set
+  EXPECT_EQ(tokens[4].start, 29);   // '?' and '!' sit at offsets 27 and 28
+  EXPECT_EQ(tokens[4].end, 30);
+
+  EXPECT_EQ(tokens[5].value, "Oder");
+  EXPECT_EQ(tokens[5].start, 31);
+  EXPECT_EQ(tokens[5].end, 35);  // the trailing '?' is not part of the token
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/tflite-model-executor.cc b/native/utils/tflite-model-executor.cc
index bd4b142..55faea5 100644
--- a/native/utils/tflite-model-executor.cc
+++ b/native/utils/tflite-model-executor.cc

@@ -26,10 +26,14 @@
 TfLiteRegistration* Register_ADD();
 TfLiteRegistration* Register_CONCATENATION();
 TfLiteRegistration* Register_CONV_2D();
+TfLiteRegistration* Register_EQUAL();
 TfLiteRegistration* Register_FULLY_CONNECTED();
+TfLiteRegistration* Register_GREATER_EQUAL();
 TfLiteRegistration* Register_L2_NORMALIZATION();
 TfLiteRegistration* Register_MUL();
 TfLiteRegistration* Register_RESHAPE();
+TfLiteRegistration* Register_REDUCE_MAX();
+TfLiteRegistration* Register_REDUCE_ANY();
 TfLiteRegistration* Register_SOFTMAX();
 TfLiteRegistration* Register_GATHER();
 TfLiteRegistration* Register_TRANSPOSE();
@@ -54,6 +58,7 @@
 TfLiteRegistration* Register_SQUARED_DIFFERENCE();
 TfLiteRegistration* Register_RSQRT();
 TfLiteRegistration* Register_LOG_SOFTMAX();
+TfLiteRegistration* Register_WHERE();
 }  // namespace builtin
 }  // namespace ops
 }  // namespace tflite
@@ -76,10 +81,15 @@
                        tflite::ops::builtin::Register_CONV_2D(),
                        /*min_version=*/1,
                        /*max_version=*/3);
+  resolver->AddBuiltin(::tflite::BuiltinOperator_EQUAL,
+                       ::tflite::ops::builtin::Register_EQUAL());
+
   resolver->AddBuiltin(tflite::BuiltinOperator_FULLY_CONNECTED,
                        tflite::ops::builtin::Register_FULLY_CONNECTED(),
                        /*min_version=*/1,
                        /*max_version=*/4);
+  resolver->AddBuiltin(::tflite::BuiltinOperator_GREATER_EQUAL,
+                       ::tflite::ops::builtin::Register_GREATER_EQUAL());
   resolver->AddBuiltin(tflite::BuiltinOperator_L2_NORMALIZATION,
                        tflite::ops::builtin::Register_L2_NORMALIZATION(),
                        /*min_version=*/1,
@@ -88,6 +98,10 @@
                        tflite::ops::builtin::Register_MUL());
   resolver->AddBuiltin(tflite::BuiltinOperator_RESHAPE,
                        tflite::ops::builtin::Register_RESHAPE());
+  resolver->AddBuiltin(::tflite::BuiltinOperator_REDUCE_MAX,
+                       ::tflite::ops::builtin::Register_REDUCE_MAX());
+  resolver->AddBuiltin(::tflite::BuiltinOperator_REDUCE_ANY,
+                       ::tflite::ops::builtin::Register_REDUCE_ANY());
   resolver->AddBuiltin(tflite::BuiltinOperator_SOFTMAX,
                        tflite::ops::builtin::Register_SOFTMAX(),
                        /*min_version=*/1,
@@ -160,6 +174,8 @@
                        tflite::ops::builtin::Register_RSQRT());
   resolver->AddBuiltin(tflite::BuiltinOperator_LOG_SOFTMAX,
                        tflite::ops::builtin::Register_LOG_SOFTMAX());
+  resolver->AddBuiltin(::tflite::BuiltinOperator_WHERE,
+                       ::tflite::ops::builtin::Register_WHERE());
 }
 #else
 void RegisterSelectedOps(tflite::MutableOpResolver* resolver) {

diff --git a/native/utils/token-feature-extractor.cc b/native/utils/token-feature-extractor.cc
index b14f96e..ee915db 100644
--- a/native/utils/token-feature-extractor.cc
+++ b/native/utils/token-feature-extractor.cc

@@ -70,8 +70,8 @@
 }  // namespace
 
 TokenFeatureExtractor::TokenFeatureExtractor(
-    const TokenFeatureExtractorOptions& options, const UniLib& unilib)
-    : options_(options), unilib_(unilib) {
+    const TokenFeatureExtractorOptions& options, const UniLib* unilib)
+    : options_(options), unilib_(*unilib) {
   for (const std::string& pattern : options.regexp_features) {
     regex_patterns_.push_back(std::unique_ptr<UniLib::RegexPattern>(
         unilib_.CreateRegexPattern(UTF8ToUnicodeText(

diff --git a/native/utils/token-feature-extractor.h b/native/utils/token-feature-extractor.h
index fed113b..b3f2f33 100644
--- a/native/utils/token-feature-extractor.h
+++ b/native/utils/token-feature-extractor.h

@@ -65,8 +65,10 @@
 
 class TokenFeatureExtractor {
  public:
-  TokenFeatureExtractor(const TokenFeatureExtractorOptions& options,
-                        const UniLib& unilib);
+  // Does not take ownership of unilib, which must refer to a valid unilib
+  // instance that outlives this feature extractor.
+  explicit TokenFeatureExtractor(const TokenFeatureExtractorOptions& options,
+                                 const UniLib* unilib);
 
   // Extracts both the sparse (charactergram) and the dense features from a
   // token. is_in_span is a bool indicator whether the token is a part of the

diff --git a/native/utils/token-feature-extractor_test.cc b/native/utils/token-feature-extractor_test.cc
new file mode 100644
index 0000000..15a434c
--- /dev/null
+++ b/native/utils/token-feature-extractor_test.cc

@@ -0,0 +1,579 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/token-feature-extractor.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+class TokenFeatureExtractorTest : public ::testing::Test {
+ protected:
+  explicit TokenFeatureExtractorTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;  // handed to each extractor under test by pointer
+};
+
+class TestingTokenFeatureExtractor : public TokenFeatureExtractor {
+ public:
+  using TokenFeatureExtractor::HashToken;  // lets the tests call HashToken
+  using TokenFeatureExtractor::TokenFeatureExtractor;  // inherit constructors
+};
+
+TEST_F(TokenFeatureExtractorTest, ExtractAscii) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hello", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("H"),
+                  extractor.HashToken("e"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("o"),
+                  extractor.HashToken("^H"),
+                  extractor.HashToken("He"),
+                  extractor.HashToken("el"),
+                  extractor.HashToken("ll"),
+                  extractor.HashToken("lo"),
+                  extractor.HashToken("o$"),
+                  extractor.HashToken("^He"),
+                  extractor.HashToken("Hel"),
+                  extractor.HashToken("ell"),
+                  extractor.HashToken("llo"),
+                  extractor.HashToken("lo$")
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("w"),
+                  extractor.HashToken("o"),
+                  extractor.HashToken("r"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("d"),
+                  extractor.HashToken("!"),
+                  extractor.HashToken("^w"),
+                  extractor.HashToken("wo"),
+                  extractor.HashToken("or"),
+                  extractor.HashToken("rl"),
+                  extractor.HashToken("ld"),
+                  extractor.HashToken("d!"),
+                  extractor.HashToken("!$"),
+                  extractor.HashToken("^wo"),
+                  extractor.HashToken("wor"),
+                  extractor.HashToken("orl"),
+                  extractor.HashToken("rld"),
+                  extractor.HashToken("ld!"),
+                  extractor.HashToken("d!$"),
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractAsciiNoChargrams) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{};  // no chargram orders at all
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hello", 0, 5}, true, &sparse_features,
+                    &dense_features);
+  // The only sparse feature expected is the hash of the whole "^token$".
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({extractor.HashToken("^Hello$")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();  // reset both outputs before the second token
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({extractor.HashToken("^world!$")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractUnicode) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hělló", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("H"),
+                  extractor.HashToken("ě"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("ó"),
+                  extractor.HashToken("^H"),
+                  extractor.HashToken("Hě"),
+                  extractor.HashToken("ěl"),
+                  extractor.HashToken("ll"),
+                  extractor.HashToken("ló"),
+                  extractor.HashToken("ó$"),
+                  extractor.HashToken("^Hě"),
+                  extractor.HashToken("Hěl"),
+                  extractor.HashToken("ěll"),
+                  extractor.HashToken("lló"),
+                  extractor.HashToken("ló$")
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("w"),
+                  extractor.HashToken("o"),
+                  extractor.HashToken("r"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("d"),
+                  extractor.HashToken("!"),
+                  extractor.HashToken("^w"),
+                  extractor.HashToken("wo"),
+                  extractor.HashToken("or"),
+                  extractor.HashToken("rl"),
+                  extractor.HashToken("ld"),
+                  extractor.HashToken("d!"),
+                  extractor.HashToken("!$"),
+                  extractor.HashToken("^wo"),
+                  extractor.HashToken("wor"),
+                  extractor.HashToken("orl"),
+                  extractor.HashToken("rld"),
+                  extractor.HashToken("ld!"),
+                  extractor.HashToken("d!$"),
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractUnicodeNoChargrams) {  // Unicode-aware extraction with an empty chargram order list.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{};  // No chargram orders are requested.
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hělló", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,  // Expect exactly one feature: the hash of the whole token wrapped in ^ and $.
+              testing::ElementsAreArray({extractor.HashToken("^Hělló$")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();  // Reset both outputs before the second extraction.
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray({
+                                   extractor.HashToken("^world!$"),
+                               }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));  // Lowercase token, second Extract argument false.
+}
+
+#ifdef TC3_TEST_ICU
+TEST_F(TokenFeatureExtractorTest, ICUCaseFeature) {  // Case feature on ASCII and non-ASCII tokens.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = false;  // Every expectation below holds a single dense value.
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"Hělló", 0, 5}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0}));  // Token begins with uppercase "H".
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0}));  // Token begins with lowercase "w".
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"Ř", 23, 29}, false, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0}));  // Uppercase non-ASCII letter.
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"ř", 23, 29}, false, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0}));  // Lowercase counterpart of the previous token.
+}
+#endif
+
+TEST_F(TokenFeatureExtractorTest, DigitRemapping) {  // remap_digits with unicode_aware_features disabled.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.remap_digits = true;  // Option under test.
+  options.unicode_aware_features = false;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"9:30am", 0, 6}, true, &sparse_features,
+                    &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"5:32am", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));  // Same layout, different digits: equal features expected.
+
+  extractor.Extract(Token{"10:32am", 0, 6}, true, &sparse_features2,  // NOTE(review): 7-character token, third Token field still 6 - confirm intended.
+                    &dense_features);
+  EXPECT_THAT(sparse_features,  // One extra digit: the features are expected to differ.
+              testing::Not(testing::ElementsAreArray(sparse_features2)));
+}
+
+TEST_F(TokenFeatureExtractorTest, DigitRemappingUnicode) {  // remap_digits with unicode_aware_features enabled.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.remap_digits = true;  // Option under test.
+  options.unicode_aware_features = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"9:30am", 0, 6}, true, &sparse_features,
+                    &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"5:32am", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));  // Same layout, different digits: equal features expected.
+
+  extractor.Extract(Token{"10:32am", 0, 6}, true, &sparse_features2,  // NOTE(review): 7-character token, third Token field still 6 - confirm intended.
+                    &dense_features);
+  EXPECT_THAT(sparse_features,  // One extra digit: the features are expected to differ.
+              testing::Not(testing::ElementsAreArray(sparse_features2)));
+}
+
+TEST_F(TokenFeatureExtractorTest, LowercaseAscii) {  // lowercase_tokens with unicode_aware_features disabled.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.lowercase_tokens = true;  // Option under test.
+  options.unicode_aware_features = false;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"AABB", 0, 6}, true, &sparse_features,  // Reference features, taken from the all-uppercase spelling.
+                    &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"aaBB", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));  // Differs only in letter case: equal features expected.
+
+  extractor.Extract(Token{"aAbB", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));  // Another case variant of the same letters.
+}
+
+#ifdef TC3_TEST_ICU
+TEST_F(TokenFeatureExtractorTest, LowercaseUnicode) {  // lowercase_tokens on non-ASCII letters.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.lowercase_tokens = true;  // Option under test.
+  options.unicode_aware_features = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"ŘŘ", 0, 6}, true, &sparse_features, &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"řř", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));  // Upper- and lowercase spellings are expected to match.
+}
+#endif
+
+#ifdef TC3_TEST_ICU
+TEST_F(TokenFeatureExtractorTest, RegexFeatures) {  // One dense value is expected per configured regexp, in insertion order.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.remap_digits = false;
+  options.unicode_aware_features = false;
+  options.regexp_features.push_back("^[a-z]+$");  // Whole token is lowercase a-z.
+  options.regexp_features.push_back("^[0-9]+$");  // Whole token is ASCII digits.
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"abCde", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));  // Contains an uppercase letter: neither pattern matches.
+
+  dense_features.clear();
+  extractor.Extract(Token{"abcde", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, -1.0}));  // Only the first pattern matches.
+
+  dense_features.clear();
+  extractor.Extract(Token{"12c45", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));  // Mixed digits and letters: neither pattern matches.
+
+  dense_features.clear();
+  extractor.Extract(Token{"12345", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 1.0}));  // Only the second pattern matches.
+}
+#endif
+
+TEST_F(TokenFeatureExtractorTest, ExtractTooLongWord) {  // 26-character token with a single chargram order of 22.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{22};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  // Checks that extraction of the long token runs at all; ASAN should catch memory problems.
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"abcdefghijklmnopqřstuvwxyz", 0, 0}, true,
+                    &sparse_features, &dense_features);
+
+  EXPECT_THAT(sparse_features,  // Expected chargrams keep the 10 leading and 10 trailing characters around "\1".
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("^abcdefghij\1qřstuvwxyz"),
+                  extractor.HashToken("abcdefghij\1qřstuvwxyz$"),
+                  // clang-format on
+              }));
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractAsciiUnicodeMatches) {  // ASCII-only inputs through both extraction modes.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3, 4, 5};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+
+  TestingTokenFeatureExtractor extractor_unicode(options, &unilib_);
+
+  options.unicode_aware_features = false;  // The second extractor differs from the first in this option only.
+  TestingTokenFeatureExtractor extractor_ascii(options, &unilib_);
+
+  for (const std::string& input :  // Inputs include URLs, punctuation, short words and the empty string.
+       {"https://www.abcdefgh.com/in/xxxkkkvayio",
+        "https://www.fjsidofj.om/xx/abadfy/xxxx/?xfjiis=ffffiijiihil",
+        "asdfhasdofjiasdofj#%()*%#*(aisdojfaosdifjiaofjdsiofjdi_fdis3w", "abcd",
+        "x", "Hello", "Hey,", "Hi", ""}) {
+    std::vector<int> sparse_features_unicode;
+    std::vector<float> dense_features_unicode;
+    extractor_unicode.Extract(Token{input, 0, 0}, true,
+                              &sparse_features_unicode,
+                              &dense_features_unicode);
+
+    std::vector<int> sparse_features_ascii;
+    std::vector<float> dense_features_ascii;
+    extractor_ascii.Extract(Token{input, 0, 0}, true, &sparse_features_ascii,
+                            &dense_features_ascii);
+
+    EXPECT_THAT(sparse_features_unicode, sparse_features_ascii) << input;  // The failing input is streamed into the failure message.
+    EXPECT_THAT(dense_features_unicode, dense_features_ascii) << input;
+  }
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractForPadToken) {  // Features of a default-constructed Token.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token(), false, &sparse_features, &dense_features);  // Token() is presumably the padding token - confirm against Token's definition.
+
+  EXPECT_THAT(sparse_features,  // A single feature is expected: the hash of the literal "<PAD>".
+              testing::ElementsAreArray({extractor.HashToken("<PAD>")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractFiltered) {  // Extraction restricted by an allowed_chargrams set.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  options.allowed_chargrams.insert("^H");
+  options.allowed_chargrams.insert("ll");
+  options.allowed_chargrams.insert("llo");
+  options.allowed_chargrams.insert("w");
+  options.allowed_chargrams.insert("!");
+  options.allowed_chargrams.insert("\xc4");  // Lead byte of the two-byte UTF-8 encoding of "ě".
+
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hěllo", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,  // Layout appears to be 6 byte-level unigrams, then 7 bigrams, then 6 trigrams.
+              testing::ElementsAreArray({
+                  // clang-format off
+                  0,  // 0 is expected wherever the chargram is not in allowed_chargrams.
+                  extractor.HashToken("\xc4"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("^H"),
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("ll"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("llo"),
+                  0
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray({  // Only "w" and "!" are allowed for this token.
+                                   // clang-format off
+                  extractor.HashToken("w"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("!"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                                   // clang-format on
+                               }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+  EXPECT_EQ(extractor.HashToken("<PAD>"), 1);  // "<PAD>" is expected to hash to 1 even though it is not in the allowed set.
+}
+
+TEST_F(TokenFeatureExtractorTest, ExtractEmptyToken) {  // Default-constructed Token with the second Extract argument set to true.
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  // Extraction of a default-constructed token should not crash.
+  extractor.Extract(Token(), true, &sparse_features, &dense_features);
+
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray({  // A single feature is expected: the hash of "<PAD>".
+                                   // clang-format off
+                  extractor.HashToken("<PAD>"),
+                                   // clang-format on
+                               }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 1.0}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/tokenizer.cc b/native/utils/tokenizer.cc
index 5e50c09..bd47592 100644
--- a/native/utils/tokenizer.cc
+++ b/native/utils/tokenizer.cc

@@ -126,8 +126,7 @@
 
 void AppendCodepointToToken(UnicodeText::const_iterator it, Token* token) {
   token->value += std::string(
-      it.utf8_data(),
-      it.utf8_data() + GetNumBytesForNonZeroUTF8Char(it.utf8_data()));
+      it.utf8_data(), it.utf8_data() + GetNumBytesForUTF8Char(it.utf8_data()));
 }
 
 std::vector<Token> Tokenizer::InternalTokenize(
@@ -285,20 +284,19 @@
     }
   };
 
-  auto MaybeResetTokenAndAddChar = [&new_token, PushToken, &current_token_type](
-                                       int codepoint_index,
-                                       NumberTokenType token_type,
-                                       UnicodeText::const_iterator it,
-                                       bool is_whitespace = false) {
-    if (current_token_type != token_type) {
-      PushToken();
-      new_token = Token("", codepoint_index, codepoint_index,
-                        /*is_padding=*/false, is_whitespace);
-    }
-    new_token.end += 1;
-    AppendCodepointToToken(it, &new_token);
-    current_token_type = token_type;
-  };
+  auto MaybeResetTokenAndAddChar =
+      [&new_token, PushToken, &current_token_type](
+          int codepoint_index, NumberTokenType token_type,
+          UnicodeText::const_iterator it, bool is_whitespace = false) {
+        if (current_token_type != token_type) {
+          PushToken();
+          new_token = Token("", codepoint_index, codepoint_index,
+                            /*is_padding=*/false, is_whitespace);
+        }
+        new_token.end += 1;
+        AppendCodepointToToken(it, &new_token);
+        current_token_type = token_type;
+      };
 
   auto FinishTokenAndAddSeparator =
       [&new_token, result, &current_token_type, PushToken](

diff --git a/native/utils/tokenizer_test.cc b/native/utils/tokenizer_test.cc
new file mode 100644
index 0000000..f73f8f8
--- /dev/null
+++ b/native/utils/tokenizer_test.cc

@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tokenizer.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAreArray;
+
+class TestingTokenizer : public Tokenizer {  // Test-only subclass that makes FindTokenizationRange callable from tests.
+ public:
+  TestingTokenizer(  // Forwards every argument unchanged to the Tokenizer constructor.
+      const TokenizationType type, const UniLib* unilib,
+      const std::vector<const TokenizationCodepointRange*>& codepoint_ranges,
+      const std::vector<const CodepointRange*>&
+          internal_tokenizer_codepoint_ranges,
+      const bool split_on_script_change,
+      const bool icu_preserve_whitespace_tokens,
+      const bool preserve_floating_numbers)
+      : Tokenizer(type, unilib, codepoint_ranges,
+                  internal_tokenizer_codepoint_ranges, split_on_script_change,
+                  icu_preserve_whitespace_tokens, preserve_floating_numbers) {}
+
+  using Tokenizer::FindTokenizationRange;  // Re-declared here as public; presumably non-public in Tokenizer - confirm.
+};
+
+class TestingTokenizerProxy {  // Owns a UniLib, the serialized range configs and the TestingTokenizer built from them.
+ public:
+  TestingTokenizerProxy(  // Serializes the object-API configs to flatbuffers, then constructs the tokenizer.
+      TokenizationType type,
+      const std::vector<TokenizationCodepointRangeT>& codepoint_range_configs,
+      const std::vector<CodepointRangeT>& internal_codepoint_range_configs,
+      const bool split_on_script_change,
+      const bool icu_preserve_whitespace_tokens,
+      const bool preserve_floating_numbers)
+      : INIT_UNILIB_FOR_TESTING(unilib_) {
+    const int num_configs = codepoint_range_configs.size();
+    std::vector<const TokenizationCodepointRange*> configs_fb;
+    configs_fb.reserve(num_configs);
+    const int num_internal_configs = internal_codepoint_range_configs.size();
+    std::vector<const CodepointRange*> internal_configs_fb;
+    internal_configs_fb.reserve(num_internal_configs);
+    buffers_.reserve(num_configs + num_internal_configs);  // One buffer per config of either kind.
+    for (int i = 0; i < num_configs; i++) {  // Tokenization ranges: serialize, store the buffer, record its root.
+      flatbuffers::FlatBufferBuilder builder;
+      builder.Finish(CreateTokenizationCodepointRange(
+          builder, &codepoint_range_configs[i]));
+      buffers_.push_back(builder.Release());  // buffers_ holds the bytes that the root pointer below refers to.
+      configs_fb.push_back(flatbuffers::GetRoot<TokenizationCodepointRange>(
+          buffers_.back().data()));
+    }
+    for (int i = 0; i < num_internal_configs; i++) {  // Same steps for the internal tokenizer codepoint ranges.
+      flatbuffers::FlatBufferBuilder builder;
+      builder.Finish(
+          CreateCodepointRange(builder, &internal_codepoint_range_configs[i]));
+      buffers_.push_back(builder.Release());
+      internal_configs_fb.push_back(
+          flatbuffers::GetRoot<CodepointRange>(buffers_.back().data()));
+    }
+    tokenizer_ = std::unique_ptr<TestingTokenizer>(new TestingTokenizer(
+        type, &unilib_, configs_fb, internal_configs_fb, split_on_script_change,
+        icu_preserve_whitespace_tokens, preserve_floating_numbers));
+  }
+
+  TokenizationCodepointRange_::Role TestFindTokenizationRole(int c) const {  // Role of the range found for c, or DEFAULT_ROLE.
+    const TokenizationCodepointRangeT* range =
+        tokenizer_->FindTokenizationRange(c);
+    if (range != nullptr) {
+      return range->role;
+    } else {
+      return TokenizationCodepointRange_::Role_DEFAULT_ROLE;  // No range was found for c.
+    }
+  }
+
+  std::vector<Token> Tokenize(const std::string& utf8_text) const {  // Delegates to the wrapped tokenizer.
+    return tokenizer_->Tokenize(utf8_text);
+  }
+
+ private:
+  UniLib unilib_;  // Passed by address to the tokenizer in the constructor.
+  std::vector<flatbuffers::DetachedBuffer> buffers_;  // Serialized configs released from the builders.
+  std::unique_ptr<TestingTokenizer> tokenizer_;
+};
+
+TEST(TokenizerTest, FindTokenizationRange) {  // Role lookup for codepoints inside, at the edges of and outside three ranges.
+  std::vector<TokenizationCodepointRangeT> configs;
+  TokenizationCodepointRangeT* config;
+
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0;
+  config->end = 10;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 32;
+  config->end = 33;
+  config->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 1234;
+  config->end = 12345;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {}, /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  // First range (0 to 10): start and interior are expected to hit; the end value 10 is not.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(0),
+            TokenizationCodepointRange_::Role_TOKEN_SEPARATOR);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(5),
+            TokenizationCodepointRange_::Role_TOKEN_SEPARATOR);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(10),
+            TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+
+  // Second range (32 to 33): only 32 is expected to hit.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(31),
+            TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(32),
+            TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(33),
+            TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+
+  // Third range (1234 to 12345): the end value 12345 is expected to fall outside.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(1233),
+            TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(1234),
+            TokenizationCodepointRange_::Role_TOKEN_SEPARATOR);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(12344),
+            TokenizationCodepointRange_::Role_TOKEN_SEPARATOR);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(12345),
+            TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+
+  // A codepoint that lies between the configured ranges.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(99),
+            TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+}
+
+TEST(TokenizerTest, TokenizeOnSpace) {  // Internal tokenizer with the space character as the only separator.
+  std::vector<TokenizationCodepointRangeT> configs;
+  TokenizationCodepointRangeT* config;
+
+  configs.emplace_back();
+  config = &configs.back();
+  // Space character (codepoint 32).
+  config->start = 32;
+  config->end = 33;
+  config->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens = tokenizer.Tokenize("Hello world!");
+
+  EXPECT_THAT(tokens,  // The space itself is not expected as a token; "!" stays attached to "world".
+              ElementsAreArray({Token("Hello", 0, 5), Token("world!", 6, 12)}));
+}
+
+TEST(TokenizerTest, TokenizeOnSpaceAndScriptChange) {  // Space separator plus splitting where the script id changes.
+  std::vector<TokenizationCodepointRangeT> configs;
+  TokenizationCodepointRangeT* config;
+
+  // Codepoints 0..0x77F, all tagged script_id 1; the space is the only separator among them.
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0;
+  config->end = 32;
+  config->role = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+  config->script_id = 1;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 32;
+  config->end = 33;
+  config->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+  config->script_id = 1;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 33;
+  config->end = 0x77F + 1;
+  config->role = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+  config->script_id = 1;
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {},
+                                  /*split_on_script_change=*/true,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+  EXPECT_THAT(tokenizer.Tokenize("앨라배마 주 전화(123) 456-789웹사이트"),  // Splits are expected where Hangul meets "(" and after "789", with no space there.
+              std::vector<Token>({Token("앨라배마", 0, 4), Token("주", 5, 6),
+                                  Token("전화", 7, 10), Token("(123)", 10, 15),
+                                  Token("456-789", 16, 23),
+                                  Token("웹사이트", 23, 28)}));
+}
+
+TEST(TokenizerTest, TokenizeComplex) {  // Space-separated Latin-range text mixed with per-codepoint CJK and Thai tokens.
+  std::vector<TokenizationCodepointRangeT> configs;
+  TokenizationCodepointRangeT* config;
+
+  // Source: http://www.unicode.org/Public/10.0.0/ucd/Blocks-10.0.0d1.txt
+  // Latin - Cyrillic.
+  //   0000..007F; Basic Latin
+  //   0080..00FF; Latin-1 Supplement
+  //   0100..017F; Latin Extended-A
+  //   0180..024F; Latin Extended-B
+  //   0250..02AF; IPA Extensions
+  //   02B0..02FF; Spacing Modifier Letters
+  //   0300..036F; Combining Diacritical Marks
+  //   0370..03FF; Greek and Coptic
+  //   0400..04FF; Cyrillic
+  //   0500..052F; Cyrillic Supplement
+  //   0530..058F; Armenian
+  //   0590..05FF; Hebrew
+  //   0600..06FF; Arabic
+  //   0700..074F; Syriac
+  //   0750..077F; Arabic Supplement
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0;
+  config->end = 32;
+  config->role = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 32;  // The space character is the only whitespace separator configured.
+  config->end = 33;
+  config->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 33;
+  config->end = 0x77F + 1;  // The +1 suggests end is exclusive, so 0x77F itself is covered.
+  config->role = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+
+  // CJK
+  // 2E80..2EFF; CJK Radicals Supplement
+  // 3000..303F; CJK Symbols and Punctuation
+  // 3040..309F; Hiragana
+  // 30A0..30FF; Katakana
+  // 3100..312F; Bopomofo
+  // 3130..318F; Hangul Compatibility Jamo
+  // 3190..319F; Kanbun
+  // 31A0..31BF; Bopomofo Extended
+  // 31C0..31EF; CJK Strokes
+  // 31F0..31FF; Katakana Phonetic Extensions
+  // 3200..32FF; Enclosed CJK Letters and Months
+  // 3300..33FF; CJK Compatibility
+  // 3400..4DBF; CJK Unified Ideographs Extension A
+  // 4DC0..4DFF; Yijing Hexagram Symbols
+  // 4E00..9FFF; CJK Unified Ideographs
+  // A000..A48F; Yi Syllables
+  // A490..A4CF; Yi Radicals
+  // A4D0..A4FF; Lisu
+  // A500..A63F; Vai
+  // F900..FAFF; CJK Compatibility Ideographs
+  // FE30..FE4F; CJK Compatibility Forms
+  // 20000..2A6DF; CJK Unified Ideographs Extension B
+  // 2A700..2B73F; CJK Unified Ideographs Extension C
+  // 2B740..2B81F; CJK Unified Ideographs Extension D
+  // 2B820..2CEAF; CJK Unified Ideographs Extension E
+  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
+  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x2E80;
+  config->end = 0x2EFF + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x3000;  // One config spans the contiguous blocks 3000..A63F listed above.
+  config->end = 0xA63F + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0xF900;
+  config->end = 0xFAFF + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0xFE30;
+  config->end = 0xFE4F + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x20000;
+  config->end = 0x2A6DF + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x2A700;
+  config->end = 0x2B73F + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x2B740;
+  config->end = 0x2B81F + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x2B820;
+  config->end = 0x2CEAF + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x2CEB0;
+  config->end = 0x2EBEF + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x2F800;
+  config->end = 0x2FA1F + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+
+  // Thai.
+  // 0E00..0E7F; Thai
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0x0E00;
+  config->end = 0x0E7F + 1;
+  config->role = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens;
+
+  tokens = tokenizer.Tokenize(
+      "問少目木輸走猶術権自京門録球変。細開括省用掲情結傍走愛明氷。");
+  EXPECT_EQ(tokens.size(), 30);  // The input has 30 codepoints; one token per codepoint is expected.
+
+  tokens = tokenizer.Tokenize("問少目 hello 木輸ยามきゃ");
+  // clang-format off
+  EXPECT_THAT(  // "hello" is expected as one token; each CJK, Thai and kana codepoint as its own token.
+      tokens,
+      ElementsAreArray({Token("問", 0, 1),
+                        Token("少", 1, 2),
+                        Token("目", 2, 3),
+                        Token("hello", 4, 9),
+                        Token("木", 10, 11),
+                        Token("輸", 11, 12),
+                        Token("ย", 12, 13),
+                        Token("า", 13, 14),
+                        Token("ม", 14, 15),
+                        Token("き", 15, 16),
+                        Token("ゃ", 16, 17)}));
+  // clang-format on
+}
+
+#if defined(TC3_TEST_ICU) || defined(__APPLE__)
+TEST(TokenizerTest, ICUTokenizeWithWhitespaces) {  // ICU tokenization with icu_preserve_whitespace_tokens enabled.
+  TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/true,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens = tokenizer.Tokenize("พระบาท สมเด็จ พระ ปร มิ");
+  // clang-format off
+  ASSERT_EQ(tokens,  // Each space is expected as its own token between the words.
+            std::vector<Token>({Token("พระบาท", 0, 6),
+                                Token(" ", 6, 7),
+                                Token("สมเด็จ", 7, 13),
+                                Token(" ", 13, 14),
+                                Token("พระ", 14, 17),
+                                Token(" ", 17, 18),
+                                Token("ปร", 18, 20),
+                                Token(" ", 20, 21),
+                                Token("มิ", 21, 23)}));
+  // clang-format on
+}
+
+TEST(TokenizerTest, ICUTokenizePunctuation) {  // ICU tokenization of text containing punctuation and numbers.
+  TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/true,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens =
+      tokenizer.Tokenize("The interval is: -(12, 138*)");
+  // clang-format off
+  ASSERT_EQ(  // Every punctuation character and every space is expected as a separate token.
+      tokens,
+            std::vector<Token>({Token("The", 0, 3),
+                                Token(" ", 3, 4),
+                                Token("interval", 4, 12),
+                                Token(" ", 12, 13),
+                                Token("is", 13, 15),
+                                Token(":", 15, 16),
+                                Token(" ", 16, 17),
+                                Token("-", 17, 18),
+                                Token("(", 18, 19),
+                                Token("12", 19, 21),
+                                Token(",", 21, 22),
+                                Token(" ", 22, 23),
+                                Token("138", 23, 26),
+                                Token("*", 26, 27),
+                                Token(")", 27, 28)}));
+  // clang-format on
+}
+
+TEST(TokenizerTest, ICUTokenizeWithNumbers) {  // Numbers written with three different dot characters.
+  TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/true,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens = tokenizer.Tokenize("3.1 3﹒2 3．3");
+  // clang-format off
+  ASSERT_EQ(tokens,  // Each number is expected to stay one token even with preserve_floating_numbers false.
+            std::vector<Token>({Token("3.1", 0, 3),
+                                Token(" ", 3, 4),
+                                Token("3﹒2", 4, 7),
+                                Token(" ", 7, 8),
+                                Token("3．3", 8, 11)}));
+  // clang-format on
+}
+#endif
+
+#if defined(TC3_TEST_ICU)
+TEST(TokenizerTest, ICUTokenize) {  // ICU mode on Thai text written without spaces.
+  TestingTokenizerProxy tokenizer(TokenizationType_ICU, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens = tokenizer.Tokenize("พระบาทสมเด็จพระปรมิ");
+  // clang-format off
+  ASSERT_EQ(tokens,
+            std::vector<Token>({Token("พระบาท", 0, 6),  // Offsets count code points, not bytes.
+                                Token("สมเด็จ", 6, 12),
+                                Token("พระ", 12, 15),
+                                Token("ปร", 15, 17),
+                                Token("มิ", 17, 19)}));  // 19 code points in total.
+  // clang-format on
+}
+
+TEST(TokenizerTest, MixedTokenize) {  // TokenizationType_MIXED on multi-script text.
+  std::vector<TokenizationCodepointRangeT> configs;
+  TokenizationCodepointRangeT* config;
+
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 32;  // 32 is the ASCII space character.
+  config->end = 33;
+  config->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+  std::vector<CodepointRangeT> internal_configs;  // Four adjacent ranges, 0..592.
+  CodepointRangeT* interal_config;
+
+  internal_configs.emplace_back();
+  interal_config = &internal_configs.back();
+  interal_config->start = 0;
+  interal_config->end = 128;
+
+  internal_configs.emplace_back();
+  interal_config = &internal_configs.back();
+  interal_config->start = 128;
+  interal_config->end = 256;
+
+  internal_configs.emplace_back();
+  interal_config = &internal_configs.back();
+  interal_config->start = 256;
+  interal_config->end = 384;
+
+  internal_configs.emplace_back();
+  interal_config = &internal_configs.back();
+  interal_config->start = 384;
+  interal_config->end = 592;
+  // NOTE(review): presumably these ranges pick the internal tokenizer; confirm.
+  TestingTokenizerProxy tokenizer(TokenizationType_MIXED, configs,
+                                  internal_configs,
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  std::vector<Token> tokens = tokenizer.Tokenize(
+      "こんにちはJapanese-ląnguagę text 你好世界 http://www.google.com/");
+  ASSERT_EQ(
+      tokens,
+      // clang-format off
+      std::vector<Token>({Token("こんにちは", 0, 5),
+                          Token("Japanese-ląnguagę", 5, 22),  // Hyphenated word is one token.
+                          Token("text", 23, 27),  // No tokens for the spaces at 22, 27 and 32.
+                          Token("你好", 28, 30),  // The CJK run is split into two tokens.
+                          Token("世界", 30, 32),
+                          Token("http://www.google.com/", 33, 55)}));  // URL is one token.
+  // clang-format on
+}
+
+TEST(TokenizerTest, InternalTokenizeOnScriptChange) {  // Effect of split_on_script_change.
+  std::vector<TokenizationCodepointRangeT> configs;
+  TokenizationCodepointRangeT* config;
+
+  configs.emplace_back();
+  config = &configs.back();
+  config->start = 0;
+  config->end = 256;  // Single range 0..256; it declares no separators.
+  config->role = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+
+  {  // Flag off: the Hangul/digit/Hangul input is expected as one token.
+    TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER,
+                                    configs, {},
+                                    /*split_on_script_change=*/false,
+                                    /*icu_preserve_whitespace_tokens=*/false,
+                                    /*preserve_floating_numbers=*/false);
+
+    EXPECT_EQ(tokenizer.Tokenize("앨라배마123웹사이트"),
+              std::vector<Token>({Token("앨라배마123웹사이트", 0, 11)}));
+  }
+
+  {  // Flag on: same input, expected to split where the script changes.
+    TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER,
+                                    configs, {},
+                                    /*split_on_script_change=*/true,
+                                    /*icu_preserve_whitespace_tokens=*/false,
+                                    /*preserve_floating_numbers=*/false);
+    EXPECT_EQ(tokenizer.Tokenize("앨라배마123웹사이트"),
+              std::vector<Token>({Token("앨라배마", 0, 4), Token("123", 4, 7),
+                                  Token("웹사이트", 7, 11)}));
+  }
+}
+#endif
+
+TEST(TokenizerTest, LetterDigitTokenize) {  // LETTER_DIGIT mode keeping floats whole.
+  TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/true);  // "3.14" and "68.9" stay whole.
+  std::vector<Token> tokens = tokenizer.Tokenize("7% -3.14 68.9#? 7% $99 .18.");
+  ASSERT_EQ(tokens,  // Signs, symbols, spaces and the stray dots of ".18." stand alone.
+            std::vector<Token>(
+                {Token("7", 0, 1), Token("%", 1, 2), Token(" ", 2, 3),
+                 Token("-", 3, 4), Token("3.14", 4, 8), Token(" ", 8, 9),
+                 Token("68.9", 9, 13), Token("#", 13, 14), Token("?", 14, 15),
+                 Token(" ", 15, 16), Token("7", 16, 17), Token("%", 17, 18),
+                 Token(" ", 18, 19), Token("$", 19, 20), Token("99", 20, 22),
+                 Token(" ", 22, 23), Token(".", 23, 24), Token("18", 24, 26),
+                 Token(".", 26, 27)}));
+}
+
+TEST(TokenizerTest, LetterDigitTokenizeUnicode) {  // Non-ASCII digits and letters.
+  TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/true);
+  std::vector<Token> tokens = tokenizer.Tokenize("２ pércént ３パーセント");  // Fullwidth digits, accents, Katakana.
+  ASSERT_EQ(tokens, std::vector<Token>({Token("２", 0, 1), Token(" ", 1, 2),
+                                        Token("pércént", 2, 9),  // Accented letters stay in the word.
+                                        Token(" ", 9, 10), Token("３", 10, 11),
+                                        Token("パーセント", 11, 16)}));  // Split from "３" with no separator.
+}
+
+TEST(TokenizerTest, LetterDigitTokenizeWithDots) {  // Non-ASCII decimal separators.
+  TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/true);
+  std::vector<Token> tokens = tokenizer.Tokenize("3 3﹒2 3．3%");  // Small and fullwidth full stops.
+  ASSERT_EQ(tokens,
+            std::vector<Token>({Token("3", 0, 1), Token(" ", 1, 2),
+                                Token("3﹒2", 2, 5), Token(" ", 5, 6),  // Kept whole like an ASCII float.
+                                Token("3．3", 6, 9), Token("%", 9, 10)}));  // "%" is split off.
+}
+
+TEST(TokenizerTest, LetterDigitTokenizeDoNotPreserveFloatingNumbers) {
+  TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);  // Dots split numbers below.
+  std::vector<Token> tokens = tokenizer.Tokenize("15.12.2019 january's 3.2");
+  ASSERT_EQ(tokens,  // Every "." and the apostrophe are expected as separate tokens.
+            std::vector<Token>(
+                {Token("15", 0, 2), Token(".", 2, 3), Token("12", 3, 5),
+                 Token(".", 5, 6), Token("2019", 6, 10), Token(" ", 10, 11),
+                 Token("january", 11, 18), Token("'", 18, 19),
+                 Token("s", 19, 20), Token(" ", 20, 21), Token("3", 21, 22),
+                 Token(".", 22, 23), Token("2", 23, 24)}));  // "3.2" becomes three tokens.
+}
+
+TEST(TokenizerTest, LetterDigitTokenizeStrangeStringFloatingNumbers) {
+  TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens = tokenizer.Tokenize("The+2345++the +íí+");  // "+" between words and digits.
+  ASSERT_EQ(tokens,
+            std::vector<Token>({Token("The", 0, 3), Token("+", 3, 4),
+                                Token("2345", 4, 8), Token("+", 8, 9),
+                                Token("+", 9, 10), Token("the", 10, 13),  // "++" yields two tokens.
+                                Token(" ", 13, 14), Token("+", 14, 15),
+                                Token("íí", 15, 17), Token("+", 17, 18)}));  // Accented letters group.
+}
+
+TEST(TokenizerTest, LetterDigitTokenizeWhitespcesInSameToken) {  // Runs of spaces.
+  TestingTokenizerProxy tokenizer(TokenizationType_LETTER_DIGIT, {}, {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,  // Space tokens are still expected below.
+                                  /*preserve_floating_numbers=*/false);
+  std::vector<Token> tokens = tokenizer.Tokenize("2 3  4   5");  // Gaps of 1, 2 and 3 spaces.
+  ASSERT_EQ(tokens, std::vector<Token>({Token("2", 0, 1), Token(" ", 1, 2),
+                                        Token("3", 2, 3), Token("  ", 3, 5),  // Two spaces, one token.
+                                        Token("4", 5, 6), Token("   ", 6, 9),  // Three spaces, one token.
+                                        Token("5", 9, 10)}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/utf8/NSString+Unicode.h b/native/utils/utf8/NSString+Unicode.h
deleted file mode 100644
index 734d58f..0000000
--- a/native/utils/utf8/NSString+Unicode.h
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#import <Foundation/Foundation.h>
-
-/// Defines utility methods for operating with Unicode in @c NSString.
-/// @discussion Unicode has 1,114,112 code points ( http://en.wikipedia.org/wiki/Code_point ),
-///             and multiple encodings that map these code points into code units.
-///             @c NSString API exposes the string as if it were encoded in UTF-16, which makes use
-///             of surrogate pairs ( http://en.wikipedia.org/wiki/UTF-16 ).
-///             The methods in this category translate indices between Unicode codepoints and
-///             UTF-16 unichars.
-@interface NSString (Unicode)
-
-/// Returns the number of Unicode codepoints for a string slice.
-/// @param start The NSString start index.
-/// @param length The number of unichar units.
-/// @return The number of Unicode code points in the specified unichar range.
-- (NSUInteger)tc_countChar32:(NSUInteger)start withLength:(NSUInteger)length;
-
-/// Returns the length of the string in terms of Unicode codepoints.
-/// @return The number of Unicode codepoints in this string.
-- (NSUInteger)tc_codepointLength;
-
-@end

diff --git a/native/utils/utf8/unicodetext.cc b/native/utils/utf8/unicodetext.cc
index 45bbbf6..7b56ce2 100644
--- a/native/utils/utf8/unicodetext.cc
+++ b/native/utils/utf8/unicodetext.cc

@@ -288,7 +288,7 @@
 }
 
 UnicodeText::const_iterator& UnicodeText::const_iterator::operator++() {
-  it_ += GetNumBytesForNonZeroUTF8Char(it_);
+  it_ += GetNumBytesForUTF8Char(it_);
   return *this;
 }
 

diff --git a/native/utils/utf8/unicodetext_test.cc b/native/utils/utf8/unicodetext_test.cc
new file mode 100644
index 0000000..4e8883b
--- /dev/null
+++ b/native/utils/utf8/unicodetext_test.cc

@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8/unicodetext.h"
+
+#include "utils/strings/stringpiece.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+class UnicodeTextTest : public testing::Test {  // Shared data for the TEST_F suites below.
+ protected:
+  UnicodeTextTest() : empty_text_() {
+    text_.push_back(0x1C0);    // 2 bytes when encoded as UTF-8.
+    text_.push_back(0x4E8C);   // 3 bytes when encoded as UTF-8.
+    text_.push_back(0xD7DB);   // 3 bytes; lies shortly below the surrogate range.
+    text_.push_back(0x34);     // 1 byte (ASCII '4').
+    text_.push_back(0x1D11E);  // 4 bytes (outside the Basic Multilingual Plane).
+  }
+
+  UnicodeText empty_text_;  // Left empty by the constructor.
+  UnicodeText text_;        // Holds the five code points pushed above.
+};
+
+TEST(UnicodeTextTest, ConstructionFromUnicodeText) {  // Both construction forms round-trip.
+  UnicodeText text = UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false);
+  EXPECT_EQ(UnicodeText(text).ToUTF8String(), "1234😋hello");  // Plain copy construction.
+  EXPECT_EQ(UnicodeText(text, /*do_copy=*/false).ToUTF8String(), "1234😋hello");
+}
+
+// Tests for our modifications of UnicodeText: size queries and UTF8Substring.
+TEST(UnicodeTextTest, Custom) {
+  UnicodeText text = UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false);
+  EXPECT_EQ(text.ToUTF8String(), "1234😋hello");
+  EXPECT_EQ(text.size_codepoints(), 10);  // 4 digits + 1 emoji + 5 letters.
+  EXPECT_EQ(text.size_bytes(), 13);  // The emoji alone is 4 bytes in UTF-8.
+
+  auto it_begin = text.begin();
+  std::advance(it_begin, 4);  // Now at the emoji.
+  auto it_end = text.begin();
+  std::advance(it_end, 6);  // One past 'h'.
+  EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "😋h");
+}
+
+TEST(UnicodeTextTest, StringPieceView) {  // Same checks as Custom, built from a StringPiece.
+  std::string raw_text = "1234😋hello";
+  UnicodeText text =
+      UTF8ToUnicodeText(StringPiece(raw_text), /*do_copy=*/false);
+  EXPECT_EQ(text.ToUTF8String(), "1234😋hello");
+  EXPECT_EQ(text.size_codepoints(), 10);  // 4 digits + 1 emoji + 5 letters.
+  EXPECT_EQ(text.size_bytes(), 13);  // The emoji alone is 4 bytes in UTF-8.
+
+  auto it_begin = text.begin();
+  std::advance(it_begin, 4);  // Now at the emoji.
+  auto it_end = text.begin();
+  std::advance(it_end, 6);  // One past 'h'.
+  EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "😋h");
+}
+
+TEST(UnicodeTextTest, Substring) {  // Iterator and index forms, with and without copying.
+  UnicodeText text = UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false);
+
+  EXPECT_EQ(
+      UnicodeText::Substring(std::next(text.begin(), 4),  // Iterator form, copying.
+                             std::next(text.begin(), 6), /*do_copy=*/true),
+      UTF8ToUnicodeText("😋h"));
+  EXPECT_EQ(
+      UnicodeText::Substring(std::next(text.begin(), 4),  // Iterator form, no copy.
+                             std::next(text.begin(), 6), /*do_copy=*/false),
+      UTF8ToUnicodeText("😋h"));
+  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/true),  // Index form, copying.
+            UTF8ToUnicodeText("😋h"));
+  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/false),  // Index form, no copy.
+            UTF8ToUnicodeText("😋h"));
+}
+
+TEST(UnicodeTextTest, Ownership) {  // Aliasing via PointToUTF8 versus copying.
+  const std::string src = "\u304A\u00B0\u106B";
+
+  UnicodeText alias;
+  alias.PointToUTF8(src.data(), src.size());
+  EXPECT_EQ(alias.data(), src.data());  // Same buffer: nothing was copied.
+  UnicodeText::const_iterator it = alias.begin();
+  EXPECT_EQ(*it++, 0x304A);  // The three code points of src, in order.
+  EXPECT_EQ(*it++, 0x00B0);
+  EXPECT_EQ(*it++, 0x106B);
+  EXPECT_EQ(it, alias.end());
+
+  UnicodeText t = alias;  // Copy initialization copies the data.
+  EXPECT_NE(t.data(), alias.data());  // The copy has its own buffer.
+}
+
+TEST(UnicodeTextTest, Validation) {  // is_valid() on well-formed and malformed UTF-8.
+  EXPECT_TRUE(UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false).is_valid());
+  EXPECT_TRUE(
+      UTF8ToUnicodeText("\u304A\u00B0\u106B", /*do_copy=*/false).is_valid());
+  EXPECT_TRUE(
+      UTF8ToUnicodeText("this is a test😋😋😋", /*do_copy=*/false).is_valid());
+  EXPECT_TRUE(
+      UTF8ToUnicodeText("\xf0\x9f\x98\x8b", /*do_copy=*/false).is_valid());
+  // Too short (the string ends after two bytes of a four-byte sequence).
+  EXPECT_FALSE(UTF8ToUnicodeText("\xf0\x9f", /*do_copy=*/false).is_valid());
+  // Too long (an extra continuation byte follows a complete sequence).
+  EXPECT_FALSE(
+      UTF8ToUnicodeText("\xf0\x9f\x98\x8b\x8b", /*do_copy=*/false).is_valid());
+  // Too short (ASCII 'a' appears where the last continuation byte belongs).
+  EXPECT_FALSE(
+      UTF8ToUnicodeText("\xf0\x9f\x98\x61\x61", /*do_copy=*/false).is_valid());
+  // The same malformed sequence embedded in otherwise valid ASCII text.
+  EXPECT_FALSE(
+      UTF8ToUnicodeText("hello \xf0\x9f\x98\x61\x61 world1", /*do_copy=*/false)
+          .is_valid());
+}
+
+class IteratorTest : public UnicodeTextTest {};  // Reuses the fixture data under its own suite name.
+
+TEST_F(IteratorTest, Iterates) {  // Forward walk over the five fixture code points.
+  UnicodeText::const_iterator iter = text_.begin();
+  EXPECT_EQ(0x1C0, *iter);
+  EXPECT_EQ(&iter, &++iter);  // operator++ returns *this.
+  EXPECT_EQ(0x4E8C, *iter++);  // Post-increment: read first, then advance.
+  EXPECT_EQ(0xD7DB, *iter);
+  // Make sure you can dereference more than once.
+  EXPECT_EQ(0xD7DB, *iter);
+  EXPECT_EQ(0x34, *++iter);
+  EXPECT_EQ(0x1D11E, *++iter);
+  ASSERT_TRUE(iter != text_.end());  // Still on the last code point.
+  iter++;
+  EXPECT_TRUE(iter == text_.end());
+}
+
+TEST_F(IteratorTest, MultiPass) {  // Copies of an iterator advance independently.
+  // Also tests Default Constructible and Assignable.
+  UnicodeText::const_iterator i1, i2;
+  i1 = text_.begin();
+  i2 = i1;
+  EXPECT_EQ(0x4E8C, *++i1);
+  EXPECT_TRUE(i1 != i2);
+  EXPECT_EQ(0x1C0, *i2);  // i2 was not moved by advancing i1.
+  ++i2;
+  EXPECT_TRUE(i1 == i2);  // Both now sit on the second code point.
+  EXPECT_EQ(0x4E8C, *i2);
+}
+
+TEST_F(IteratorTest, ReverseIterates) {  // Backward walk from end() to begin().
+  UnicodeText::const_iterator iter = text_.end();
+  EXPECT_TRUE(iter == text_.end());
+  iter--;
+  ASSERT_TRUE(iter != text_.end());
+  EXPECT_EQ(0x1D11E, *iter--);  // Post-decrement: read first, then step back.
+  EXPECT_EQ(0x34, *iter);
+  EXPECT_EQ(0xD7DB, *--iter);
+  // Make sure you can dereference more than once.
+  EXPECT_EQ(0xD7DB, *iter);
+  --iter;
+  EXPECT_EQ(0x4E8C, *iter--);
+  EXPECT_EQ(0x1C0, *iter);
+  EXPECT_TRUE(iter == text_.begin());  // All five code points were visited.
+}
+
+TEST_F(IteratorTest, Comparable) {  // Relational operators between iterators.
+  UnicodeText::const_iterator i1, i2;
+  i1 = text_.begin();
+  i2 = i1;
+  ++i2;  // i2 is now one code point ahead of i1.
+
+  EXPECT_TRUE(i1 < i2);
+  EXPECT_TRUE(text_.begin() <= i1);  // Equal positions satisfy <=.
+  EXPECT_FALSE(i1 >= i2);
+  EXPECT_FALSE(i1 > text_.end());
+}
+
+TEST_F(IteratorTest, Advance) {  // std::advance works with the iterator.
+  UnicodeText::const_iterator iter = text_.begin();
+  EXPECT_EQ(0x1C0, *iter);
+  std::advance(iter, 4);  // Skip to the fifth (last) code point.
+  EXPECT_EQ(0x1D11E, *iter);
+  ++iter;
+  EXPECT_TRUE(iter == text_.end());
+}
+
+TEST_F(IteratorTest, Distance) {  // std::distance counts code points.
+  UnicodeText::const_iterator iter = text_.begin();
+  EXPECT_EQ(0, std::distance(text_.begin(), iter));
+  EXPECT_EQ(5, std::distance(iter, text_.end()));  // The fixture holds five code points.
+  ++iter;
+  ++iter;
+  EXPECT_EQ(2, std::distance(text_.begin(), iter));
+  EXPECT_EQ(3, std::distance(iter, text_.end()));
+  ++iter;
+  ++iter;
+  EXPECT_EQ(4, std::distance(text_.begin(), iter));
+  ++iter;  // Fifth increment: iter is now at end().
+  EXPECT_EQ(0, std::distance(iter, text_.end()));
+}
+
+class OperatorTest : public UnicodeTextTest {};  // Reuses the fixture data under its own suite name.
+
+TEST_F(OperatorTest, Clear) {  // clear() makes the text compare equal to an empty one.
+  UnicodeText empty_text(UTF8ToUnicodeText("", /*do_copy=*/false));
+  EXPECT_FALSE(text_ == empty_text);  // The fixture text starts non-empty.
+  text_.clear();
+  EXPECT_TRUE(text_ == empty_text);
+}
+
+TEST_F(OperatorTest, Empty) {  // empty() before and after clear().
+  EXPECT_TRUE(empty_text_.empty());
+  EXPECT_FALSE(text_.empty());
+  text_.clear();
+  EXPECT_TRUE(text_.empty());  // Cleared text reports empty.
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/variant.cc b/native/utils/variant.cc
index 9cdc0b6..0513440 100644
--- a/native/utils/variant.cc
+++ b/native/utils/variant.cc

@@ -21,26 +21,26 @@
 std::string Variant::ToString() const {
   switch (GetType()) {
     case Variant::TYPE_BOOL_VALUE:
-      if (BoolValue()) {
+      if (Value<bool>()) {
         return "true";
       } else {
         return "false";
       }
       break;
     case Variant::TYPE_INT_VALUE:
-      return std::to_string(IntValue());
+      return std::to_string(Value<int>());
       break;
     case Variant::TYPE_INT64_VALUE:
-      return std::to_string(Int64Value());
+      return std::to_string(Value<int64>());
       break;
     case Variant::TYPE_FLOAT_VALUE:
-      return std::to_string(FloatValue());
+      return std::to_string(Value<float>());
       break;
     case Variant::TYPE_DOUBLE_VALUE:
-      return std::to_string(DoubleValue());
+      return std::to_string(Value<double>());
       break;
     case Variant::TYPE_STRING_VALUE:
-      return StringValue();
+      return ConstRefValue<std::string>();
       break;
     default:
       TC3_LOG(FATAL) << "Unsupported variant type: " << GetType();

diff --git a/native/utils/variant.h b/native/utils/variant.h
index 11c361c..551a822 100644
--- a/native/utils/variant.h
+++ b/native/utils/variant.h

@@ -85,110 +85,178 @@
 
   Variant& operator=(const Variant&) = default;
 
-  int Int8Value() const {
-    TC3_CHECK(HasInt8());
+  template <class T>
+  struct dependent_false : std::false_type {};
+
+  template <typename T>
+  T Value() const {
+    static_assert(dependent_false<T>::value, "Not supported.");
+  }
+
+  template <>
+  int8 Value() const {
+    TC3_CHECK(Has<int8>());
     return int8_value_;
   }
 
-  int UInt8Value() const {
-    TC3_CHECK(HasUInt8());
+  template <>
+  uint8 Value() const {
+    TC3_CHECK(Has<uint8>());
     return uint8_value_;
   }
 
-  int IntValue() const {
-    TC3_CHECK(HasInt());
+  template <>
+  int Value() const {
+    TC3_CHECK(Has<int>());
     return int_value_;
   }
 
-  uint UIntValue() const {
-    TC3_CHECK(HasUInt());
+  template <>
+  uint Value() const {
+    TC3_CHECK(Has<uint>());
     return uint_value_;
   }
 
-  int64 Int64Value() const {
-    TC3_CHECK(HasInt64());
+  template <>
+  int64 Value() const {
+    TC3_CHECK(Has<int64>());
     return long_value_;
   }
 
-  uint64 UInt64Value() const {
-    TC3_CHECK(HasUInt64());
+  template <>
+  uint64 Value() const {
+    TC3_CHECK(Has<uint64>());
     return ulong_value_;
   }
 
-  float FloatValue() const {
-    TC3_CHECK(HasFloat());
+  template <>
+  float Value() const {
+    TC3_CHECK(Has<float>());
     return float_value_;
   }
 
-  double DoubleValue() const {
-    TC3_CHECK(HasDouble());
+  template <>
+  double Value() const {
+    TC3_CHECK(Has<double>());
     return double_value_;
   }
 
-  bool BoolValue() const {
-    TC3_CHECK(HasBool());
+  template <>
+  bool Value() const {
+    TC3_CHECK(Has<bool>());
     return bool_value_;
   }
 
-  const std::string& StringValue() const {
-    TC3_CHECK(HasString());
+  template <typename T>
+  const T& ConstRefValue() const;
+
+  template <>
+  const std::string& ConstRefValue() const {
+    TC3_CHECK(Has<std::string>());
     return string_value_;
   }
 
-  const std::vector<std::string>& StringVectorValue() const {
-    TC3_CHECK(HasStringVector());
+  template <>
+  const std::vector<std::string>& ConstRefValue() const {
+    TC3_CHECK(Has<std::vector<std::string>>());
     return string_vector_value_;
   }
 
-  const std::vector<float>& FloatVectorValue() const {
-    TC3_CHECK(HasFloatVector());
+  template <>
+  const std::vector<float>& ConstRefValue() const {
+    TC3_CHECK(Has<std::vector<float>>());
     return float_vector_value_;
   }
 
-  const std::vector<int>& IntVectorValue() const {
-    TC3_CHECK(HasIntVector());
+  template <>
+  const std::vector<int>& ConstRefValue() const {
+    TC3_CHECK(Has<std::vector<int>>());
     return int_vector_value_;
   }
 
-  const std::map<std::string, Variant>& StringVariantMapValue() const {
-    TC3_CHECK(HasStringVariantMap());
+  template <>
+  const std::map<std::string, Variant>& ConstRefValue() const {
+    TC3_CHECK((Has<std::map<std::string, Variant>>()));
     return string_variant_map_value_;
   }
 
+  template <typename T>
+  bool Has() const;
+
+  template <>
+  bool Has<int8>() const {
+    return type_ == TYPE_INT8_VALUE;
+  }
+
+  template <>
+  bool Has<uint8>() const {
+    return type_ == TYPE_UINT8_VALUE;
+  }
+
+  template <>
+  bool Has<int>() const {
+    return type_ == TYPE_INT_VALUE;
+  }
+
+  template <>
+  bool Has<uint>() const {
+    return type_ == TYPE_UINT_VALUE;
+  }
+
+  template <>
+  bool Has<int64>() const {
+    return type_ == TYPE_INT64_VALUE;
+  }
+
+  template <>
+  bool Has<uint64>() const {
+    return type_ == TYPE_UINT64_VALUE;
+  }
+
+  template <>
+  bool Has<float>() const {
+    return type_ == TYPE_FLOAT_VALUE;
+  }
+
+  template <>
+  bool Has<double>() const {
+    return type_ == TYPE_DOUBLE_VALUE;
+  }
+
+  template <>
+  bool Has<bool>() const {
+    return type_ == TYPE_BOOL_VALUE;
+  }
+
+  template <>
+  bool Has<std::string>() const {
+    return type_ == TYPE_STRING_VALUE;
+  }
+
+  template <>
+  bool Has<std::vector<std::string>>() const {
+    return type_ == TYPE_STRING_VECTOR_VALUE;
+  }
+
+  template <>
+  bool Has<std::vector<float>>() const {
+    return type_ == TYPE_FLOAT_VECTOR_VALUE;
+  }
+
+  template <>
+  bool Has<std::vector<int>>() const {
+    return type_ == TYPE_INT_VECTOR_VALUE;
+  }
+
+  template <>
+  bool Has<std::map<std::string, Variant>>() const {
+    return type_ == TYPE_STRING_VARIANT_MAP_VALUE;
+  }
+
   // Converts the value of this variant to its string representation, regardless
   // of the type of the actual value.
   std::string ToString() const;
 
-  bool HasInt8() const { return type_ == TYPE_INT8_VALUE; }
-
-  bool HasUInt8() const { return type_ == TYPE_UINT8_VALUE; }
-
-  bool HasInt() const { return type_ == TYPE_INT_VALUE; }
-
-  bool HasUInt() const { return type_ == TYPE_UINT_VALUE; }
-
-  bool HasInt64() const { return type_ == TYPE_INT64_VALUE; }
-
-  bool HasUInt64() const { return type_ == TYPE_UINT64_VALUE; }
-
-  bool HasFloat() const { return type_ == TYPE_FLOAT_VALUE; }
-
-  bool HasDouble() const { return type_ == TYPE_DOUBLE_VALUE; }
-
-  bool HasBool() const { return type_ == TYPE_BOOL_VALUE; }
-
-  bool HasString() const { return type_ == TYPE_STRING_VALUE; }
-
-  bool HasStringVector() const { return type_ == TYPE_STRING_VECTOR_VALUE; }
-
-  bool HasFloatVector() const { return type_ == TYPE_FLOAT_VECTOR_VALUE; }
-
-  bool HasIntVector() const { return type_ == TYPE_INT_VECTOR_VALUE; }
-
-  bool HasStringVariantMap() const {
-    return type_ == TYPE_STRING_VARIANT_MAP_VALUE;
-  }
-
   Type GetType() const { return type_; }
 
   bool HasValue() const { return type_ != TYPE_EMPTY; }

diff --git a/native/utils/variant_test.cc b/native/utils/variant_test.cc
new file mode 100644
index 0000000..cf0acfb
--- /dev/null
+++ b/native/utils/variant_test.cc

@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/variant.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(VariantTest, GetType) {  // Each constructor argument type maps to its type tag.
+  EXPECT_EQ(Variant().GetType(), Variant::TYPE_EMPTY);  // Default-constructed: no value.
+  EXPECT_EQ(Variant(static_cast<int8_t>(9)).GetType(),
+            Variant::TYPE_INT8_VALUE);
+  EXPECT_EQ(Variant(static_cast<uint8_t>(9)).GetType(),
+            Variant::TYPE_UINT8_VALUE);
+  EXPECT_EQ(Variant(static_cast<int>(9)).GetType(), Variant::TYPE_INT_VALUE);
+  EXPECT_EQ(Variant(static_cast<uint>(9)).GetType(), Variant::TYPE_UINT_VALUE);
+  EXPECT_EQ(Variant(static_cast<int64>(9)).GetType(),
+            Variant::TYPE_INT64_VALUE);
+  EXPECT_EQ(Variant(static_cast<uint64>(9)).GetType(),
+            Variant::TYPE_UINT64_VALUE);
+  EXPECT_EQ(Variant(static_cast<float>(9)).GetType(),
+            Variant::TYPE_FLOAT_VALUE);
+  EXPECT_EQ(Variant(static_cast<double>(9)).GetType(),
+            Variant::TYPE_DOUBLE_VALUE);
+  EXPECT_EQ(Variant(true).GetType(), Variant::TYPE_BOOL_VALUE);
+  EXPECT_EQ(Variant("hello").GetType(), Variant::TYPE_STRING_VALUE);  // From a string literal.
+}
+
+TEST(VariantTest, HasValue) {  // Only the default-constructed variant is empty.
+  EXPECT_FALSE(Variant().HasValue());
+  EXPECT_TRUE(Variant(static_cast<int8_t>(9)).HasValue());  // Same argument types as GetType.
+  EXPECT_TRUE(Variant(static_cast<uint8_t>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<int>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<uint>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<int64>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<uint64>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<float>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<double>(9)).HasValue());
+  EXPECT_TRUE(Variant(true).HasValue());
+  EXPECT_TRUE(Variant("hello").HasValue());
+}
+
+TEST(VariantTest, Value) {  // Typed accessors return the value that was stored.
+  EXPECT_EQ(Variant(static_cast<int8_t>(9)).Value<int8>(), 9);  // Stored as int8_t, read as int8.
+  EXPECT_EQ(Variant(static_cast<uint8_t>(9)).Value<uint8>(), 9);
+  EXPECT_EQ(Variant(static_cast<int>(9)).Value<int>(), 9);
+  EXPECT_EQ(Variant(static_cast<uint>(9)).Value<uint>(), 9);
+  EXPECT_EQ(Variant(static_cast<int64>(9)).Value<int64>(), 9);
+  EXPECT_EQ(Variant(static_cast<uint64>(9)).Value<uint64>(), 9);
+  EXPECT_EQ(Variant(static_cast<float>(9)).Value<float>(), 9);
+  EXPECT_EQ(Variant(static_cast<double>(9)).Value<double>(), 9);
+  EXPECT_EQ(Variant(true).Value<bool>(), true);
+  EXPECT_EQ(Variant("hello").ConstRefValue<std::string>(), "hello");  // Strings use ConstRefValue.
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/notification/res/values-es-rUS/strings.xml b/notification/res/values-b+es+419/strings.xml
similarity index 100%
rename from notification/res/values-es-rUS/strings.xml
rename to notification/res/values-b+es+419/strings.xml


diff --git a/notification/res/values-b+sr+Latn/strings.xml b/notification/res/values-b+sr+Latn/strings.xml
new file mode 100755
index 0000000..480ef86
--- /dev/null
+++ b/notification/res/values-b+sr+Latn/strings.xml

@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="utf-8"?>
+<resources xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2">
+  <string name="tc_notif_copy_code_desc">Kopiraj „%1$s“</string>
+  <string name="tc_notif_code_copied_to_clipboard">Kôd je kopiran</string>
+</resources>

diff --git a/notification/res/values-in/strings.xml b/notification/res/values-id/strings.xml
similarity index 100%
rename from notification/res/values-in/strings.xml
rename to notification/res/values-id/strings.xml


diff --git a/notification/res/values-nb/strings.xml b/notification/res/values-no/strings.xml
similarity index 100%
rename from notification/res/values-nb/strings.xml
rename to notification/res/values-no/strings.xml


diff --git a/notification/res/values-zh-rCN/strings.xml b/notification/res/values-zh/strings.xml
similarity index 100%
rename from notification/res/values-zh-rCN/strings.xml
rename to notification/res/values-zh/strings.xml


diff --git a/notification/src/com/android/textclassifier/notification/SmartSuggestionsHelper.java b/notification/src/com/android/textclassifier/notification/SmartSuggestionsHelper.java
index fab0dd1..0a2cce7 100644
--- a/notification/src/com/android/textclassifier/notification/SmartSuggestionsHelper.java
+++ b/notification/src/com/android/textclassifier/notification/SmartSuggestionsHelper.java

@@ -35,9 +35,12 @@
 import android.util.Pair;
 import android.view.textclassifier.ConversationAction;
 import android.view.textclassifier.ConversationActions;
+import android.view.textclassifier.TextClassification;
 import android.view.textclassifier.TextClassificationContext;
 import android.view.textclassifier.TextClassificationManager;
 import android.view.textclassifier.TextClassifier;
+
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import java.time.Instant;
@@ -48,6 +51,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 import javax.annotation.Nullable;
 
 /**
@@ -76,14 +80,24 @@
   private static final int MAX_RESULT_ID_TO_CACHE = 20;
   private static final ImmutableList<String> HINTS =
       ImmutableList.of(ConversationActions.Request.HINT_FOR_NOTIFICATION);
-  private static final ConversationActions EMPTY_CONVERSATION_ACTIONS =
-      new ConversationActions(ImmutableList.of(), null);
+  private static final SuggestConversationActionsResult EMPTY_SUGGEST_CONVERSATION_ACTION_RESULT =
+      new SuggestConversationActionsResult(
+          Optional.empty(), new ConversationActions(ImmutableList.of(), /* id= */ null));
 
   private final Context context;
   private final TextClassificationManager textClassificationManager;
   private final SmartSuggestionsConfig config;
   private final LruCache<String, SmartSuggestionsLogSession> sessionCache =
-      new LruCache<>(MAX_RESULT_ID_TO_CACHE);
+      new LruCache<String, SmartSuggestionsLogSession>(MAX_RESULT_ID_TO_CACHE) {
+        @Override
+        protected void entryRemoved(
+            boolean evicted,
+            String key,
+            SmartSuggestionsLogSession oldSession,
+            SmartSuggestionsLogSession newSession) {
+          oldSession.destroy();
+        }
+      };
   private final TextClassificationContext textClassificationContext;
 
   public SmartSuggestionsHelper(Context context, SmartSuggestionsConfig config) {
@@ -103,26 +117,20 @@
   public SmartSuggestions onNotificationEnqueued(StatusBarNotification statusBarNotification) {
     // Whenever onNotificationEnqueued() is called again on the same notification key, its
     // previous session is ended.
-    removeAndDestroySession(statusBarNotification.getKey());
+    sessionCache.remove(statusBarNotification.getKey());
 
     boolean eligibleForReplyAdjustment =
         config.shouldGenerateReplies() && isEligibleForReplyAdjustment(statusBarNotification);
     boolean eligibleForActionAdjustment =
         config.shouldGenerateActions() && isEligibleForActionAdjustment(statusBarNotification);
 
-    TextClassifier textClassifier =
-        textClassificationManager.createTextClassificationSession(textClassificationContext);
-
-    ConversationActions conversationActionsResult =
+    SuggestConversationActionsResult suggestConversationActionsResult =
         suggestConversationActions(
-            textClassifier,
-            statusBarNotification,
-            eligibleForReplyAdjustment,
-            eligibleForActionAdjustment);
+            statusBarNotification, eligibleForReplyAdjustment, eligibleForActionAdjustment);
 
-    String resultId = conversationActionsResult.getId();
+    String resultId = suggestConversationActionsResult.conversationActions.getId();
     List<ConversationAction> conversationActions =
-        conversationActionsResult.getConversationActions();
+        suggestConversationActionsResult.conversationActions.getConversationActions();
 
     ArrayList<CharSequence> replies = new ArrayList<>();
     Map<CharSequence, Float> repliesScore = new ArrayMap<>();
@@ -156,33 +164,34 @@
       }
     }
 
-    if (!TextUtils.isEmpty(resultId)) {
-      SmartSuggestionsLogSession session =
-          new SmartSuggestionsLogSession(
-              resultId, repliesScore, textClassifier, textClassificationContext);
-      session.onSuggestionsGenerated(conversationActions);
+    suggestConversationActionsResult.textClassifier.ifPresent(
+        textClassifier -> {
+          if (TextUtils.isEmpty(resultId)) {
+            // Missing the result id, skip logging.
+            textClassifier.destroy();
+          } else {
+            SmartSuggestionsLogSession session =
+                new SmartSuggestionsLogSession(
+                    resultId,
+                    repliesScore,
+                    textClassifier,
+                    textClassificationContext);
+            session.onSuggestionsGenerated(conversationActions);
 
-      // Store the session if we expect more logging from it, destroy it otherwise.
-      if (!conversationActions.isEmpty()
-          && suggestionsMightBeUsedInNotification(
-              statusBarNotification, !actions.isEmpty(), !replies.isEmpty())) {
-        sessionCache.put(statusBarNotification.getKey(), session);
-      } else {
-        session.destroy();
-      }
-    }
+            // Store the session if we expect more logging from it, destroy it otherwise.
+            if (!conversationActions.isEmpty()
+                && suggestionsMightBeUsedInNotification(
+                    statusBarNotification, !actions.isEmpty(), !replies.isEmpty())) {
+              sessionCache.put(statusBarNotification.getKey(), session);
+            } else {
+              session.destroy();
+            }
+          }
+        });
 
     return new SmartSuggestions(replies, actions);
   }
 
-  private void removeAndDestroySession(String notificationKey) {
-    SmartSuggestionsLogSession session = sessionCache.get(notificationKey);
-    if (session != null) {
-      session.destroy();
-    }
-    sessionCache.remove(notificationKey);
-  }
-
   /**
    * Creates notification action from ConversationAction that does not come up a RemoteAction. It
    * could happen because we don't have common intents for some actions, like copying text.
@@ -258,23 +267,20 @@
   }
 
   /** Adds action adjustments based on the notification contents. */
-  private ConversationActions suggestConversationActions(
-      TextClassifier textClassifier,
-      StatusBarNotification statusBarNotification,
-      boolean includeReplies,
-      boolean includeActions) {
+  private SuggestConversationActionsResult suggestConversationActions(
+      StatusBarNotification statusBarNotification, boolean includeReplies, boolean includeActions) {
     if (!includeReplies && !includeActions) {
-      return EMPTY_CONVERSATION_ACTIONS;
+      return EMPTY_SUGGEST_CONVERSATION_ACTION_RESULT;
     }
     ImmutableList<ConversationActions.Message> messages =
         extractMessages(statusBarNotification.getNotification());
     if (messages.isEmpty()) {
-      return EMPTY_CONVERSATION_ACTIONS;
+      return EMPTY_SUGGEST_CONVERSATION_ACTION_RESULT;
     }
     // Do not generate smart actions if the last message is from the local user.
     ConversationActions.Message lastMessage = Iterables.getLast(messages);
     if (arePersonsEqual(ConversationActions.Message.PERSON_USER_SELF, lastMessage.getAuthor())) {
-      return EMPTY_CONVERSATION_ACTIONS;
+      return EMPTY_SUGGEST_CONVERSATION_ACTION_RESULT;
     }
 
     TextClassifier.EntityConfig.Builder typeConfigBuilder =
@@ -298,7 +304,9 @@
             .setTypeConfig(typeConfigBuilder.build())
             .build();
 
-    return textClassifier.suggestConversationActions(request);
+    TextClassifier textClassifier = createTextClassificationSession();
+    return new SuggestConversationActionsResult(
+        Optional.of(textClassifier), textClassifier.suggestConversationActions(request));
   }
 
   /**
@@ -462,9 +470,30 @@
     return ImmutableList.copyOf(new ArrayList<>(extractMessages));
   }
 
+  @VisibleForTesting
+  TextClassifier createTextClassificationSession() {
+    return textClassificationManager.createTextClassificationSession(textClassificationContext);
+  }
+
   private static boolean arePersonsEqual(Person left, Person right) {
     return Objects.equals(left.getKey(), right.getKey())
         && TextUtils.equals(left.getName(), right.getName())
         && Objects.equals(left.getUri(), right.getUri());
   }
+
+  /**
+   * Result object of {@link #suggestConversationActions(StatusBarNotification, boolean, boolean)}.
+   */
+  private static class SuggestConversationActionsResult {
+    /** The text classifier session that was used to make the suggestions, if any. */
+    final Optional<TextClassifier> textClassifier;
+    /** The resultant suggestions. */
+    final ConversationActions conversationActions;
+
+    SuggestConversationActionsResult(
+        Optional<TextClassifier> textClassifier, ConversationActions conversationActions) {
+      this.textClassifier = textClassifier;
+      this.conversationActions = conversationActions;
+    }
+  }
 }

diff --git a/notification/tests/AndroidTest.xml b/notification/tests/AndroidTest.xml
new file mode 100644
index 0000000..1890e75
--- /dev/null
+++ b/notification/tests/AndroidTest.xml

@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!-- Copyright (C) 2020 The Android Open Source Project
+
+     Licensed under the Apache License, Version 2.0 (the "License");
+     you may not use this file except in compliance with the License.
+     You may obtain a copy of the License at
+
+          http://www.apache.org/licenses/LICENSE-2.0
+
+     Unless required by applicable law or agreed to in writing, software
+     distributed under the License is distributed on an "AS IS" BASIS,
+     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+     See the License for the specific language governing permissions and
+     limitations under the License.
+-->
+<!-- This test config file is auto-generated. -->
+<configuration description="Runs TextClassifierNotificationTests.">
+    <option name="test-suite-tag" value="apct" />
+    <option name="test-suite-tag" value="apct-instrumentation" />
+    <target_preparer class="com.android.tradefed.targetprep.suite.SuiteApkInstaller">
+        <option name="cleanup-apks" value="true" />
+        <option name="test-file-name" value="TextClassifierNotificationTests.apk" />
+    </target_preparer>
+
+    <test class="com.android.tradefed.testtype.AndroidJUnitTest" >
+        <option name="package" value="com.android.textclassifier.notification" />
+        <option name="runner" value="androidx.test.runner.AndroidJUnitRunner" />
+    </test>
+
+    <object type="module_controller" class="com.android.tradefed.testtype.suite.module.MainlineTestModuleController">
+        <option name="mainline-module-package-name" value="com.google.android.extservices" />
+    </object>
+</configuration>

diff --git a/notification/tests/src/com/android/textclassifier/notification/CopyCodeActivityTest.java b/notification/tests/src/com/android/textclassifier/notification/CopyCodeActivityTest.java
index 0682ff0..966fbe0 100644
--- a/notification/tests/src/com/android/textclassifier/notification/CopyCodeActivityTest.java
+++ b/notification/tests/src/com/android/textclassifier/notification/CopyCodeActivityTest.java

@@ -47,12 +47,14 @@
 
   @Test
   public void onCreate_emptyCode() throws Exception {
-    activityRule.launchActivity(EMPTY_INTENT);
-
     ClipboardManager clipboardManager =
         ApplicationProvider.getApplicationContext().getSystemService(ClipboardManager.class);
     // Use shell's permissions to ensure we can access the clipboard
     InstrumentationRegistry.getInstrumentation().getUiAutomation().adoptShellPermissionIdentity();
+    clipboardManager.clearPrimaryClip();
+
+    activityRule.launchActivity(EMPTY_INTENT);
+
     try {
       assertThat(clipboardManager.hasPrimaryClip()).isFalse();
     } finally {
@@ -62,12 +64,14 @@
 
   @Test
   public void onCreate_codeCopied() throws Exception {
-    activityRule.launchActivity(CODE_INTENT);
-
     ClipboardManager clipboardManager =
         ApplicationProvider.getApplicationContext().getSystemService(ClipboardManager.class);
     // Use shell's permissions to ensure we can access the clipboard
     InstrumentationRegistry.getInstrumentation().getUiAutomation().adoptShellPermissionIdentity();
+    clipboardManager.clearPrimaryClip();
+
+    activityRule.launchActivity(CODE_INTENT);
+
     ClipData clipFromClipboard;
     try {
       assertThat(clipboardManager.hasPrimaryClip()).isTrue();

diff --git a/notification/tests/src/com/android/textclassifier/notification/SmartSuggestionsHelperTest.java b/notification/tests/src/com/android/textclassifier/notification/SmartSuggestionsHelperTest.java
index bc10cc0..9d0a720 100644
--- a/notification/tests/src/com/android/textclassifier/notification/SmartSuggestionsHelperTest.java
+++ b/notification/tests/src/com/android/textclassifier/notification/SmartSuggestionsHelperTest.java

@@ -47,6 +47,7 @@
 import java.util.Collection;
 import java.util.List;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 
@@ -64,22 +65,36 @@
   private final Context context = ApplicationProvider.getApplicationContext();
   private final FakeTextClassifier fakeTextClassifier = new FakeTextClassifier();
   private final TestConfig config = new TestConfig();
-  private SmartSuggestionsHelper smartActions;
+  private TestableSmartSuggestionsHelper smartActions;
   private Notification.Builder notificationBuilder;
 
   @Before
   public void setup() {
     TextClassificationManager textClassificationManager =
         context.getSystemService(TextClassificationManager.class);
-    // Workaround b/144163980.
-    // TODO(tonymak) Remove this workaround once the latest emulator image is dropped.
-    textClassificationManager.setTextClassificationSessionFactory(
-        classificationContext -> fakeTextClassifier);
     textClassificationManager.setTextClassifier(fakeTextClassifier);
-    smartActions = new SmartSuggestionsHelper(context, config);
+    smartActions = new TestableSmartSuggestionsHelper(context, config);
     notificationBuilder = new Notification.Builder(context, "id");
   }
 
+  static class TestableSmartSuggestionsHelper extends SmartSuggestionsHelper {
+    private int numOfSessionsCreated = 0;
+
+    TestableSmartSuggestionsHelper(Context context, SmartSuggestionsConfig config) {
+      super(context, config);
+    }
+
+    @Override
+    TextClassifier createTextClassificationSession() {
+      numOfSessionsCreated += 1;
+      return super.createTextClassificationSession();
+    }
+
+    int getNumOfSessionsCreated() {
+      return numOfSessionsCreated;
+    }
+  }
+
   @Test
   public void onNotificationEnqueued_notMessageCategory() {
     Notification notification = notificationBuilder.setContentText(MESSAGE).build();
@@ -90,6 +105,8 @@
 
     assertThat(smartSuggestions.getReplies()).isEmpty();
     assertThat(smartSuggestions.getActions()).isEmpty();
+    // Ideally, we should verify that createTextClassificationSession is never called.
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(0);
   }
 
   @Test
@@ -107,6 +124,7 @@
 
     assertThat(smartSuggestions.getReplies()).isEmpty();
     assertThat(smartSuggestions.getActions()).isEmpty();
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(0);
   }
 
   @Test
@@ -123,6 +141,7 @@
 
     assertThat(smartSuggestions.getReplies()).isEmpty();
     assertAdjustmentWithSmartAction(smartSuggestions);
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(1);
   }
 
   @Test
@@ -139,6 +158,7 @@
     List<Message> messages = request.getConversation();
     assertThat(messages).hasSize(1);
     assertThat(messages.get(0).getText().toString()).isEqualTo(MESSAGE);
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(1);
   }
 
   @Test
@@ -172,6 +192,7 @@
     assertMessage(messages.get(1), "secondMessage", PERSON_USER_SELF, 2000);
     assertMessage(messages.get(2), "thirdMessage", userA, 3000);
     assertMessage(messages.get(3), "fourthMessage", userB, 4000);
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(1);
   }
 
   @Test
@@ -195,6 +216,7 @@
 
     assertThat(smartSuggestions.getReplies()).isEmpty();
     assertThat(smartSuggestions.getActions()).isEmpty();
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(0);
   }
 
   @Test
@@ -215,6 +237,7 @@
 
     assertThat(smartSuggestions.getReplies()).isEmpty();
     assertThat(smartSuggestions.getActions()).isEmpty();
+    assertThat(smartActions.getNumOfSessionsCreated()).isEqualTo(0);
   }
 
   @Test
@@ -345,6 +368,25 @@
     assertThat(smartSuggestions.getActions().get(0).title.toString()).isEqualTo("12345");
   }
 
+  @Ignore // Disabled because it is way too slow to run on an emulator.
+  @Test
+  public void noBinderLeakage() {
+    // Use the real text classifier from system.
+    TextClassificationManager textClassificationManager =
+        context.getSystemService(TextClassificationManager.class);
+    textClassificationManager.setTextClassifier(null);
+
+    // System server crashes when there are more than 20,000 leaked binder proxies.
+    // See
+    // http://cs/android/frameworks/base/core/java/android/os/BinderProxy.java?l=73&rcl=ae52315c8c7d0391bd3c7bca0525a98eeb4cd840.
+    for (int i = 0; i < 20000; i++) {
+      Notification notification = createMessageCategoryNotification();
+      StatusBarNotification statusBarNotification =
+          createStatusBarNotification(notification, PACKAGE_NAME);
+      smartActions.onNotificationEnqueued(statusBarNotification);
+    }
+  }
+
   private Notification createMessageCategoryNotification() {
     return notificationBuilder
         .setContentText(MESSAGE)
commit	4fe16ac05cfbce74aa726061d6b1b578b14ef0a6	[log] [tgz]
author	Tony Mak <tonymak@google.com>	Tue Aug 25 01:06:14 2020 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	Tue Aug 25 01:06:14 2020 +0000
tree	d1a8518de75e5c9244b19ebc0b8ea8af456bddcf
parent	282003139f2535ac1fa7d68abeaeeeff0aafb774 [diff]
parent	8feb63bdafaec145dab3f38c3418491e3bb7a3c7 [diff]