native/annotator/number/number_test-include.cc - platform/external/libtextclassifier - Git at Google

 /*
  * Copyright (C) 2018 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "annotator/number/number_test-include.h"

 #include <set>
 #include <string>
 #include <vector>

 #include "annotator/collections.h"
 #include "annotator/model_generated.h"
 #include "annotator/types-test-util.h"
 #include "annotator/types.h"
 #include "utils/tokenizer-utils.h"
 #include "utils/utf8/unicodetext.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"

 namespace libtextclassifier3 {
 namespace test_internal {

 using ::testing::AllOf;
 using ::testing::ElementsAre;
 using ::testing::Field;
 using ::testing::IsEmpty;
 using ::testing::Matcher;
 using ::testing::UnorderedElementsAre;

 namespace {
 const flatbuffers::DetachedBuffer* CreateOptionsData(ModeFlag enabled_modes) {
   NumberAnnotatorOptionsT options;
   options.enabled = true;
   options.priority_score = -10.0;
   options.float_number_priority_score = 1.0;
   options.enabled_annotation_usecases =
       1 << AnnotationUsecase_ANNOTATION_USECASE_RAW;
   options.max_number_of_digits = 20;
   options.enabled_modes = enabled_modes;

   options.percentage_priority_score = 1.0;
   options.percentage_annotation_usecases =
       (1 << AnnotationUsecase_ANNOTATION_USECASE_RAW) +
       (1 << AnnotationUsecase_ANNOTATION_USECASE_SMART);
   std::set<std::string> percent_suffixes(
       {"パーセント", "percent", "pércént", "pc", "pct", "%", "٪", "﹪", "％"});
   for (const std::string& string_value : percent_suffixes) {
     options.percentage_pieces_string.append(string_value);
     options.percentage_pieces_string.push_back('\0');
   }

   flatbuffers::FlatBufferBuilder builder;
   builder.Finish(NumberAnnotatorOptions::Pack(builder, &options));
   return new flatbuffers::DetachedBuffer(builder.Release());
 }
 }  // namespace

 const NumberAnnotatorOptions*
 NumberAnnotatorTest::TestingNumberAnnotatorOptions(ModeFlag enabled_modes) {
   static const flatbuffers::DetachedBuffer* options_data_selection =
       CreateOptionsData(ModeFlag_SELECTION);
   static const flatbuffers::DetachedBuffer* options_data_no_selection =
       CreateOptionsData(ModeFlag_ANNOTATION_AND_CLASSIFICATION);
   static const flatbuffers::DetachedBuffer* options_data_all =
       CreateOptionsData(ModeFlag_ALL);

   if (enabled_modes == ModeFlag_SELECTION) {
     return flatbuffers::GetRoot<NumberAnnotatorOptions>(
         options_data_selection->data());
   } else if (enabled_modes == ModeFlag_ANNOTATION_AND_CLASSIFICATION) {
     return flatbuffers::GetRoot<NumberAnnotatorOptions>(
         options_data_no_selection->data());
   } else {
     return flatbuffers::GetRoot<NumberAnnotatorOptions>(
         options_data_all->data());
   }
 }

 MATCHER_P(IsCorrectCollection, collection, "collection is " + collection) {
   return arg.collection == collection;
 }

 MATCHER_P(IsCorrectNumericValue, numeric_value,
           "numeric value is " + std::to_string(numeric_value)) {
   return arg.numeric_value == numeric_value;
 }

 MATCHER_P(IsCorrectNumericDoubleValue, numeric_double_value,
           "numeric double value is " + std::to_string(numeric_double_value)) {
   return arg.numeric_double_value == numeric_double_value;
 }

 MATCHER_P(IsCorrectScore, score, "score is " + std::to_string(score)) {
   return arg.score == score;
 }

 MATCHER_P(IsCorrectPriortyScore, priority_score,
           "priority score is " + std::to_string(priority_score)) {
   return arg.priority_score == priority_score;
 }

 MATCHER_P(IsCorrectSpan, span,
           "span is (" + std::to_string(span.first) + "," +
               std::to_string(span.second) + ")") {
   return arg.span == span;
 }

 MATCHER_P(Classification, inner, "") {
   return testing::ExplainMatchResult(inner, arg.classification,
                                      result_listener);
 }

 static Matcher<AnnotatedSpan> IsAnnotatedSpan(
     const CodepointSpan& codepoint_span, const std::string& collection,
     const int int_value, const double double_value,
     const float priority_score = -10, const float score = 1) {
   return AllOf(
       IsCorrectSpan(codepoint_span),
       Classification(ElementsAre(AllOf(
           IsCorrectCollection(collection), IsCorrectNumericValue(int_value),
           IsCorrectNumericDoubleValue(double_value), IsCorrectScore(score),
           IsCorrectPriortyScore(priority_score)))));
 }

 TEST_F(NumberAnnotatorTest, ClassifiesAndParsesNumberCorrectly) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345 ..."), {4, 9},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);
 }

 TEST_F(NumberAnnotatorForSelectionTest,
        ClassifyTextDisabledClassificationReturnsFalse) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345 ..."), {4, 9},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, ClassifiesAndParsesNumberAsFloatCorrectly) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345.12345 ..."), {4, 15},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345.12345);
 }

 TEST_F(NumberAnnotatorTest,
        ClassifiesAndParsesNumberAsFloatCorrectlyWithoutDecimals) {
   ClassificationResult classification_result;
   // The dot after a number is considered punctuation, not part of a floating
   // number.
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345. ..."), {4, 9},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345. ..."), {4, 10},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);

   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345. ..."), {4, 9},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_FLOAT_EQ(classification_result.numeric_double_value, 12345);
 }

 TEST_F(NumberAnnotatorTest, FindsAllIntegerAndFloatNumbersInText) {
   std::vector<AnnotatedSpan> result;
   // In the context "68.9#" -> 68.9 is a number because # is punctuation.
   // In the context "68.9#?" -> 68.9 is not a number because is followed by two
   // punctuation signs.
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("how much is 2 plus 5 divided by 7% minus 3.14 "
                         "what about 68.9# or 68.9#?"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(12, 13), "number",
                                   /*int_value=*/2, /*double_value=*/2.0),
                   IsAnnotatedSpan(CodepointSpan(19, 20), "number",
                                   /*int_value=*/5, /*double_value=*/5.0),
                   IsAnnotatedSpan(CodepointSpan(32, 33), "number",
                                   /*int_value=*/7, /*double_value=*/7.0),
                   IsAnnotatedSpan(CodepointSpan(32, 34), "percentage",
                                   /*int_value=*/7, /*double_value=*/7.0,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(41, 45), "number",
                                   /*int_value=*/3, /*double_value=*/3.14,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(57, 61), "number",
                                   /*int_value=*/68, /*double_value=*/68.9,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, ClassifiesNonNumberCorrectly) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 123a45 ..."), {4, 10},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345..12345 ..."), {4, 16},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345a ..."), {4, 11},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, ClassifiesNumberSelectionCorrectly) {
   ClassificationResult classification_result;
   // Punctuation after a number is not part of the number.
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 14, ..."), {4, 6},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 14);
   EXPECT_EQ(classification_result.numeric_double_value, 14);

   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 14, ..."), {4, 7},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, ClassifiesPercentageSignCorrectly) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 99% ..."), {4, 7},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 99);
   EXPECT_EQ(classification_result.numeric_double_value, 99);
 }

 TEST_F(NumberAnnotatorTest, ClassifiesPercentageWordCorrectly) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 15 percent ..."), {4, 14},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 15);
   EXPECT_EQ(classification_result.numeric_double_value, 15);
 }

 TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiPercentageIncorrectSuffix) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("15 café"), {0, 7},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiFrPercentageCorrectSuffix) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("25 pércént"), {0, 10},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 25);
   EXPECT_EQ(classification_result.numeric_double_value, 25);
 }

 TEST_F(NumberAnnotatorTest, ClassifiesNonAsciiJaPercentageCorrectSuffix) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("10パーセント"), {0, 7},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 10);
   EXPECT_EQ(classification_result.numeric_double_value, 10);

   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("明日の降水確率は10パーセント  音量を12にセット"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_CLASSIFICATION,
       &result));
   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(8, 10), "number",
                                   /*int_value=*/10, /*double_value=*/10.0),
                   IsAnnotatedSpan(CodepointSpan(8, 15), "percentage",
                                   /*int_value=*/10, /*double_value=*/10.0,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(20, 22), "number",
                                   /*int_value=*/12, /*double_value=*/12.0)));
 }

 TEST_F(NumberAnnotatorTest, FindsAllNumbersInText) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("... 12345 ... 9 is my number and 27% or 68# #38 ＃39 "
                         "but not $99."),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(
       result,
       UnorderedElementsAre(
           IsAnnotatedSpan(CodepointSpan(4, 9), "number",
                           /*int_value=*/12345, /*double_value=*/12345.0),
           IsAnnotatedSpan(CodepointSpan(14, 15), "number",
                           /*int_value=*/9, /*double_value=*/9.0),
           IsAnnotatedSpan(CodepointSpan(33, 35), "number",
                           /*int_value=*/27, /*double_value=*/27.0),
           IsAnnotatedSpan(CodepointSpan(33, 36), "percentage",
                           /*int_value=*/27, /*double_value=*/27.0,
                           /*priority_score=*/1),
           IsAnnotatedSpan(CodepointSpan(40, 42), "number",
                           /*int_value=*/68, /*double_value=*/68.0),
           IsAnnotatedSpan(CodepointSpan(45, 47), "number",
                           /*int_value=*/38, /*double_value=*/38.0),
           IsAnnotatedSpan(CodepointSpan(49, 51), "number",
                           /*int_value=*/39, /*double_value=*/39.0)));
 }

 TEST_F(NumberAnnotatorForAnnotationAndClassificationTest,
        FindsAllDisabledModeReturnsNoResults) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("... 12345 ... 9 is my number and 27% or 68# #38 ＃39 "
                         "but not $99."),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_SELECTION, &result));

   EXPECT_THAT(result, IsEmpty());
 }

 TEST_F(NumberAnnotatorTest, FindsNoNumberInText) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("... 12345a ... 12345..12345 and 123a45 are not valid. "
                         "And -#5% is also bad."),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_SELECTION, &result));
   ASSERT_EQ(result.size(), 0);
 }

 TEST_F(NumberAnnotatorTest, FindsNumberWithPunctuation) {
   std::vector<AnnotatedSpan> result;
   // A number should be followed by only one punctuation signs => 15 is not a
   // number.
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText(
           "It's 12, 13, 14! Or 15??? For sure 16: 17; 18. and －19"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_CLASSIFICATION,
       &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(5, 7), "number",
                                   /*int_value=*/12, /*double_value=*/12.0),
                   IsAnnotatedSpan(CodepointSpan(9, 11), "number",
                                   /*int_value=*/13, /*double_value=*/13.0),
                   IsAnnotatedSpan(CodepointSpan(13, 15), "number",
                                   /*int_value=*/14, /*double_value=*/14.0),
                   IsAnnotatedSpan(CodepointSpan(35, 37), "number",
                                   /*int_value=*/16, /*double_value=*/16.0),
                   IsAnnotatedSpan(CodepointSpan(39, 41), "number",
                                   /*int_value=*/17, /*double_value=*/17.0),
                   IsAnnotatedSpan(CodepointSpan(43, 45), "number",
                                   /*int_value=*/18, /*double_value=*/18.0),
                   IsAnnotatedSpan(CodepointSpan(51, 54), "number",
                                   /*int_value=*/-19, /*double_value=*/-19.0)));
 }

 TEST_F(NumberAnnotatorTest, FindsFloatNumberWithPunctuation) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("It's 12.123, 13.45, 14.54321! Or 15.1? Maybe 16.33: "
                         "17.21; but for sure 18.90."),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(5, 11), "number",
                                   /*int_value=*/12, /*double_value=*/12.123,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(13, 18), "number",
                                   /*int_value=*/13, /*double_value=*/13.45,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(20, 28), "number",
                                   /*int_value=*/14, /*double_value=*/14.54321,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(33, 37), "number",
                                   /*int_value=*/15, /*double_value=*/15.1,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(45, 50), "number",
                                   /*int_value=*/16, /*double_value=*/16.33,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(52, 57), "number",
                                   /*int_value=*/17, /*double_value=*/17.21,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(72, 77), "number",
                                   /*int_value=*/18, /*double_value=*/18.9,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, HandlesNumbersAtBeginning) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("-5"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
       ModeFlag_SELECTION, &result));

   EXPECT_THAT(result, UnorderedElementsAre(IsAnnotatedSpan(
                           CodepointSpan(0, 2), "number",
                           /*int_value=*/-5, /*double_value=*/-5)));
 }

 TEST_F(NumberAnnotatorTest, HandlesNegativeNumbers) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("Number -5 and -5% and not number --5%"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(7, 9), "number",
                                   /*int_value=*/-5, /*double_value=*/-5),
                   IsAnnotatedSpan(CodepointSpan(14, 16), "number",
                                   /*int_value=*/-5, /*double_value=*/-5),
                   IsAnnotatedSpan(CodepointSpan(14, 17), "percentage",
                                   /*int_value=*/-5, /*double_value=*/-5,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, FindGoodPercentageContexts) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText(
           "5 percent, 10 pct, 25 pc and 17%, -5 percent, 10％ are percentages"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_SELECTION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(0, 1), "number",
                                   /*int_value=*/5, /*double_value=*/5),
                   IsAnnotatedSpan(CodepointSpan(0, 9), "percentage",
                                   /*int_value=*/5, /*double_value=*/5,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(11, 13), "number",
                                   /*int_value=*/10, /*double_value=*/10),
                   IsAnnotatedSpan(CodepointSpan(11, 17), "percentage",
                                   /*int_value=*/10, /*double_value=*/10,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(19, 21), "number",
                                   /*int_value=*/25, /*double_value=*/25),
                   IsAnnotatedSpan(CodepointSpan(19, 24), "percentage",
                                   /*int_value=*/25, /*double_value=*/25,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(29, 31), "number",
                                   /*int_value=*/17, /*double_value=*/17),
                   IsAnnotatedSpan(CodepointSpan(29, 32), "percentage",
                                   /*int_value=*/17, /*double_value=*/17,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(34, 36), "number",
                                   /*int_value=*/-5, /*double_value=*/-5),
                   IsAnnotatedSpan(CodepointSpan(34, 44), "percentage",
                                   /*int_value=*/-5, /*double_value=*/-5,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(46, 48), "number",
                                   /*int_value=*/10, /*double_value=*/10),
                   IsAnnotatedSpan(CodepointSpan(46, 49), "percentage",
                                   /*int_value=*/10, /*double_value=*/10,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, FindSinglePercentageInContext) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("5%"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
       ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result, UnorderedElementsAre(
                           IsAnnotatedSpan(CodepointSpan(0, 1), "number",
                                           /*int_value=*/5, /*double_value=*/5),
                           IsAnnotatedSpan(CodepointSpan(0, 2), "percentage",
                                           /*int_value=*/5, /*double_value=*/5,
                                           /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, IgnoreBadPercentageContexts) {
   std::vector<AnnotatedSpan> result;
   // A valid number is followed by only one punctuation element.
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("10, pct, 25 prc, 5#: percentage are not percentages"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(0, 2), "number",
                                   /*int_value=*/10, /*double_value=*/10),
                   IsAnnotatedSpan(CodepointSpan(9, 11), "number",
                                   /*int_value=*/25, /*double_value=*/25)));
 }

 TEST_F(NumberAnnotatorTest, IgnoreBadPercentagePunctuationContexts) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText(
           "#!24% or :?33 percent are not valid percentages, nor numbers."),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_TRUE(result.empty());
 }

 TEST_F(NumberAnnotatorTest, FindPercentageInNonAsciiContext) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText(
           "At the café 10% or 25 percent of people are nice. Only 10%!"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(12, 14), "number",
                                   /*int_value=*/10, /*double_value=*/10),
                   IsAnnotatedSpan(CodepointSpan(12, 15), "percentage",
                                   /*int_value=*/10, /*double_value=*/10,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(19, 21), "number",
                                   /*int_value=*/25, /*double_value=*/25),
                   IsAnnotatedSpan(CodepointSpan(19, 29), "percentage",
                                   /*int_value=*/25, /*double_value=*/25,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(55, 57), "number",
                                   /*int_value=*/10, /*double_value=*/10),
                   IsAnnotatedSpan(CodepointSpan(55, 58), "percentage",
                                   /*int_value=*/10, /*double_value=*/10,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest,
        WhenPercentSuffixWithAdditionalIgnoredCharactersDoesNotParseIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23#!? percent"), {0, 13},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest,
        WhenPercentSuffixWithAdditionalRandomTokensDoesNotParseIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23 asdf 3.14 pct asdf"), {0, 21},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest,
        WhenPercentSuffixWithAdditionalRandomPrefixSuffixDoesNotParseIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("abdf23 percentabdf"), {0, 18},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest,
        WhenPercentSuffixWithAdditionalRandomStringsDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("#?!23 percent#!?"), {0, 16},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenBothPercentSymbolAndSuffixDoesNotParseIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23% percent"), {0, 11},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest,
        WhenPercentSymbolWithAdditionalPrefixCharactersDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("#?23%"), {0, 5},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenNumberWithAdditionalCharactersDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23#!?"), {0, 5},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest,
        WhenPercentSymbolWithAdditionalCharactersDoesNotParsesIt) {
   ClassificationResult classification_result;
   // ! does not belong to the percentage annotation
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23%!"), {0, 3},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 23);
   EXPECT_EQ(classification_result.numeric_double_value, 23);

   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23%!"), {0, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest,
        WhenAdditionalCharactersWithMisplacedPercentSymbolDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("23.:;%"), {0, 6},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMultipleMinusSignsDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("--11"), {1, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_THAT(classification_result,
               AllOf(Field(&ClassificationResult::collection, "number"),
                     Field(&ClassificationResult::numeric_value, -11),
                     Field(&ClassificationResult::numeric_double_value, -11)));

   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("--11"), {0, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMultipleMinusSignsPercentSignDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("--11%"), {1, 5},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_THAT(classification_result,
               AllOf(Field(&ClassificationResult::collection, "percentage"),
                     Field(&ClassificationResult::numeric_value, -11),
                     Field(&ClassificationResult::numeric_double_value, -11)));

   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("--11%"), {0, 5},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenPlusMinusSignsDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("+-11"), {1, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_THAT(classification_result,
               AllOf(Field(&ClassificationResult::collection, "number"),
                     Field(&ClassificationResult::numeric_value, -11),
                     Field(&ClassificationResult::numeric_double_value, -11)));

   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("+-11"), {0, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMinusPlusSignsDoesNotParsesIt) {
   ClassificationResult classification_result;
   // + right before a number is not included in the number annotation
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("-+11"), {1, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("-+11"), {0, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMinusSignSuffixDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("10-"), {0, 3},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMultipleCharSuffixDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("10**"), {0, 2},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_THAT(classification_result,
               AllOf(Field(&ClassificationResult::collection, "number"),
                     Field(&ClassificationResult::numeric_value, 10),
                     Field(&ClassificationResult::numeric_double_value, 10)));

   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("10**"), {0, 3},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("10**"), {0, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMultipleCharPrefixDoesNotParsesIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("**10"), {1, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("**10"), {0, 4},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenLowestSupportedNumberParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("-1000000000"), {0, 11},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_THAT(
       classification_result,
       AllOf(Field(&ClassificationResult::collection, "number"),
             Field(&ClassificationResult::numeric_value, -1000000000),
             Field(&ClassificationResult::numeric_double_value, -1000000000)));
 }

 TEST_F(NumberAnnotatorTest, WhenLargestSupportedNumberParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("1000000000"), {0, 10},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_THAT(
       classification_result,
       AllOf(Field(&ClassificationResult::collection, "number"),
             Field(&ClassificationResult::numeric_value, 1000000000),
             Field(&ClassificationResult::numeric_double_value, 1000000000)));
 }

 TEST_F(NumberAnnotatorTest, WhenLowestSupportedFloatNumberParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("-999999999.999999999"), {0, 20},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_THAT(classification_result,
               AllOf(Field(&ClassificationResult::collection, "number"),
                     Field(&ClassificationResult::numeric_value, -1000000000),
                     Field(&ClassificationResult::numeric_double_value,
                           -999999999.999999999)));
 }

 TEST_F(NumberAnnotatorTest, WhenLargestFloatSupportedNumberParsesIt) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("999999999.999999999"), {0, 19},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_THAT(classification_result,
               AllOf(Field(&ClassificationResult::collection, "number"),
                     Field(&ClassificationResult::numeric_value, 1000000000),
                     Field(&ClassificationResult::numeric_double_value,
                           999999999.999999999)));
 }

 TEST_F(NumberAnnotatorTest, WhenLargeNumberDoesNotParseIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("1234567890123456789012345678901234567890"), {0, 40},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenMinusInTheMiddleDoesNotParseIt) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("2016-2017"), {0, 9},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, WhenSuffixWithoutNumberDoesNotParseIt) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("... % ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW,
       ModeFlag_ANNOTATION, &result));

   ASSERT_EQ(result.size(), 0);
 }

 TEST_F(NumberAnnotatorTest, WhenPrefixWithoutNumberDoesNotParseIt) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("... $ ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW,
       ModeFlag_ANNOTATION, &result));

   ASSERT_EQ(result.size(), 0);
 }

 TEST_F(NumberAnnotatorTest, WhenPrefixAndSuffixWithoutNumberDoesNotParseIt) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("... $% ..."), AnnotationUsecase_ANNOTATION_USECASE_RAW,
       ModeFlag_ANNOTATION, &result));

   ASSERT_EQ(result.size(), 0);
 }

 TEST_F(NumberAnnotatorTest, ForNumberAnnotationsSetsScoreAndPriorityScore) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345 ..."), {4, 9},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));

   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_EQ(classification_result.numeric_double_value, 12345);
   EXPECT_EQ(classification_result.score, 1);
   EXPECT_EQ(classification_result.priority_score, -10);

   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("Come at 9 or 10 ok?"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(8, 9), "number",
                                   /*int_value=*/9, /*double_value=*/9),
                   IsAnnotatedSpan(CodepointSpan(13, 15), "number",
                                   /*int_value=*/10, /*double_value=*/10)));
 }

 TEST_F(NumberAnnotatorTest,
        ForFloatNumberAnnotationsSetsScoreAndPriorityScore) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345.12345 ..."), {4, 15},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_EQ(classification_result.numeric_double_value, 12345.12345);
   EXPECT_EQ(classification_result.score, 1);
   EXPECT_EQ(classification_result.priority_score, 1);

   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("Results are between 12.5 and 13.5, right?"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(20, 24), "number",
                                   /*int_value=*/12, /*double_value=*/12.5,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(29, 33), "number",
                                   /*int_value=*/13, /*double_value=*/13.5,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, ForPercentageAnnotationsSetsScoreAndPriorityScore) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345% ..."), {4, 10},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_EQ(classification_result.numeric_double_value, 12345);
   EXPECT_EQ(classification_result.score, 1);
   EXPECT_EQ(classification_result.priority_score, 1);

   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345 percent ..."), {4, 17},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_EQ(classification_result.numeric_double_value, 12345);
   EXPECT_EQ(classification_result.score, 1);
   EXPECT_EQ(classification_result.priority_score, 1);

   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("Results are between 9% and 10 percent."),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));
   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(20, 21), "number",
                                   /*int_value=*/9, /*double_value=*/9),
                   IsAnnotatedSpan(CodepointSpan(20, 22), "percentage",
                                   /*int_value=*/9, /*double_value=*/9,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(27, 29), "number",
                                   /*int_value=*/10, /*double_value=*/10),
                   IsAnnotatedSpan(CodepointSpan(27, 37), "percentage",
                                   /*int_value=*/10, /*double_value=*/10,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, NumberDisabledPercentageEnabledForSmartUsecase) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345 ..."), {4, 9},
       AnnotationUsecase_ANNOTATION_USECASE_SMART, &classification_result));

   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345% ..."), {4, 10},
       AnnotationUsecase_ANNOTATION_USECASE_SMART, &classification_result));
   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_EQ(classification_result.numeric_double_value, 12345.0);
   EXPECT_EQ(classification_result.score, 1);
   EXPECT_EQ(classification_result.priority_score, 1);

   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("... 12345percent ..."), {4, 16},
       AnnotationUsecase_ANNOTATION_USECASE_SMART, &classification_result));
   EXPECT_EQ(classification_result.collection, "percentage");
   EXPECT_EQ(classification_result.numeric_value, 12345);
   EXPECT_EQ(classification_result.numeric_double_value, 12345);
   EXPECT_EQ(classification_result.score, 1);
   EXPECT_EQ(classification_result.priority_score, 1);

   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("Accuracy for experiment 3 is 9%."),
       AnnotationUsecase_ANNOTATION_USECASE_SMART, ModeFlag_ANNOTATION,
       &result));
   EXPECT_THAT(result, UnorderedElementsAre(
                           IsAnnotatedSpan(CodepointSpan(29, 31), "percentage",
                                           /*int_value=*/9, /*double_value=*/9.0,
                                           /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, MathOperatorsNotAnnotatedAsNumbersFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("how much is 2 + 2 or 5 - 96 * 89"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(12, 13), "number",
                                   /*int_value=*/2, /*double_value=*/2),
                   IsAnnotatedSpan(CodepointSpan(16, 17), "number",
                                   /*int_value=*/2, /*double_value=*/2),
                   IsAnnotatedSpan(CodepointSpan(21, 22), "number",
                                   /*int_value=*/5, /*double_value=*/5),
                   IsAnnotatedSpan(CodepointSpan(25, 27), "number",
                                   /*int_value=*/96, /*double_value=*/96),
                   IsAnnotatedSpan(CodepointSpan(30, 32), "number",
                                   /*int_value=*/89, /*double_value=*/89)));
 }

 TEST_F(NumberAnnotatorTest, MathOperatorsNotAnnotatedAsNumbersClassifyText) {
   ClassificationResult classification_result;
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("2 + 2"), {2, 3},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_FALSE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("2 - 96 * 89"), {2, 3},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
 }

 TEST_F(NumberAnnotatorTest, SlashSeparatesTwoNumbersFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("what's 1 + 2/3 * 4／5 * 6 / 7"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(7, 8), "number",
                                   /*int_value=*/1, /*double_value=*/1),
                   IsAnnotatedSpan(CodepointSpan(11, 12), "number",
                                   /*int_value=*/2, /*double_value=*/2),
                   IsAnnotatedSpan(CodepointSpan(13, 14), "number",
                                   /*int_value=*/3, /*double_value=*/3),
                   IsAnnotatedSpan(CodepointSpan(17, 18), "number",
                                   /*int_value=*/4, /*double_value=*/4),
                   IsAnnotatedSpan(CodepointSpan(19, 20), "number",
                                   /*int_value=*/5, /*double_value=*/5),
                   IsAnnotatedSpan(CodepointSpan(23, 24), "number",
                                   /*int_value=*/6, /*double_value=*/6),
                   IsAnnotatedSpan(CodepointSpan(27, 28), "number",
                                   /*int_value=*/7, /*double_value=*/7)));
 }

 TEST_F(NumberAnnotatorTest, SlashSeparatesTwoNumbersClassifyText) {
   ClassificationResult classification_result;
   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("what's 1 + 2/3 * 4"), {11, 12},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 2);
   EXPECT_EQ(classification_result.numeric_double_value, 2);
   EXPECT_EQ(classification_result.score, 1);

   EXPECT_TRUE(number_annotator_.ClassifyText(
       UTF8ToUnicodeText("what's 1 + 2/3 * 4"), {13, 14},
       AnnotationUsecase_ANNOTATION_USECASE_RAW, &classification_result));
   EXPECT_EQ(classification_result.collection, "number");
   EXPECT_EQ(classification_result.numeric_value, 3);
   EXPECT_EQ(classification_result.numeric_double_value, 3);
   EXPECT_EQ(classification_result.score, 1);
 }

 TEST_F(NumberAnnotatorTest, SlashDoesNotSeparatesTwoNumbersFindAll) {
   std::vector<AnnotatedSpan> result;
   // 2 in the "2/" context is a number because / is punctuation
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("what's 2a2/3 or 2/s4 or 2/ or /3 or //3 or 2//"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result, UnorderedElementsAre(IsAnnotatedSpan(
                           CodepointSpan(24, 25), "number",
                           /*int_value=*/2, /*double_value=*/2)));
 }

 TEST_F(NumberAnnotatorTest, BracketsContextAnnotatedFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("The interval is: (12, 13) or [-12, -4.5)"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(18, 20), "number",
                                   /*int_value=*/12, /*double_value=*/12),
                   IsAnnotatedSpan(CodepointSpan(22, 24), "number",
                                   /*int_value=*/13, /*double_value=*/13),
                   IsAnnotatedSpan(CodepointSpan(30, 33), "number",
                                   /*int_value=*/-12, /*double_value=*/-12),
                   IsAnnotatedSpan(CodepointSpan(35, 39), "number",
                                   /*int_value=*/-4, /*double_value=*/-4.5,
                                   /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, BracketsContextNotAnnotatedFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("The interval is: -(12, 138*)"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_TRUE(result.empty());
 }

 TEST_F(NumberAnnotatorTest, FractionalNumberDotsFindAll) {
   std::vector<AnnotatedSpan> result;
   // Dots source: https://unicode-search.net/unicode-namesearch.pl?term=period
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("3.1 3﹒2 3．3"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result, UnorderedElementsAre(
                           IsAnnotatedSpan(CodepointSpan(0, 3), "number",
                                           /*int_value=*/3, /*double_value=*/3.1,
                                           /*priority_score=*/1),
                           IsAnnotatedSpan(CodepointSpan(4, 7), "number",
                                           /*int_value=*/3, /*double_value=*/3.2,
                                           /*priority_score=*/1),
                           IsAnnotatedSpan(CodepointSpan(8, 11), "number",
                                           /*int_value=*/3, /*double_value=*/3.3,
                                           /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, NonAsciiDigitsFindAll) {
   std::vector<AnnotatedSpan> result;
   // Dots source: https://unicode-search.net/unicode-namesearch.pl?term=period
   // Digits source: https://unicode-search.net/unicode-namesearch.pl?term=digit
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("３ ３﹒２ ３．３%"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result, UnorderedElementsAre(
                           IsAnnotatedSpan(CodepointSpan(0, 1), "number",
                                           /*int_value=*/3, /*double_value=*/3),
                           IsAnnotatedSpan(CodepointSpan(2, 5), "number",
                                           /*int_value=*/3, /*double_value=*/3.2,
                                           /*priority_score=*/1),
                           IsAnnotatedSpan(CodepointSpan(6, 9), "number",
                                           /*int_value=*/3, /*double_value=*/3.3,
                                           /*priority_score=*/1),
                           IsAnnotatedSpan(CodepointSpan(6, 10), "percentage",
                                           /*int_value=*/3, /*double_value=*/3.3,
                                           /*priority_score=*/1)));
 }

 TEST_F(NumberAnnotatorTest, AnnotatedZeroPrecededNumbersFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("Numbers: 0.9 or 09 or 09.9 or 032310"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result, UnorderedElementsAre(
                           IsAnnotatedSpan(CodepointSpan(9, 12), "number",
                                           /*int_value=*/0, /*double_value=*/0.9,
                                           /*priority_score=*/1),
                           IsAnnotatedSpan(CodepointSpan(16, 18), "number",
                                           /*int_value=*/9, /*double_value=*/9),
                           IsAnnotatedSpan(CodepointSpan(22, 26), "number",
                                           /*int_value=*/9, /*double_value=*/9.9,
                                           /*priority_score=*/1),
                           IsAnnotatedSpan(CodepointSpan(30, 36), "number",
                                           /*int_value=*/32310,
                                           /*double_value=*/32310)));
 }

 TEST_F(NumberAnnotatorTest, ZeroAfterDotFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("15.0 16.00"), AnnotationUsecase_ANNOTATION_USECASE_RAW,
       ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(0, 4), "number",
                                   /*int_value=*/15, /*double_value=*/15),
                   IsAnnotatedSpan(CodepointSpan(5, 10), "number",
                                   /*int_value=*/16, /*double_value=*/16)));
 }

 TEST_F(NumberAnnotatorTest, NineDotNineFindAll) {
   std::vector<AnnotatedSpan> result;
   EXPECT_TRUE(number_annotator_.FindAll(
       UTF8ToUnicodeText("9.9 9.99 99.99 99.999 99.9999"),
       AnnotationUsecase_ANNOTATION_USECASE_RAW, ModeFlag_ANNOTATION, &result));

   EXPECT_THAT(result,
               UnorderedElementsAre(
                   IsAnnotatedSpan(CodepointSpan(0, 3), "number",
                                   /*int_value=*/9, /*double_value=*/9.9,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(4, 8), "number",
                                   /*int_value=*/9, /*double_value=*/9.99,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(9, 14), "number",
                                   /*int_value=*/99, /*double_value=*/99.99,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(15, 21), "number",
                                   /*int_value=*/99, /*double_value=*/99.999,
                                   /*priority_score=*/1),
                   IsAnnotatedSpan(CodepointSpan(22, 29), "number",
                                   /*int_value=*/99, /*double_value=*/99.9999,
                                   /*priority_score=*/1)));
 }

 }  // namespace test_internal
 }  // namespace libtextclassifier3