// blob: 82904e5abcf5c6abb151e6409a9e7e3312025d30 [file] [log] [blame]
/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "text-classifier.h"
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace libtextclassifier2 {
namespace {
using testing::ElementsAreArray;
using testing::Pair;
// Returns the label (the `first` member) of the first entry in `results`.
// When `results` is empty, returns the sentinel "<INVALID RESULTS>" so that
// tests can assert on the "no result" case with a plain string comparison.
std::string FirstResult(
    const std::vector<std::pair<std::string, float>>& results) {
  return results.empty() ? std::string("<INVALID RESULTS>")
                         : results.front().first;
}
// Matcher for an annotated-span object `arg`: succeeds when `arg.span` is the
// pair (start, end) and the first label in `arg.classification` (as reported
// by FirstResult) matches `best_class`.
MATCHER_P3(IsAnnotatedSpan, start, end, best_class, "") {
  // Check the span first; the classification is only inspected on a span hit.
  if (!testing::Value(arg.span, Pair(start, end))) {
    return false;
  }
  return testing::Value(FirstResult(arg.classification), best_class);
}
// Reads the whole content of the file at `file_name` into a string.
// The stream is opened with the default (text) mode. If the file cannot be
// opened, the result is an empty string.
std::string ReadFile(const std::string& file_name) {
  std::ifstream input(file_name);
  const std::istreambuf_iterator<char> content_begin(input);
  const std::istreambuf_iterator<char> content_end;
  return std::string(content_begin, content_end);
}
// Returns the test-data directory taken from the LIBTEXTCLASSIFIER_TEST_DATA_DIR
// macro. Callers in this file append the model file name directly to the
// result, so the macro presumably ends with a path separator (TODO confirm
// against the build configuration).
std::string GetModelPath() {
  return std::string(LIBTEXTCLASSIFIER_TEST_DATA_DIR);
}
// FromPath must return a null pointer for the "wrong_embeddings.fb" model
// (presumably a model whose embeddings cannot be loaded, judging by its name).
TEST(TextClassifierTest, EmbeddingExecutorLoadingFails) {
  const std::string model_file = GetModelPath() + "wrong_embeddings.fb";
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(model_file);
  EXPECT_FALSE(classifier);
}
// Checks the first label returned by ClassifyText for a range of inputs:
// ordinary sentences, text with several '|'-separated segments, single words,
// and degenerate/junk selections (which must produce no results at all).
TEST(TextClassifierTest, ClassifyText) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: first label produced by ClassifyText for `span` within `text`.
  const auto top_class = [&classifier](const std::string& text,
                                       std::pair<int, int> span) {
    return FirstResult(classifier->ClassifyText(text, span));
  };

  EXPECT_EQ("other",
            top_class("this afternoon Barack Obama gave a speech at",
                      {15, 27}));
  EXPECT_EQ("other", top_class("you@android.com", {0, 15}));
  EXPECT_EQ("other", top_class("Contact me at you@android.com", {14, 29}));
  EXPECT_EQ("phone", top_class("Call me at (800) 123-456 today", {11, 24}));
  EXPECT_EQ("other", top_class("Visit www.google.com every today!", {6, 20}));

  // More lines.
  const std::string multi_segment_text =
      "this afternoon Barack Obama gave a speech at|Visit "
      "www.google.com every today!|Call me at (800) 123-456 today.";
  EXPECT_EQ("other", top_class(multi_segment_text, {15, 27}));
  EXPECT_EQ("other", top_class(multi_segment_text, {51, 65}));
  EXPECT_EQ("phone", top_class(multi_segment_text, {90, 103}));

  // Single word.
  EXPECT_EQ("other", top_class("obama", {0, 5}));
  EXPECT_EQ("other", top_class("asdf", {0, 4}));
  // An empty selection yields no classification results.
  EXPECT_EQ("<INVALID RESULTS>", top_class("asdf", {0, 0}));

  // Junk.
  EXPECT_EQ("<INVALID RESULTS>", top_class("", {0, 0}));
  EXPECT_EQ("<INVALID RESULTS>",
            top_class("a\n\n\n\nx x x\n\n\n\n\n\n", {1, 5}));
}
// Checks that a phone-like selection is labelled "phone" up to a certain
// length and "other" once the selection grows to cover the full long digit
// sequence (presumably a digit-count filter on phone results; confirm against
// the model options).
TEST(TextClassifierTest, PhoneFiltering) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: first label produced by ClassifyText for `span` within `text`.
  const auto top_class = [&classifier](const std::string& text,
                                       std::pair<int, int> span) {
    return FirstResult(classifier->ClassifyText(text, span));
  };

  const std::string long_number_text = "phone: (123) 456 789,0001112";

  EXPECT_EQ("phone", top_class("phone: (123) 456 789", {7, 20}));
  EXPECT_EQ("phone", top_class(long_number_text, {7, 25}));
  EXPECT_EQ("other", top_class(long_number_text, {7, 28}));
}
// Exercises SuggestSelection on ordinary text, whole-string and single-token
// clicks, phone numbers, and phone numbers surrounded by unpaired brackets.
TEST(TextClassifierTest, SuggestSelection) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: span suggested for a click on `click` within `text`.
  const auto suggest = [&classifier](const std::string& text,
                                     std::pair<int, int> click) {
    return classifier->SuggestSelection(text, click);
  };

  EXPECT_EQ(suggest("this afternoon Barack Obama gave a speech at", {15, 21}),
            std::make_pair(15, 21));

  // Try passing the whole string.
  // When the input span covers more than one token, it should be returned
  // unchanged.
  EXPECT_EQ(suggest("350 Third Street, Cambridge", {0, 27}),
            std::make_pair(0, 27));

  // Single letter.
  EXPECT_EQ(suggest("a", {0, 1}), std::make_pair(0, 1));

  // Single word.
  EXPECT_EQ(suggest("asdf", {0, 4}), std::make_pair(0, 4));

  // Clicking the first group of a phone number selects the whole number.
  EXPECT_EQ(suggest("call me at 857 225 3556 today", {11, 14}),
            std::make_pair(11, 23));

  // Unpaired bracket stripping.
  EXPECT_EQ(suggest("call me at (857) 225 3556 today", {11, 16}),
            std::make_pair(11, 25));
  EXPECT_EQ(suggest("call me at (857 225 3556 today", {11, 15}),
            std::make_pair(12, 24));
  EXPECT_EQ(suggest("call me at 857 225 3556) today", {11, 14}),
            std::make_pair(11, 23));
  EXPECT_EQ(suggest("call me at )857 225 3556( today", {11, 15}),
            std::make_pair(12, 24));

  // If the resulting selection would be empty, the original span is returned.
  EXPECT_EQ(suggest("call me at )( today", {11, 13}), std::make_pair(11, 13));
  EXPECT_EQ(suggest("call me at ( today", {11, 12}), std::make_pair(11, 12));
  EXPECT_EQ(suggest("call me at ) today", {11, 12}), std::make_pair(11, 12));
}
// Clicking on any token of the same address must yield the same suggested
// span, regardless of which token was clicked.
TEST(TextClassifierTest, SuggestSelectionsAreSymmetric) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: span suggested for a click on `click` within `text`.
  const auto suggest = [&classifier](const std::string& text,
                                     std::pair<int, int> click) {
    return classifier->SuggestSelection(text, click);
  };

  const std::string address = "350 Third Street, Cambridge";

  EXPECT_EQ(suggest(address, {0, 3}), std::make_pair(0, 27));
  EXPECT_EQ(suggest(address, {4, 9}), std::make_pair(0, 27));
  EXPECT_EQ(suggest(address, {10, 16}), std::make_pair(0, 27));

  // Same address preceded by three short lines; offsets shift by six.
  EXPECT_EQ(suggest("a\nb\nc\n350 Third Street, Cambridge", {16, 22}),
            std::make_pair(6, 33));
}
// The suggested span for a phone number must stay within the number's own
// line, whether the unrelated line comes before or after it.
TEST(TextClassifierTest, SuggestSelectionWithNewLine) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: span suggested for a click on `click` within `text`.
  const auto suggest = [&classifier](const std::string& text,
                                     std::pair<int, int> click) {
    return classifier->SuggestSelection(text, click);
  };

  // Number on the second line.
  EXPECT_EQ(suggest("abc\n857 225 3556", {4, 7}), std::make_pair(4, 16));
  // Number on the first line.
  EXPECT_EQ(suggest("857 225 3556\nabc", {0, 3}), std::make_pair(0, 12));
}
// Punctuation adjacent to the clicked word must not be pulled into the
// suggested span: each expectation returns exactly the clicked word.
TEST(TextClassifierTest, SuggestSelectionWithPunctuation) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: span suggested for a click on `click` within `text`.
  const auto suggest = [&classifier](const std::string& text,
                                     std::pair<int, int> click) {
    return classifier->SuggestSelection(text, click);
  };

  // From the right.
  EXPECT_EQ(suggest("this afternoon BarackObama, gave a speech at", {15, 26}),
            std::make_pair(15, 26));

  // From the right multiple.
  EXPECT_EQ(
      suggest("this afternoon BarackObama,.,.,, gave a speech at", {15, 26}),
      std::make_pair(15, 26));

  // From the left multiple.
  EXPECT_EQ(
      suggest("this afternoon ,.,.,,BarackObama gave a speech at", {21, 32}),
      std::make_pair(21, 32));

  // From both sides.
  EXPECT_EQ(
      suggest("this afternoon !BarackObama,- gave a speech at", {16, 27}),
      std::make_pair(16, 27));
}
// Invalid click spans (out of range, negative, reversed, or on empty text)
// must not crash and must be handed back unchanged.
TEST(TextClassifierTest, SuggestSelectionNoCrashWithJunk) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  // Shorthand: span suggested for a click on `click` within `text`.
  const auto suggest = [&classifier](const std::string& text,
                                     std::pair<int, int> click) {
    return classifier->SuggestSelection(text, click);
  };

  // Try passing in a bunch of invalid selections.
  // Empty text.
  EXPECT_EQ(suggest("", {0, 27}), std::make_pair(0, 27));
  EXPECT_EQ(suggest("", {-10, 27}), std::make_pair(-10, 27));

  // Non-empty text (17 characters) with spans that do not fit inside it.
  const std::string text = "Word 1 2 3 hello!";
  EXPECT_EQ(suggest(text, {0, 27}), std::make_pair(0, 27));
  EXPECT_EQ(suggest(text, {-30, 300}), std::make_pair(-30, 300));
  EXPECT_EQ(suggest(text, {-10, -1}), std::make_pair(-10, -1));
  EXPECT_EQ(suggest(text, {100, 17}), std::make_pair(100, 17));
}
// Annotates a two-line text and checks, in order, every returned span and the
// first label of its classification. The trailing comments show the substring
// of `test_string` that each span covers.
TEST(TextClassifierTest, Annotate) {
  const std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + "test_model.fb");
  ASSERT_TRUE(classifier);

  const std::string test_string =
      "& saw Barak Obama today .. 350 Third Street, Cambridge\nand my phone "
      "number is 853 225 3556.";
  const auto annotations = classifier->Annotate(test_string);

  EXPECT_THAT(
      annotations,
      ElementsAreArray({
          // Empty span; presumably the "&" token after stripping.
          IsAnnotatedSpan(0, 0, "<INVALID RESULTS>"),
          IsAnnotatedSpan(2, 5, "other"),      // "saw"
          IsAnnotatedSpan(6, 11, "other"),     // "Barak"
          IsAnnotatedSpan(12, 17, "other"),    // "Obama"
          IsAnnotatedSpan(18, 23, "other"),    // "today"
          // Empty span; presumably the ".." token after stripping.
          IsAnnotatedSpan(24, 24, "<INVALID RESULTS>"),
          IsAnnotatedSpan(27, 54, "address"),  // "350 Third Street, Cambridge"
          IsAnnotatedSpan(55, 58, "other"),    // "and"
          IsAnnotatedSpan(59, 61, "other"),    // "my"
          IsAnnotatedSpan(62, 67, "other"),    // "phone"
          IsAnnotatedSpan(68, 74, "other"),    // "number"
          IsAnnotatedSpan(75, 77, "other"),    // "is"
          IsAnnotatedSpan(78, 90, "phone"),    // "853 225 3556"
      }));
}
// TODO(jacekj): Test the regex functionality.
} // namespace
} // namespace libtextclassifier2