// blob: 8db88ab01c0b0ca6fbd218e4bf183e5cb86c74d6 [file] [log] [blame]
/*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "utils/grammar/utils/rules.h"
#include "utils/grammar/rules_generated.h"
#include "utils/grammar/utils/ir.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
namespace libtextclassifier3::grammar {
namespace {
using ::testing::IsEmpty;
using ::testing::SizeIs;
// Serializes a small grammar without callbacks and checks the resulting rule
// set: one `rules` entry, no `lhs` entries, the NUL-terminated terminal
// strings, and the number of binary/unary rules.
TEST(SerializeRulesTest, HandlesSimpleRuleSet) {
  Rules rules;
  rules.Add("<verb>", {"buy"});
  rules.Add("<verb>", {"bring"});
  rules.Add("<verb>", {"remind"});
  rules.Add("<reminder>", {"remind", "me", "to", "<verb>"});
  rules.Add("<action>", {"<reminder>"});
  const Ir ir = rules.Finalize();
  RulesSetT frozen_rules;
  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);

  // Fatal on purpose: the checks below call `rules.front()`, which must not
  // run on an empty vector.
  ASSERT_THAT(frozen_rules.rules, SizeIs(1));
  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
  EXPECT_EQ(frozen_rules.terminals,
            std::string("bring\0buy\0me\0remind\0to\0", 23));
  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
}
// Same grammar as the simple rule set, but the top-level rule is added with a
// callback id. The serialized set is then expected to carry one `lhs` entry.
TEST(SerializeRulesTest, HandlesRulesSetWithCallbacks) {
  Rules rules;
  const CallbackId output = 1;
  rules.Add("<verb>", {"buy"});
  rules.Add("<verb>", {"bring"});
  rules.Add("<verb>", {"remind"});
  rules.Add("<reminder>", {"remind", "me", "to", "<verb>"});
  rules.Add("<action>", {"<reminder>"}, output, 0);
  const Ir ir = rules.Finalize();
  RulesSetT frozen_rules;
  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);

  // Fatal on purpose: the checks below call `rules.front()`, which must not
  // run on an empty vector.
  ASSERT_THAT(frozen_rules.rules, SizeIs(1));
  EXPECT_EQ(frozen_rules.terminals,
            std::string("bring\0buy\0me\0remind\0to\0", 23));
  EXPECT_THAT(frozen_rules.lhs, SizeIs(1));
  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
}
// Serializes a grammar in which one rule is added with `max_whitespace_gap`
// set to 0 and checks the `rules` count, the terminals and the `lhs` count.
TEST(SerializeRulesTest, HandlesRulesWithWhitespaceGapLimits) {
  Rules grammar;
  grammar.Add("<iata>", {"lx"});
  grammar.Add("<iata>", {"aa"});
  grammar.Add("<flight>", {"<iata>", "<4_digits>"}, kNoCallback, 0,
              /*max_whitespace_gap=*/0);

  RulesSetT serialized;
  grammar.Finalize().Serialize(/*include_debug_information=*/false,
                               &serialized);

  // Embedded NULs require the explicit-length constructor.
  const std::string expected_terminals("aa\0lx\0", 6);
  EXPECT_THAT(serialized.rules, SizeIs(1));
  EXPECT_EQ(serialized.terminals, expected_terminals);
  EXPECT_THAT(serialized.lhs, SizeIs(1));
}
// Mixes terminals added with `case_sensitive` true ("LX", "AA") and false
// ("dl") and checks that all three appear in the serialized terminals.
TEST(SerializeRulesTest, HandlesCaseSensitiveTerminals) {
  Rules grammar;
  grammar.Add("<iata>", {"LX"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
              /*case_sensitive=*/true);
  grammar.Add("<iata>", {"AA"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
              /*case_sensitive=*/true);
  grammar.Add("<iata>", {"dl"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
              /*case_sensitive=*/false);
  grammar.Add("<flight>", {"<iata>", "<4_digits>"}, kNoCallback, 0,
              /*max_whitespace_gap=*/0);

  RulesSetT serialized;
  grammar.Finalize().Serialize(/*include_debug_information=*/false,
                               &serialized);

  // Embedded NULs require the explicit-length constructor.
  const std::string expected_terminals("AA\0LX\0dl\0", 9);
  EXPECT_THAT(serialized.rules, SizeIs(1));
  EXPECT_EQ(serialized.terminals, expected_terminals);
  EXPECT_THAT(serialized.lhs, SizeIs(1));
}
// Builds a grammar with two shards, one rule per shard, and checks that the
// serialized set has two `rules` entries and both terminals.
TEST(SerializeRulesTest, HandlesMultipleShards) {
  Rules grammar(/*num_shards=*/2);
  grammar.Add("<iata>", {"LX"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
              /*case_sensitive=*/true, /*shard=*/0);
  grammar.Add("<iata>", {"aa"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
              /*case_sensitive=*/false, /*shard=*/1);

  RulesSetT serialized;
  grammar.Finalize().Serialize(/*include_debug_information=*/false,
                               &serialized);

  // Embedded NULs require the explicit-length constructor.
  const std::string expected_terminals("LX\0aa\0", 6);
  EXPECT_THAT(serialized.rules, SizeIs(2));
  EXPECT_EQ(serialized.terminals, expected_terminals);
}
// Adds two regex-backed nonterminals and checks that both show up in the
// serialized `regex_annotator` list.
TEST(SerializeRulesTest, HandlesRegexRules) {
  Rules grammar;
  grammar.AddRegex("<code>", "[A-Z]+");
  grammar.AddRegex("<numbers>", "\\d+");

  const Ir finalized = grammar.Finalize();
  RulesSetT serialized;
  finalized.Serialize(/*include_debug_information=*/false, &serialized);

  EXPECT_THAT(serialized.regex_annotator, SizeIs(2));
}
// Adds an alias for an existing nonterminal and checks that serialization
// produces no extra unary rule and no `lhs` entry for it.
TEST(SerializeRulesTest, HandlesAlias) {
  Rules rules;
  rules.Add("<iata>", {"lx"});
  rules.Add("<iata>", {"aa"});
  rules.Add("<flight>", {"<iata>", "<4_digits>"});
  rules.AddAlias("<flight_number>", "<flight>");
  const Ir ir = rules.Finalize();
  RulesSetT frozen_rules;
  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);

  // Fatal on purpose: the checks below call `rules.front()`, which must not
  // run on an empty vector.
  ASSERT_THAT(frozen_rules.rules, SizeIs(1));
  EXPECT_EQ(frozen_rules.terminals, std::string("aa\0lx\0", 6));
  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(1));
  // Only alias, no rule.
  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, IsEmpty());
  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
}
// Adds a rule wrapped in start/end anchors and fillers and checks that the
// serialized rules contain only the binarized terminal sequence.
TEST(SerializeRulesTest, ResolvesAnchorsAndFillers) {
  Rules rules;
  rules.Add("<code>",
            {"<^>", "<filler>", "this", "is", "a", "test", "<filler>", "<$>"});
  const Ir ir = rules.Finalize();
  RulesSetT frozen_rules;
  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);

  // Fatal on purpose: the checks below call `rules.front()`, which must not
  // run on an empty vector.
  ASSERT_THAT(frozen_rules.rules, SizeIs(1));
  // NOTE(review): "is" has no entry of its own in the expected terminals;
  // presumably it is represented by the tail of "this" -- confirm against the
  // serializer.
  EXPECT_EQ(frozen_rules.terminals, std::string("a\0test\0this\0", 12));

  // Expect removal of anchors and fillers in this case.
  // The rule above is equivalent to: <code> ::= this is a test, binarized into
  //   <tmp_0> ::= this is
  //   <tmp_1> ::= <tmp_0> a
  //   <code>  ::= <tmp_1> test
  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, IsEmpty());
  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
}
// Adds a rule that starts with an optional filler and checks the number of
// binary and unary rules the serialized set is expected to contain.
TEST(SerializeRulesTest, HandlesFillers) {
  Rules rules;
  rules.Add("<test>", {"<filler>?", "a", "test"});
  const Ir ir = rules.Finalize();
  RulesSetT frozen_rules;
  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);

  // Fatal on purpose: the checks below call `rules.front()`, which must not
  // run on an empty vector.
  ASSERT_THAT(frozen_rules.rules, SizeIs(1));
  EXPECT_EQ(frozen_rules.terminals, std::string("a\0test\0", 7));

  // The optional filler is kept here, not removed. Expected binary rules, one
  // variant with the filler and one without, plus the filler recursion:
  //   <tmp_0>  ::= <filler> a
  //   <test>   ::= <tmp_0> test
  //   <test>   ::= a test
  //   <filler> ::= <token> <filler>
  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(4));
  // <filler> ::= <token>
  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
}
// Registers three annotation nonterminals and checks that they are serialized
// under `nonterminals->annotation_nt`. The expected keys are in key order
// (phone, tracking_number, url), not in the order they were added.
TEST(SerializeRulesTest, HandlesAnnotations) {
  Rules rules;
  rules.AddAnnotation("phone");
  rules.AddAnnotation("url");
  rules.AddAnnotation("tracking_number");
  const Ir ir = rules.Finalize();
  RulesSetT frozen_rules;
  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);

  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
  // Fatal on purpose: the checks below dereference `nonterminals` and index
  // into `annotation_nt`, so both preconditions must hold before continuing.
  ASSERT_TRUE(frozen_rules.nonterminals != nullptr);
  ASSERT_THAT(frozen_rules.nonterminals->annotation_nt, SizeIs(3));
  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[0]->key, "phone");
  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[1]->key,
            "tracking_number");
  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[2]->key, "url");
}
} // namespace
} // namespace libtextclassifier3::grammar