blob: e64de32d458dbe270b405427b85d2a28dcb8f9e1 [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/query/query-processor.h"
#include <cstdint>
#include <memory>
#include <string>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/numeric/dummy-numeric-index.h"
#include "icing/index/numeric/numeric-index.h"
#include "icing/jni/jni-cache.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/portable/platform.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-features.h"
#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/icu-data-file-helper.h"
#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer-factory.h"
#include "icing/transform/normalizer.h"
#include "unicode/uloc.h"
namespace icing {
namespace lib {
namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
using ::testing::UnorderedElementsAre;
// Convenience wrapper around DocumentStore::Create that supplies the default
// configuration used by every test in this file.
// NOTE: the trailing boolean arguments are positional and order-sensitive;
// keep the /*name=*/ annotations in sync with the DocumentStore::Create
// signature when it changes.
libtextclassifier3::StatusOr<DocumentStore::CreateResult> CreateDocumentStore(
    const Filesystem* filesystem, const std::string& base_dir,
    const Clock* clock, const SchemaStore* schema_store) {
  return DocumentStore::Create(
      filesystem, base_dir, clock, schema_store,
      /*force_recovery_and_revalidate_documents=*/false,
      /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false,
      /*use_persistent_hash_map=*/false,
      PortableFileBackedProtoLog<DocumentWrapper>::kDeflateCompressionLevel,
      /*initialize_stats=*/nullptr);
}
// Shared fixture for all QueryProcessor tests. Parameterized on
// SearchSpecProto::SearchType so each test runs against both the raw-query
// parser and the advanced-query parser.
class QueryProcessorTest
    : public ::testing::TestWithParam<SearchSpecProto::SearchType::Code> {
 protected:
  QueryProcessorTest()
      : test_dir_(GetTestTempDir() + "/icing"),
        store_dir_(test_dir_ + "/store"),
        schema_store_dir_(test_dir_ + "/schema_store"),
        index_dir_(test_dir_ + "/index"),
        numeric_index_dir_(test_dir_ + "/numeric_index") {}

  // Builds the full dependency stack (schema store, document store, term
  // index, numeric index, segmenter, normalizer) and finally the
  // QueryProcessor under test. Construction order matters: later components
  // hold raw pointers to earlier ones.
  void SetUp() override {
    // Start each test from an empty directory tree.
    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
    filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
    filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
    filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());

    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
      // If we've specified using the reverse-JNI method for segmentation (i.e.
      // not ICU), then we won't have the ICU data file included to set up.
      // Technically, we could choose to use reverse-JNI for segmentation AND
      // include an ICU data file, but that seems unlikely and our current BUILD
      // setup doesn't do this.
      ICING_ASSERT_OK(
          // File generated via icu_data_file rule in //icing/BUILD.
          icu_data_file_helper::SetUpICUDataFile(
              GetTestFilePath("icing/icu.dat")));
    }

    ICING_ASSERT_OK_AND_ASSIGN(
        schema_store_,
        SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));

    ICING_ASSERT_OK_AND_ASSIGN(
        DocumentStore::CreateResult create_result,
        CreateDocumentStore(&filesystem_, store_dir_, &fake_clock_,
                            schema_store_.get()));
    document_store_ = std::move(create_result.document_store);

    Index::Options options(index_dir_,
                           /*index_merge_size=*/1024 * 1024,
                           /*lite_index_sort_at_indexing=*/true,
                           /*lite_index_sort_size=*/1024 * 8);
    ICING_ASSERT_OK_AND_ASSIGN(
        index_, Index::Create(options, &filesystem_, &icing_filesystem_));

    // TODO(b/249829533): switch to use persistent numeric index.
    ICING_ASSERT_OK_AND_ASSIGN(
        numeric_index_,
        DummyNumericIndex<int64_t>::Create(filesystem_, numeric_index_dir_));

    language_segmenter_factory::SegmenterOptions segmenter_options(
        ULOC_US, jni_cache_.get());
    ICING_ASSERT_OK_AND_ASSIGN(
        language_segmenter_,
        language_segmenter_factory::Create(segmenter_options));

    ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
                                                /*max_term_byte_size=*/1000));

    // The object under test; borrows all of the components created above.
    ICING_ASSERT_OK_AND_ASSIGN(
        query_processor_,
        QueryProcessor::Create(index_.get(), numeric_index_.get(),
                               language_segmenter_.get(), normalizer_.get(),
                               document_store_.get(), schema_store_.get()));
  }

  // Indexes `token` for (document_id, section_id) and flushes the buffered
  // term immediately so it is visible to queries.
  libtextclassifier3::Status AddTokenToIndex(
      DocumentId document_id, SectionId section_id,
      TermMatchType::Code term_match_type, const std::string& token) {
    Index::Editor editor = index_->Edit(document_id, section_id,
                                        term_match_type, /*namespace_id=*/0);
    auto status = editor.BufferTerm(token.c_str());
    return status.ok() ? editor.IndexAllBufferedTerms() : status;
  }

  // Indexes the int64 `value` under `property` for (document_id, section_id)
  // in the numeric index, flushing immediately.
  libtextclassifier3::Status AddToNumericIndex(DocumentId document_id,
                                               const std::string& property,
                                               SectionId section_id,
                                               int64_t value) {
    std::unique_ptr<NumericIndex<int64_t>::Editor> editor =
        numeric_index_->Edit(property, document_id, section_id);
    ICING_RETURN_IF_ERROR(editor->BufferKey(value));
    return std::move(*editor).IndexAllBufferedKeys();
  }

  void TearDown() override {
    // Release the stores before deleting their backing directories.
    document_store_.reset();
    schema_store_.reset();
    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
  }

  Filesystem filesystem_;
  const std::string test_dir_;
  const std::string store_dir_;
  const std::string schema_store_dir_;

 private:
  IcingFilesystem icing_filesystem_;
  const std::string index_dir_;
  const std::string numeric_index_dir_;

 protected:
  // Declaration order matters: members are destroyed bottom-up, so
  // query_processor_ (which borrows everything above it) goes first.
  std::unique_ptr<Index> index_;
  std::unique_ptr<NumericIndex<int64_t>> numeric_index_;
  std::unique_ptr<LanguageSegmenter> language_segmenter_;
  std::unique_ptr<Normalizer> normalizer_;
  FakeClock fake_clock_;
  std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
  std::unique_ptr<SchemaStore> schema_store_;
  std::unique_ptr<DocumentStore> document_store_;
  std::unique_ptr<QueryProcessor> query_processor_;
};
TEST_P(QueryProcessorTest, CreationWithNullPointerShouldFail) {
  // QueryProcessor::Create requires every dependency to be non-null; nulling
  // out any single argument must yield FAILED_PRECONDITION.
  auto expect_failed_precondition = [](auto&& create_result) {
    EXPECT_THAT(create_result,
                StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
  };

  expect_failed_precondition(QueryProcessor::Create(
      /*index=*/nullptr, numeric_index_.get(), language_segmenter_.get(),
      normalizer_.get(), document_store_.get(), schema_store_.get()));
  expect_failed_precondition(QueryProcessor::Create(
      index_.get(), /*numeric_index_=*/nullptr, language_segmenter_.get(),
      normalizer_.get(), document_store_.get(), schema_store_.get()));
  expect_failed_precondition(QueryProcessor::Create(
      index_.get(), numeric_index_.get(), /*language_segmenter=*/nullptr,
      normalizer_.get(), document_store_.get(), schema_store_.get()));
  expect_failed_precondition(QueryProcessor::Create(
      index_.get(), numeric_index_.get(), language_segmenter_.get(),
      /*normalizer=*/nullptr, document_store_.get(), schema_store_.get()));
  expect_failed_precondition(QueryProcessor::Create(
      index_.get(), numeric_index_.get(), language_segmenter_.get(),
      normalizer_.get(), /*document_store=*/nullptr, schema_store_.get()));
  expect_failed_precondition(QueryProcessor::Create(
      index_.get(), numeric_index_.get(), language_segmenter_.get(),
      normalizer_.get(), document_store_.get(), /*schema_store=*/nullptr));
}
TEST_P(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
  // An empty group "()" matches every document under the raw-query parser,
  // but the advanced-query parser rejects it as INVALID_ARGUMENT (see the
  // TODO below).
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // We don't need to insert anything in the index since the empty query will
  // match all DocumentIds from the DocumentStore
  SearchSpecProto search_spec;
  search_spec.set_query("()");
  search_spec.set_search_type(GetParam());

  if (GetParam() !=
      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(search_spec,
                                      ScoringSpecProto::RankingStrategy::NONE,
                                      fake_clock_.GetSystemTimeMilliseconds()));

    // Descending order of valid DocumentIds
    EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
                ElementsAre(document_id2, document_id1));
    EXPECT_THAT(results.query_terms, IsEmpty());
    EXPECT_THAT(results.query_term_iterators, IsEmpty());
  } else {
    // TODO(b/208654892): Resolve the difference between RAW_QUERY and ADVANCED
    // regarding empty composite expressions.
    EXPECT_THAT(query_processor_->ParseSearch(
                    search_spec, ScoringSpecProto::RankingStrategy::NONE,
                    fake_clock_.GetSystemTimeMilliseconds()),
                StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
  }
}
TEST_P(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
  // An empty query string is treated as "match everything": every live
  // DocumentId in the store is returned, with no query terms recorded.
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // Nothing is added to the term index; the empty query is satisfied purely
  // from the DocumentStore.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId first_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId second_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  SearchSpecProto search_spec;
  search_spec.set_query("");
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(search_spec,
                                    ScoringSpecProto::RankingStrategy::NONE,
                                    fake_clock_.GetSystemTimeMilliseconds()));

  // Hits are returned in descending DocumentId order.
  EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
              ElementsAre(second_document_id, first_document_id));
  EXPECT_THAT(results.query_terms, IsEmpty());
  EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
TEST_P(QueryProcessorTest, QueryTermNormalized) {
  // Mixed-case query terms must be normalized before matching, so the query
  // "hElLo WORLD" should hit the lower-case tokens "hello" and "world".
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The document content is irrelevant; it only needs to exist so its
  // DocumentId is considered live by the hit iterators.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace1", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Index two lower-case tokens into section 0.
  SectionId section = 0;
  SectionIdMask expected_mask = 1U << section;
  TermMatchType::Code match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(AddTokenToIndex(doc_id, section, match_type, "hello"), IsOk());
  EXPECT_THAT(AddTokenToIndex(doc_id, section, match_type, "world"), IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hElLo WORLD");
  search_spec.set_term_match_type(match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), doc_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            expected_mask);

  // Both normalized terms should be reported with a term frequency of 1 in
  // section 0.
  std::unordered_map<SectionId, Hit::TermFrequency> tf_by_section;
  tf_by_section[section] = 1;
  std::vector<TermMatchInfo> term_stats;
  results.root_iterator->PopulateMatchedTermsStats(&term_stats);
  EXPECT_THAT(term_stats,
              ElementsAre(EqualsTermMatchInfo("hello", tf_by_section),
                          EqualsTermMatchInfo("world", tf_by_section)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
TEST_P(QueryProcessorTest, OneTermPrefixMatch) {
  // A PREFIX-indexed token should be found by querying one of its prefixes,
  // and the matched-term stats should report the *query* term ("he"), not the
  // indexed token ("hello").
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // namespaces populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace1", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("he");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "he", expected_section_ids_tf_map)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
TEST_P(QueryProcessorTest, OneTermPrefixMatchWithMaxSectionID) {
  // Verifies that a prefix hit recorded in the highest valid section
  // (kMaxSectionId) round-trips correctly through the 64-bit section mask.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // namespaces populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace1", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index at the maximum section id. UINT64_C(1) keeps the shift
  // in 64-bit arithmetic — a plain `1U << section_id` would overflow if
  // kMaxSectionId exceeds 31.
  // (Removed an unused `std::array<Hit::TermFrequency, kTotalNumSections>`
  // local that was written but never read.)
  SectionId section_id = kMaxSectionId;
  SectionIdMask section_id_mask = UINT64_C(1) << section_id;
  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("he");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "he", expected_section_ids_tf_map)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
TEST_P(QueryProcessorTest, OneTermExactMatch) {
  // A single exact-match query term should hit the document that indexed that
  // exact token, with the hit attributed to the right section.
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The document's content never matters here; it just has to exist so the
  // hit iterator doesn't treat its DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace1", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Index "hello" into section 0 as an exact-only token.
  SectionId section = 0;
  SectionIdMask expected_mask = 1U << section;
  TermMatchType::Code match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(AddTokenToIndex(doc_id, section, match_type, "hello"), IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hello");
  search_spec.set_term_match_type(match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), doc_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            expected_mask);

  std::unordered_map<SectionId, Hit::TermFrequency> tf_by_section;
  tf_by_section[section] = 1;
  std::vector<TermMatchInfo> term_stats;
  results.root_iterator->PopulateMatchedTermsStats(&term_stats);
  EXPECT_THAT(term_stats,
              ElementsAre(EqualsTermMatchInfo("hello", tf_by_section)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
}
TEST_P(QueryProcessorTest, AndSameTermExactMatch) {
  // ANDing the same exact term with itself ("hello hello") must behave like a
  // single-term query: one hit, one deduplicated entry in the stats.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hello hello");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  // The duplicated query term is reported only once.
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "hello", expected_section_ids_tf_map)));

  // Only one document was indexed, so the iterator must now be exhausted.
  ASSERT_FALSE(results.root_iterator->Advance().ok());

  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
}
TEST_P(QueryProcessorTest, AndTwoTermExactMatch) {
  // Two space-separated exact terms are implicitly ANDed: a document that
  // indexed both "hello" and "world" must match "hello world".
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The document only needs to exist so the hit iterators don't filter its
  // DocumentId out as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Index both conjuncts into section 0.
  SectionId section = 0;
  SectionIdMask expected_mask = 1U << section;
  TermMatchType::Code match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(AddTokenToIndex(doc_id, section, match_type, "hello"), IsOk());
  EXPECT_THAT(AddTokenToIndex(doc_id, section, match_type, "world"), IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hello world");
  search_spec.set_term_match_type(match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), doc_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            expected_mask);

  // Both terms should appear in the stats with frequency 1 in section 0.
  std::unordered_map<SectionId, Hit::TermFrequency> tf_by_section;
  tf_by_section[section] = 1;
  std::vector<TermMatchInfo> term_stats;
  results.root_iterator->PopulateMatchedTermsStats(&term_stats);
  EXPECT_THAT(term_stats,
              ElementsAre(EqualsTermMatchInfo("hello", tf_by_section),
                          EqualsTermMatchInfo("world", tf_by_section)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
TEST_P(QueryProcessorTest, AndSameTermPrefixMatch) {
  // ANDing the same prefix term with itself ("he he") must behave like a
  // single-term prefix query: one hit, one deduplicated entry in the stats.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("he he");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  // The duplicated query term is reported only once.
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "he", expected_section_ids_tf_map)));

  // Only one document was indexed, so the iterator must now be exhausted.
  ASSERT_FALSE(results.root_iterator->Advance().ok());

  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
}
TEST_P(QueryProcessorTest, AndTwoTermPrefixMatch) {
  // Two ANDed prefix terms ("he wo") should match a document that indexed
  // tokens starting with both prefixes; the stats report the query prefixes.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
      IsOk());
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "world"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("he wo");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(
      matched_terms_stats,
      ElementsAre(EqualsTermMatchInfo("he", expected_section_ids_tf_map),
                  EqualsTermMatchInfo("wo", expected_section_ids_tf_map)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
}
TEST_P(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
  // Mixes an exact term ("hello") with a prefix term ("wo") in one AND query;
  // both must match and the stats must report each term as queried.
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The document only needs to exist so the hit iterators don't filter its
  // DocumentId out as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // "hello" is indexed exact-only, "world" is indexed as a prefix token.
  SectionId section = 0;
  SectionIdMask expected_mask = 1U << section;
  TermMatchType::Code match_type = TermMatchType::PREFIX;
  EXPECT_THAT(
      AddTokenToIndex(doc_id, section, TermMatchType::EXACT_ONLY, "hello"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(doc_id, section, match_type, "world"), IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hello wo");
  search_spec.set_term_match_type(match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), doc_id);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            expected_mask);

  std::unordered_map<SectionId, Hit::TermFrequency> tf_by_section;
  tf_by_section[section] = 1;
  std::vector<TermMatchInfo> term_stats;
  results.root_iterator->PopulateMatchedTermsStats(&term_stats);
  EXPECT_THAT(term_stats,
              ElementsAre(EqualsTermMatchInfo("hello", tf_by_section),
                          EqualsTermMatchInfo("wo", tf_by_section)));
  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
}
TEST_P(QueryProcessorTest, OrTwoTermExactMatch) {
  // "hello OR world" should return both documents — each matching a different
  // term — in descending DocumentId order, with per-document term stats.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index: each document indexes a different term.
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
      IsOk());
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hello OR world");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  // document_id2 matched only via "world".
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "world", expected_section_ids_tf_map)));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  matched_terms_stats.clear();
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  // document_id1 matched only via "hello".
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "hello", expected_section_ids_tf_map)));

  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
}
TEST_P(QueryProcessorTest, OrTwoTermPrefixMatch) {
  // Verifies that a prefix OR query ("he OR wo") matches documents whose
  // indexed terms ("hello", "world") start with either prefix, and that the
  // matched-terms stats report the query prefix rather than the full term.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index: "hello" only in document 1, "world" only in
  // document 2, both indexed with PREFIX matching in section 0.
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
      IsOk());
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("he OR wo");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);
  // Document 2's "world" matched via the prefix "wo"; the stats use the
  // query term, not the indexed term.
  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "wo", expected_section_ids_tf_map)));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);
  // Document 1's "hello" matched via the prefix "he".
  matched_terms_stats.clear();
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "he", expected_section_ids_tf_map)));

  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
}
TEST_P(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
  // Verifies that a single PREFIX query can OR together a full term ("hello",
  // indexed EXACT_ONLY) and a prefix ("wo" matching "world", indexed PREFIX),
  // and that both kinds of hit are returned with correct matched-term stats.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index: document 1 gets "hello" indexed exact-only, document 2
  // gets "world" indexed with prefix matching.
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  EXPECT_THAT(AddTokenToIndex(document_id1, section_id,
                              TermMatchType::EXACT_ONLY, "hello"),
              IsOk());
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, TermMatchType::PREFIX, "world"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("hello OR wo");
  search_spec.set_term_match_type(TermMatchType::PREFIX);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);
  // Document 2 matched via the prefix "wo".
  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 1}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "wo", expected_section_ids_tf_map)));

  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);
  // Document 1 matched via the full term "hello".
  matched_terms_stats.clear();
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(matched_terms_stats, ElementsAre(EqualsTermMatchInfo(
                                       "hello", expected_section_ids_tf_map)));

  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
}
TEST_P(QueryProcessorTest, CombinedAndOrTerms) {
  // Verifies operator precedence when AND (implicit, space-separated) and OR
  // are combined in one query, across three query shapes. Document 1's tokens
  // are merged into the main index while document 2's stay in the lite index,
  // so the queries also exercise both index tiers.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "animal puppy dog"
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());
  // Move document 1's hits into the main index. The status must be checked: a
  // failed merge would silently invalidate every expectation below.
  ASSERT_THAT(index_->Merge(), IsOk());

  // Document 2 has content "animal kitten cat"
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  {
    // OR gets precedence over AND, this is parsed as ((puppy OR kitten) AND
    // dog)
    SearchSpecProto search_spec;
    search_spec.set_query("puppy OR kitten dog");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // Only Document 1 matches since it has puppy AND dog
    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
              document_id1);
    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
              section_id_mask);
    std::unordered_map<SectionId, Hit::TermFrequency>
        expected_section_ids_tf_map = {{section_id, 1}};
    std::vector<TermMatchInfo> matched_terms_stats;
    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
    EXPECT_THAT(
        matched_terms_stats,
        ElementsAre(EqualsTermMatchInfo("puppy", expected_section_ids_tf_map),
                    EqualsTermMatchInfo("dog", expected_section_ids_tf_map)));

    // All three terms appear in query_terms even though "kitten" matched no
    // returned document.
    EXPECT_THAT(results.query_term_iterators, SizeIs(3));
    EXPECT_THAT(results.query_terms, SizeIs(1));
    EXPECT_THAT(results.query_terms[""],
                UnorderedElementsAre("puppy", "kitten", "dog"));
  }

  {
    // OR gets precedence over AND, this is parsed as (animal AND (puppy OR
    // kitten))
    SearchSpecProto search_spec;
    search_spec.set_query("animal puppy OR kitten");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // Both Document 1 and 2 match since Document 1 has animal AND puppy, and
    // Document 2 has animal AND kitten
    // Descending order of valid DocumentIds
    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
              document_id2);
    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
              section_id_mask);
    std::unordered_map<SectionId, Hit::TermFrequency>
        expected_section_ids_tf_map = {{section_id, 1}};
    std::vector<TermMatchInfo> matched_terms_stats;
    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
    EXPECT_THAT(
        matched_terms_stats,
        ElementsAre(
            EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
            EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));

    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
              document_id1);
    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
              section_id_mask);
    matched_terms_stats.clear();
    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
    EXPECT_THAT(
        matched_terms_stats,
        ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
                    EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));

    EXPECT_THAT(results.query_term_iterators, SizeIs(3));
    EXPECT_THAT(results.query_terms, SizeIs(1));
    EXPECT_THAT(results.query_terms[""],
                UnorderedElementsAre("animal", "puppy", "kitten"));
  }

  {
    // OR gets precedence over AND, this is parsed as (kitten AND ((foo OR
    // bar) OR cat))
    SearchSpecProto search_spec;
    search_spec.set_query("kitten foo OR bar OR cat");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // Only Document 2 matches since it has both kitten and cat
    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
              document_id2);
    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
              section_id_mask);
    std::unordered_map<SectionId, Hit::TermFrequency>
        expected_section_ids_tf_map = {{section_id, 1}};
    std::vector<TermMatchInfo> matched_terms_stats;
    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
    EXPECT_THAT(
        matched_terms_stats,
        ElementsAre(EqualsTermMatchInfo("kitten", expected_section_ids_tf_map),
                    EqualsTermMatchInfo("cat", expected_section_ids_tf_map)));

    EXPECT_THAT(results.query_term_iterators, SizeIs(4));
    EXPECT_THAT(results.query_terms, SizeIs(1));
    EXPECT_THAT(results.query_terms[""],
                UnorderedElementsAre("kitten", "foo", "bar", "cat"));
  }
}
TEST_P(QueryProcessorTest, OneGroup) {
  // Verifies that parentheses override the default precedence: "puppy OR
  // (kitten foo)" keeps "kitten foo" as one AND-group on the right side of
  // the OR.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "puppy dog"
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());

  // Document 2 has content "kitten cat"
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  // Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
  // no documents would match. But with grouping, Document 1 matches puppy
  SearchSpecProto search_spec;
  search_spec.set_query("puppy OR (kitten foo)");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  DocHitInfo expectedDocHitInfo(document_id1);
  expectedDocHitInfo.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expectedDocHitInfo));
  // "foo" is still reported as a query term even though it never matched.
  EXPECT_THAT(results.query_term_iterators, SizeIs(3));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""],
              UnorderedElementsAre("puppy", "kitten", "foo"));
}
TEST_P(QueryProcessorTest, TwoGroups) {
  // Verifies that two parenthesized AND-groups can be ORed together:
  // "(puppy dog) OR (kitten cat)" matches each document via its own group.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "puppy dog"
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());

  // Document 2 has content "kitten cat"
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  // Without grouping, this would be parsed as (puppy AND (dog OR kitten) AND
  // cat) and wouldn't match any documents. But with grouping, Document 1
  // matches (puppy AND dog) and Document 2 matches (kitten and cat).
  SearchSpecProto search_spec;
  search_spec.set_query("(puppy dog) OR (kitten cat)");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  DocHitInfo expectedDocHitInfo1(document_id1);
  expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
  DocHitInfo expectedDocHitInfo2(document_id2);
  expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
  EXPECT_THAT(results.query_term_iterators, SizeIs(4));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""],
              UnorderedElementsAre("puppy", "dog", "kitten", "cat"));
}
TEST_P(QueryProcessorTest, ManyLevelNestedGrouping) {
  // Verifies that redundant levels of nested parentheses collapse to the same
  // parse as a single group: "puppy OR ((((kitten foo))))" behaves exactly
  // like "puppy OR (kitten foo)".
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "puppy dog"
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());

  // Document 2 has content "kitten cat"
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  // Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
  // no documents would match. But with grouping, Document 1 matches puppy
  SearchSpecProto search_spec;
  search_spec.set_query("puppy OR ((((kitten foo))))");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  DocHitInfo expectedDocHitInfo(document_id1);
  expectedDocHitInfo.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expectedDocHitInfo));
  EXPECT_THAT(results.query_term_iterators, SizeIs(3));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""],
              UnorderedElementsAre("puppy", "kitten", "foo"));
}
TEST_P(QueryProcessorTest, OneLevelNestedGrouping) {
  // Verifies that a group nested inside another group parses correctly:
  // "puppy OR (kitten (cat))" is (puppy OR (kitten AND cat)), so both
  // documents match.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "puppy dog"
  EXPECT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());

  // Document 2 has content "kitten cat"
  EXPECT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
      IsOk());
  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  // Document 1 will match puppy and Document 2 matches (kitten AND (cat))
  SearchSpecProto search_spec;
  // TODO(b/208654892) decide how we want to handle queries of the form foo(...)
  search_spec.set_query("puppy OR (kitten (cat))");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  DocHitInfo expectedDocHitInfo1(document_id1);
  expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
  DocHitInfo expectedDocHitInfo2(document_id2);
  expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
  EXPECT_THAT(results.query_term_iterators, SizeIs(3));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""],
              UnorderedElementsAre("puppy", "kitten", "cat"));
}
TEST_P(QueryProcessorTest, ExcludeTerm) {
  // Verifies that a pure negation query ("-hello") returns only the documents
  // that do NOT contain the excluded term, with no section mask, no query
  // terms, and no term iterators.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that they'll bump the
  // last_added_document_id, which will give us the proper exclusion results
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index: "hello" in document 1, "world" in document 2.
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
      IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("-hello");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(search_spec,
                                    ScoringSpecProto::RankingStrategy::NONE,
                                    fake_clock_.GetSystemTimeMilliseconds()));

  // We don't have the section mask to indicate which section "world" came
  // from. It doesn't matter which section it was in since the query doesn't
  // care. It just wanted documents that didn't have "hello".
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone)));
  // Excluded terms are not surfaced as query terms or term iterators.
  EXPECT_THAT(results.query_terms, IsEmpty());
  EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
TEST_P(QueryProcessorTest, ExcludeNonexistentTerm) {
  // Verifies that excluding a term that appears in no document ("-foo")
  // returns every document, since nothing is filtered out.
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that they'll bump the
  // last_added_document_id, which will give us the proper exclusion results
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index: neither document contains "foo".
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
      IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
      IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("-foo");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(search_spec,
                                    ScoringSpecProto::RankingStrategy::NONE,
                                    fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone),
                          DocHitInfo(document_id1, kSectionIdMaskNone)));
  // Excluded terms are not surfaced as query terms or term iterators.
  EXPECT_THAT(results.query_terms, IsEmpty());
  EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
TEST_P(QueryProcessorTest, ExcludeAnd) {
  // Verifies exclusion combined with implicit AND, for both two excluded
  // terms ("-dog -cat") and an excluded term mixed with an included one
  // ("-animal cat").
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that they'll bump the
  // last_added_document_id, which will give us the proper exclusion results
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "animal dog"
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());

  // Document 2 has content "animal cat"
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  {
    SearchSpecProto search_spec;
    search_spec.set_query("-dog -cat");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // The query is interpreted as "exclude all documents that have dog, and
    // exclude all documents that have cat". Document 1 contains dog and
    // Document 2 contains cat, so there are no results.
    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
    EXPECT_THAT(results.query_term_iterators, IsEmpty());
    EXPECT_THAT(results.query_terms, IsEmpty());
  }

  {
    SearchSpecProto search_spec;
    search_spec.set_query("-animal cat");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // The query is interpreted as "exclude all documents that have animal,
    // and include all documents that have cat". Since both documents contain
    // animal, there are no results.
    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
    // Only the positive term ("cat") is reported; the excluded term is not.
    EXPECT_THAT(results.query_term_iterators, SizeIs(1));
    EXPECT_THAT(results.query_terms, SizeIs(1));
    EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
  }
}
TEST_P(QueryProcessorTest, ExcludeOr) {
  // Verifies exclusion combined with OR, for both two excluded terms
  // ("-animal OR -cat") and an excluded term ORed with an included one
  // ("animal OR -cat").
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that they'll bump the
  // last_added_document_id, which will give us the proper exclusion results
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "animal dog"
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());

  // Document 2 has content "animal cat"
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  {
    SearchSpecProto search_spec;
    search_spec.set_query("-animal OR -cat");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // Document 1 lacks cat, so it satisfies "-cat"; Document 2 contains both
    // animal and cat, so it satisfies neither side of the OR.
    // We don't have a section mask indicating which sections in this document
    // matched the query since it's not based on section-term matching. It's
    // more based on the fact that the query excluded all the other documents.
    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
                ElementsAre(DocHitInfo(document_id1, kSectionIdMaskNone)));
    EXPECT_THAT(results.query_term_iterators, IsEmpty());
    EXPECT_THAT(results.query_terms, IsEmpty());
  }

  {
    SearchSpecProto search_spec;
    search_spec.set_query("animal OR -cat");
    search_spec.set_term_match_type(term_match_type);
    search_spec.set_search_type(GetParam());

    ICING_ASSERT_OK_AND_ASSIGN(
        QueryResults results,
        query_processor_->ParseSearch(
            search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
            fake_clock_.GetSystemTimeMilliseconds()));

    // Both documents contain animal, so both match the positive side of the
    // OR and carry a section hit for it.
    // Descending order of valid DocumentIds
    DocHitInfo expectedDocHitInfo1(document_id1);
    expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
    DocHitInfo expectedDocHitInfo2(document_id2);
    expectedDocHitInfo2.UpdateSection(/*section_id=*/0);
    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
                ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
    // Only the positive term ("animal") is reported as a query term.
    EXPECT_THAT(results.query_terms, SizeIs(1));
    EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
  }
}
// Verifies that with RankingStrategy::NONE, matched-term stats are still
// populated but all hit term frequencies are reported as 0 (hit term
// frequencies were not requested) and no query-term iterators are returned.
TEST_P(QueryProcessorTest, WithoutTermFrequency) {
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // just inserting the documents so that the DocHitInfoIterators will see
  // that the document exists and not filter out the DocumentId as deleted.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  SectionId section_id = 0;
  SectionIdMask section_id_mask = 1U << section_id;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Document 1 has content "animal puppy dog", which is added to the main
  // index. Use ASSERT_THAT (not EXPECT_THAT) so a failed setup step aborts
  // the test, consistent with the other tests in this file.
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());
  ASSERT_THAT(index_->Merge(), IsOk());

  // Document 2 has content "animal kitten cat", which is added to the lite
  // index.
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  // OR gets precedence over AND, this is parsed as (animal AND (puppy OR
  // kitten))
  SearchSpecProto search_spec;
  search_spec.set_query("animal puppy OR kitten");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(search_spec,
                                    ScoringSpecProto::RankingStrategy::NONE,
                                    fake_clock_.GetSystemTimeMilliseconds()));

  // Descending order of valid DocumentIds
  // The first Document to match (Document 2) matches on 'animal' AND 'kitten'
  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);

  // Since need_hit_term_frequency is false, the expected term frequency for
  // the section with the hit should be 0.
  std::unordered_map<SectionId, Hit::TermFrequency>
      expected_section_ids_tf_map = {{section_id, 0}};
  std::vector<TermMatchInfo> matched_terms_stats;
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(
      matched_terms_stats,
      ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
                  EqualsTermMatchInfo("kitten", expected_section_ids_tf_map)));

  // The second Document to match (Document 1) matches on 'animal' AND 'puppy'
  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
            section_id_mask);
  matched_terms_stats.clear();
  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
  EXPECT_THAT(
      matched_terms_stats,
      ElementsAre(EqualsTermMatchInfo("animal", expected_section_ids_tf_map),
                  EqualsTermMatchInfo("puppy", expected_section_ids_tf_map)));

  // This should be empty because ranking_strategy != RELEVANCE_SCORE
  EXPECT_THAT(results.query_term_iterators, IsEmpty());
}
// A document deleted from the document store must not appear in query
// results even though its hits are still present in the index.
TEST_P(QueryProcessorTest, DeletedFilter) {
  // Set up a schema with a single "email" type.
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The documents themselves hold no content; they exist only so the document
  // store knows about these DocumentIds and namespaces.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("email")
                                                      .Build()));
  EXPECT_THAT(document_store_->Delete("namespace", "1",
                                      fake_clock_.GetSystemTimeMilliseconds()),
              IsOk());

  // Index document 1 with "animal dog" and document 2 with "animal cat".
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Only the still-live document (document 2) should match.
  DocHitInfo expected_doc_hit_info(document_id2);
  expected_doc_hit_info.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
// Results should be restricted to the namespaces listed in the search spec.
TEST_P(QueryProcessorTest, NamespaceFilter) {
  // Set up a schema with a single "email" type.
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The documents themselves hold no content; they exist only so the document
  // store knows about these DocumentIds and namespaces.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace1", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace2", "2")
                                                      .SetSchema("email")
                                                      .Build()));

  // Index document 1 with "animal dog" and document 2 with "animal cat".
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
              IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
              IsOk());

  // Query for "animal", restricted to namespace1 only.
  SearchSpecProto search_spec;
  search_spec.set_query("animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.add_namespace_filters("namespace1");
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Only the document in namespace1 (document 1) should match.
  DocHitInfo expected_doc_hit_info(document_id1);
  expected_doc_hit_info.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
// Results should be restricted to the schema types listed in the search spec.
TEST_P(QueryProcessorTest, SchemaTypeFilter) {
  // Set up a schema with two types: "email" and "message".
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("email"))
          .AddType(SchemaTypeConfigBuilder().SetType("message"))
          .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The documents themselves hold no content; they exist only so the document
  // store knows about these DocumentIds and schema types.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Index "animal" for both documents in section 0.
  SectionId section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  ASSERT_THAT(
      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
      IsOk());
  ASSERT_THAT(
      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
      IsOk());

  // Query for "animal", restricted to the "email" schema type only.
  SearchSpecProto search_spec;
  search_spec.set_query("animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.add_schema_type_filters("email");
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Only the email document (document 1) should match.
  DocHitInfo expected_doc_hit_info(document_id1);
  expected_doc_hit_info.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
// A '<property>:<term>' query should match a document whose named property
// section contains the term.
TEST_P(QueryProcessorTest, PropertyFilterForOneDocument) {
  // "email" has a single indexed string property, "subject".
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
              PropertyConfigBuilder()
                  .SetName("subject")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // "subject" is the first and only indexed property, so its section id is 0.
  int subject_section_id = 0;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // The document itself holds no content; it exists only so the document
  // store knows about this DocumentId.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Index "animal" for the document in the subject section.
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  ASSERT_THAT(AddTokenToIndex(document_id, subject_section_id, term_match_type,
                              "animal"),
              IsOk());

  // Query with a section filter of the form '<section name>:<query term>'.
  SearchSpecProto search_spec;
  search_spec.set_query("subject:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // The document should match with a hit in the subject section.
  DocHitInfo expected_doc_hit_info(document_id);
  expected_doc_hit_info.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["subject"], UnorderedElementsAre("animal"));
}
// A property filter on "foo" should match hits in the "foo" section of every
// schema type that declares one, even when "foo" maps to a different section
// id in each type.
TEST_P(QueryProcessorTest, PropertyFilterAcrossSchemaTypes) {
  // Create the schema and document store
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("email")
                       // Section "a" would get sectionId 0
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("a")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("foo")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
              PropertyConfigBuilder()
                  .SetName("foo")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // SectionIds are assigned in ascending order per schema type,
  // alphabetically.
  int email_foo_section_id = 1;
  int message_foo_section_id = 0;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal"
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  // Message document has content "animal"
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>'
  search_spec.set_query("foo:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Ordered by descending DocumentId, so message comes first since it was
  // inserted last. Use the named section-id constants declared above rather
  // than repeating the magic numbers.
  DocHitInfo expected_doc_hit_info1(message_document_id);
  expected_doc_hit_info1.UpdateSection(message_foo_section_id);
  DocHitInfo expected_doc_hit_info2(email_document_id);
  expected_doc_hit_info2.UpdateSection(email_foo_section_id);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
// A property filter combined with a schema-type filter should only match hits
// in the named property of documents of the named type.
TEST_P(QueryProcessorTest, PropertyFilterWithinSchemaType) {
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
              PropertyConfigBuilder()
                  .SetName("foo")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
              PropertyConfigBuilder()
                  .SetName("foo")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // "foo" is the only indexed property of each type, so both get section 0.
  int email_foo_section_id = 0;
  int message_foo_section_id = 0;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal"
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  // Message document has content "animal"
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>', but only look
  // within documents of email schema
  search_spec.set_query("foo:animal");
  search_spec.add_schema_type_filters("email");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Shouldn't include the message document since we're only looking at email
  // types. Use the named section-id constant instead of a magic number.
  DocHitInfo expected_doc_hit_info(email_document_id);
  expected_doc_hit_info.UpdateSection(email_foo_section_id);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
// Verifies that a nested-property-path filter 'foo.bar.baz:<term>' matches a
// hit indexed under the corresponding nested section.
TEST_P(QueryProcessorTest, NestedPropertyFilter) {
  // Create the schema and document store
  SchemaProto schema =
      SchemaBuilder()
          .AddType(
              SchemaTypeConfigBuilder()
                  .SetType("email")
                  // Indexed nested document property "foo" of type Foo; its
                  // nested properties are indexed too.
                  .AddProperty(PropertyConfigBuilder()
                                   .SetName("foo")
                                   .SetDataTypeDocument(
                                       "Foo", /*index_nested_properties=*/true)
                                   .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(
              SchemaTypeConfigBuilder()
                  .SetType("Foo")
                  // Indexed nested document property "bar" of type Bar.
                  .AddProperty(PropertyConfigBuilder()
                                   .SetName("bar")
                                   .SetDataTypeDocument(
                                       "Bar", /*index_nested_properties=*/true)
                                   .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("Bar")
                       // Indexed string property "baz" — the leaf of the
                       // nested property path "foo.bar.baz".
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("baz")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal" in its only indexed (nested) section,
  // which corresponds to the property path "foo.bar.baz".
  ASSERT_THAT(AddTokenToIndex(email_document_id, /*section_id=*/0,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>' where the section
  // name is the full nested property path.
  search_spec.set_query("foo.bar.baz:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // The hit in section 0 ("foo.bar.baz") matches the nested property filter,
  // so the email document should be returned.
  DocHitInfo expectedDocHitInfo1(email_document_id);
  expectedDocHitInfo1.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expectedDocHitInfo1));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["foo.bar.baz"],
              UnorderedElementsAre("animal"));
}
// A property filter must match on section *name*, not section id: a hit in
// message's "bar" section must not satisfy a "foo:" filter even though "bar"
// has the same section id as email's "foo".
TEST_P(QueryProcessorTest, PropertyFilterRespectsDifferentSectionIds) {
  // Create the schema and document store
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
              PropertyConfigBuilder()
                  .SetName("foo")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
              PropertyConfigBuilder()
                  .SetName("bar")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // Each type has a single indexed property, so both get section id 0. Note
  // that message's indexed property is named "bar", not "foo".
  int email_foo_section_id = 0;
  int message_bar_section_id = 0;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal"
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  // Message document has content "animal", put in the "bar" section, which
  // shares the same section id as email's indexed property "foo".
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>'
  search_spec.set_query("foo:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Even though the section id is the same, we should be able to tell that
  // message's "bar" section doesn't match the name of the section filter, so
  // only the email document is returned.
  DocHitInfo expected_doc_hit_info(email_document_id);
  expected_doc_hit_info.UpdateSection(email_foo_section_id);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
// A property filter naming a property that no schema type declares should
// match nothing, while the parsed query terms are still reported.
TEST_P(QueryProcessorTest, NonexistentPropertyFilterReturnsEmptyResults) {
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal"
  ASSERT_THAT(AddTokenToIndex(email_document_id, /*section_id=*/0,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>' where the section
  // name doesn't exist in any schema type.
  search_spec.set_query("nonexistent:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // No schema type has a "nonexistent" property, so nothing can match. The
  // parsed query terms are still surfaced under the "nonexistent" key.
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["nonexistent"],
              UnorderedElementsAre("animal"));
}
// A property filter naming a property that exists but is not indexed should
// match nothing, while the parsed query terms are still reported.
TEST_P(QueryProcessorTest, UnindexedPropertyFilterReturnsEmptyResults) {
  // Create the schema and document store
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("email")
                       // Add an unindexed property so we generate section
                       // metadata on it
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("foo")
                                        .SetDataType(TYPE_STRING)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal"
  ASSERT_THAT(AddTokenToIndex(email_document_id, /*section_id=*/0,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>' targeting the
  // unindexed "foo" property.
  search_spec.set_query("foo:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // "foo" is not an indexed property, so the section filter can never match
  // any indexed section and no documents are returned. The parsed query terms
  // are still surfaced under the "foo" key.
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
// Mixing an unrestricted term with a property-filtered term ('cat OR
// foo:animal') should surface both terms under their respective keys and
// match documents satisfying either branch.
TEST_P(QueryProcessorTest, PropertyFilterTermAndUnrestrictedTerm) {
  // Create the schema and document store
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
              PropertyConfigBuilder()
                  .SetName("foo")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
              PropertyConfigBuilder()
                  .SetName("foo")
                  .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                  .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // "foo" is the only indexed property of each type, so both get section 0.
  int email_foo_section_id = 0;
  int message_foo_section_id = 0;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal cat"
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "cat"),
              IsOk());
  // Message document has content "animal"
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>'
  search_spec.set_query("cat OR foo:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Ordered by descending DocumentId, so message comes first since it was
  // inserted last. Use the named section-id constants declared above rather
  // than repeating the magic numbers.
  DocHitInfo expected_doc_hit_info1(message_document_id);
  expected_doc_hit_info1.UpdateSection(message_foo_section_id);
  DocHitInfo expected_doc_hit_info2(email_document_id);
  expected_doc_hit_info2.UpdateSection(email_foo_section_id);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
  EXPECT_THAT(results.query_terms, SizeIs(2));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
  EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
// Verifies that per-type property masks (type_property_filters) restrict
// which properties of each schema type may produce hits for an unrestricted
// query term.
TEST_P(QueryProcessorTest, TypePropertyFilter) {
  // Create the schema and document store
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder().SetType("email")
              .AddProperty(
                  PropertyConfigBuilder()
                      .SetName("foo")
                      .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                      .SetCardinality(CARDINALITY_OPTIONAL))
              .AddProperty(
                  PropertyConfigBuilder()
                      .SetName("bar")
                      .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                      .SetCardinality(CARDINALITY_OPTIONAL))
              .AddProperty(
                  PropertyConfigBuilder()
                      .SetName("baz")
                      .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                      .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder().SetType("message")
              .AddProperty(
                  PropertyConfigBuilder()
                      .SetName("foo")
                      .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                      .SetCardinality(CARDINALITY_OPTIONAL))
              .AddProperty(
                  PropertyConfigBuilder()
                      .SetName("bar")
                      .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                      .SetCardinality(CARDINALITY_OPTIONAL))
              .AddProperty(
                  PropertyConfigBuilder()
                      .SetName("baz")
                      .SetDataTypeString(TERM_MATCH_EXACT, TOKENIZER_PLAIN)
                      .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // SectionIds are assigned in ascending order per schema type,
  // alphabetically.
  int email_bar_section_id = 0;
  int email_baz_section_id = 1;
  int email_foo_section_id = 2;
  int message_bar_section_id = 0;
  int message_baz_section_id = 1;
  int message_foo_section_id = 2;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal" in all sections
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
                              term_match_type, "animal"),
              IsOk());

  // Message document has content "animal" in all sections
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  search_spec.set_query("animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  // email has property filters for foo and baz properties
  TypePropertyMask *email_mask = search_spec.add_type_property_filters();
  email_mask->set_schema_type("email");
  email_mask->add_paths("foo");
  email_mask->add_paths("baz");

  // message has property filters for bar and baz properties
  TypePropertyMask *message_mask = search_spec.add_type_property_filters();
  message_mask->set_schema_type("message");
  message_mask->add_paths("bar");
  message_mask->add_paths("baz");

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Ordered by descending DocumentId, so message comes first since it was
  // inserted last. Each document's hit mask only includes the sections
  // allowed by its type's property mask.
  DocHitInfo expected_doc_hit_info1(message_document_id);
  expected_doc_hit_info1.UpdateSection(message_bar_section_id);
  expected_doc_hit_info1.UpdateSection(message_baz_section_id);
  DocHitInfo expected_doc_hit_info2(email_document_id);
  expected_doc_hit_info2.UpdateSection(email_foo_section_id);
  expected_doc_hit_info2.UpdateSection(email_baz_section_id);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info1, expected_doc_hit_info2));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
}
// Verifies that type property filters compose with an in-query section
// restrict ("foo:animal"): a hit is returned only for sections allowed by
// BOTH mechanisms.
TEST_P(QueryProcessorTest, TypePropertyFilterWithSectionRestrict) {
  // Create the schema and document store. "email" and "message" each have the
  // same three exact-match string properties: foo, bar, baz.
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("email")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("foo")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("bar")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("baz")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("message")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("foo")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("bar")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("baz")
                                        .SetDataTypeString(TERM_MATCH_EXACT,
                                                           TOKENIZER_PLAIN)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // SectionIds are assigned in ascending order per schema type,
  // alphabetically.
  int email_bar_section_id = 0;
  int email_baz_section_id = 1;
  int email_foo_section_id = 2;
  int message_bar_section_id = 0;
  int message_baz_section_id = 1;
  int message_foo_section_id = 2;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // These documents don't actually match to the tokens in the index. We're
  // inserting the documents to get the appropriate number of documents and
  // schema types populated.
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "1")
                                                      .SetSchema("email")
                                                      .Build()));
  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
                             document_store_->Put(DocumentBuilder()
                                                      .SetKey("namespace", "2")
                                                      .SetSchema("message")
                                                      .Build()));

  // Populate the index
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;

  // Email document has content "animal" in all sections
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_bar_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(email_document_id, email_baz_section_id,
                              term_match_type, "animal"),
              IsOk());
  // Message document has content "animal" in all sections
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_bar_section_id,
                              term_match_type, "animal"),
              IsOk());
  ASSERT_THAT(AddTokenToIndex(message_document_id, message_baz_section_id,
                              term_match_type, "animal"),
              IsOk());

  SearchSpecProto search_spec;
  // Create a section filter '<section name>:<query term>'
  search_spec.set_query("foo:animal");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());
  // email has property filters for foo and baz properties
  TypePropertyMask *email_mask = search_spec.add_type_property_filters();
  email_mask->set_schema_type("email");
  email_mask->add_paths("foo");
  email_mask->add_paths("baz");
  // message has property filters for bar and baz properties
  TypePropertyMask *message_mask = search_spec.add_type_property_filters();
  message_mask->set_schema_type("message");
  message_mask->add_paths("bar");
  message_mask->add_paths("baz");

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
          fake_clock_.GetSystemTimeMilliseconds()));

  // Only hits in sections allowed by both the property filters and section
  // restricts should be returned. Message document should not be returned since
  // section foo specified in the section restrict is not allowed by the
  // property filters.
  DocHitInfo expected_doc_hit_info(email_document_id);
  expected_doc_hit_info.UpdateSection(email_foo_section_id);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expected_doc_hit_info));
  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
  EXPECT_THAT(results.query_terms, SizeIs(1));
  EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
}
// A document whose TTL has not yet elapsed (query time 50ms < creation 10ms +
// ttl 100ms) must still be returned by the query.
TEST_P(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
  // Create the schema and document store
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // Arbitrary value, just has to be less than the document's creation
  // timestamp + ttl
  FakeClock fake_clock;
  fake_clock.SetSystemTimeMilliseconds(50);

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult create_result,
      CreateDocumentStore(&filesystem_, store_dir_, &fake_clock,
                          schema_store_.get()));
  document_store_ = std::move(create_result.document_store);

  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_id,
      document_store_->Put(DocumentBuilder()
                               .SetKey("namespace", "1")
                               .SetSchema("email")
                               .SetCreationTimestampMs(10)
                               .SetTtlMs(100)
                               .Build()));

  // Populate the index
  int section_id = 0;
  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(
      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
      IsOk());

  // Perform query. The query processor is recreated because document_store_
  // was replaced above.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QueryProcessor> local_query_processor,
      QueryProcessor::Create(index_.get(), numeric_index_.get(),
                             language_segmenter_.get(), normalizer_.get(),
                             document_store_.get(), schema_store_.get()));

  SearchSpecProto search_spec;
  search_spec.set_query("hello");
  search_spec.set_term_match_type(term_match_type);
  search_spec.set_search_type(GetParam());

  // Bug fix: query with the LOCAL fake_clock's time (50ms, set above), not the
  // member fake_clock_'s. The previous code passed
  // fake_clock_.GetSystemTimeMilliseconds(), whose value is unrelated to the
  // clock backing this document store, so the "before TTL" premise depended on
  // unrelated fixture state. The sibling test DocumentPastTtlFilteredOut
  // already uses its local clock.
  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      local_query_processor->ParseSearch(
          search_spec, ScoringSpecProto::RankingStrategy::NONE,
          fake_clock.GetSystemTimeMilliseconds()));

  DocHitInfo expectedDocHitInfo(document_id);
  expectedDocHitInfo.UpdateSection(/*section_id=*/0);
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(expectedDocHitInfo));
}
// A document whose TTL has already elapsed (creation 50ms + ttl 100ms <
// system time 200ms) must not appear in query results.
TEST_P(QueryProcessorTest, DocumentPastTtlFilteredOut) {
  // Set up a schema with a single, property-less "email" type.
  SchemaProto schema = SchemaBuilder()
                           .AddType(SchemaTypeConfigBuilder().SetType("email"))
                           .Build();
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // Pick a system time strictly greater than creation timestamp + ttl, and
  // back a fresh document store with it.
  FakeClock clock;
  clock.SetSystemTimeMilliseconds(200);
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentStore::CreateResult create_result,
      CreateDocumentStore(&filesystem_, store_dir_, &clock,
                          schema_store_.get()));
  document_store_ = std::move(create_result.document_store);

  // Insert one document that expired at 50 + 100 = 150ms.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId expired_doc_id,
      document_store_->Put(DocumentBuilder()
                               .SetKey("namespace", "1")
                               .SetSchema("email")
                               .SetCreationTimestampMs(50)
                               .SetTtlMs(100)
                               .Build()));

  // Index the term "hello" for that document in section 0.
  int indexed_section_id = 0;
  TermMatchType::Code match_type = TermMatchType::EXACT_ONLY;
  EXPECT_THAT(AddTokenToIndex(expired_doc_id, indexed_section_id, match_type,
                              "hello"),
              IsOk());

  // Build a query processor over the freshly created document store.
  ICING_ASSERT_OK_AND_ASSIGN(
      std::unique_ptr<QueryProcessor> processor,
      QueryProcessor::Create(index_.get(), numeric_index_.get(),
                             language_segmenter_.get(), normalizer_.get(),
                             document_store_.get(), schema_store_.get()));

  SearchSpecProto search_spec;
  search_spec.set_query("hello");
  search_spec.set_term_match_type(match_type);
  search_spec.set_search_type(GetParam());

  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      processor->ParseSearch(search_spec,
                             ScoringSpecProto::RankingStrategy::NONE,
                             clock.GetSystemTimeMilliseconds()));

  // The document is past its TTL, so nothing should be returned.
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
}
// End-to-end checks for numeric range queries (<, ==, >, >=, <=) over indexed
// int64 properties when kNumericSearchFeature is enabled.
TEST_P(QueryProcessorTest, NumericFilter) {
  if (GetParam() !=
      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
    GTEST_SKIP() << "Numeric filter is only supported in advanced query.";
  }

  // Create the schema and document store: one "transaction" type with two
  // range-indexed int64 properties, "price" and "cost".
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("transaction")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("price")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_OPTIONAL))
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("cost")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  // SectionIds are assigned alphabetically
  SectionId cost_section_id = 0;
  SectionId price_section_id = 1;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // Document 1: price == 10.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_one_id,
      document_store_->Put(DocumentBuilder()
                               .SetKey("namespace", "1")
                               .SetSchema("transaction")
                               .AddInt64Property("price", 10)
                               .Build()));
  ICING_ASSERT_OK(
      AddToNumericIndex(document_one_id, "price", price_section_id, 10));

  // Document 2: price == 25.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_two_id,
      document_store_->Put(DocumentBuilder()
                               .SetKey("namespace", "2")
                               .SetSchema("transaction")
                               .AddInt64Property("price", 25)
                               .Build()));
  ICING_ASSERT_OK(
      AddToNumericIndex(document_two_id, "price", price_section_id, 25));

  // Document 3: cost == 2 (no price property).
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_three_id,
      document_store_->Put(DocumentBuilder()
                               .SetKey("namespace", "3")
                               .SetSchema("transaction")
                               .AddInt64Property("cost", 2)
                               .Build()));
  ICING_ASSERT_OK(
      AddToNumericIndex(document_three_id, "cost", cost_section_id, 2));

  // "price < 20" should match only document 1 (price == 10).
  SearchSpecProto search_spec;
  search_spec.set_query("price < 20");
  search_spec.set_search_type(GetParam());
  search_spec.add_enabled_features(std::string(kNumericSearchFeature));
  ICING_ASSERT_OK_AND_ASSIGN(
      QueryResults results,
      query_processor_->ParseSearch(search_spec,
                                    ScoringSpecProto::RankingStrategy::NONE,
                                    fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(EqualsDocHitInfo(
                  document_one_id, std::vector<SectionId>{price_section_id})));

  // "price == 25" should match only document 2.
  search_spec.set_query("price == 25");
  ICING_ASSERT_OK_AND_ASSIGN(
      results, query_processor_->ParseSearch(
                   search_spec, ScoringSpecProto::RankingStrategy::NONE,
                   fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(EqualsDocHitInfo(
                  document_two_id, std::vector<SectionId>{price_section_id})));

  // "cost > 2" is exclusive, so document 3 (cost == 2) should not match.
  search_spec.set_query("cost > 2");
  ICING_ASSERT_OK_AND_ASSIGN(
      results, query_processor_->ParseSearch(
                   search_spec, ScoringSpecProto::RankingStrategy::NONE,
                   fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());

  // "cost >= 2" is inclusive, so document 3 should match.
  search_spec.set_query("cost >= 2");
  ICING_ASSERT_OK_AND_ASSIGN(
      results, query_processor_->ParseSearch(
                   search_spec, ScoringSpecProto::RankingStrategy::NONE,
                   fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
              ElementsAre(EqualsDocHitInfo(
                  document_three_id, std::vector<SectionId>{cost_section_id})));

  // "price <= 25" should match documents 1 and 2, returned in descending
  // DocumentId order (document 2 first).
  search_spec.set_query("price <= 25");
  ICING_ASSERT_OK_AND_ASSIGN(
      results, query_processor_->ParseSearch(
                   search_spec, ScoringSpecProto::RankingStrategy::NONE,
                   fake_clock_.GetSystemTimeMilliseconds()));
  EXPECT_THAT(
      GetDocHitInfos(results.root_iterator.get()),
      ElementsAre(EqualsDocHitInfo(document_two_id,
                                   std::vector<SectionId>{price_section_id}),
                  EqualsDocHitInfo(document_one_id,
                                   std::vector<SectionId>{price_section_id})));
}
// A numeric comparison query must be rejected with INVALID_ARGUMENT when the
// caller has not enabled the numeric search feature in the SearchSpec.
TEST_P(QueryProcessorTest, NumericFilterWithoutEnablingFeatureFails) {
  if (GetParam() !=
      SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY) {
    GTEST_SKIP() << "Numeric filter is only supported in advanced query.";
  }

  // Schema: a single "transaction" type with one range-indexed int64
  // property, "price".
  SchemaProto schema =
      SchemaBuilder()
          .AddType(SchemaTypeConfigBuilder()
                       .SetType("transaction")
                       .AddProperty(PropertyConfigBuilder()
                                        .SetName("price")
                                        .SetDataTypeInt64(NUMERIC_MATCH_RANGE)
                                        .SetCardinality(CARDINALITY_OPTIONAL)))
          .Build();
  SectionId price_section_id = 0;
  ASSERT_THAT(schema_store_->SetSchema(
                  schema, /*ignore_errors_and_delete_documents=*/false,
                  /*allow_circular_schema_definitions=*/false),
              IsOk());

  // Add one document with price == 10 and index its numeric value.
  ICING_ASSERT_OK_AND_ASSIGN(
      DocumentId document_one_id,
      document_store_->Put(DocumentBuilder()
                               .SetKey("namespace", "1")
                               .SetSchema("transaction")
                               .AddInt64Property("price", 10)
                               .Build()));
  ICING_ASSERT_OK(
      AddToNumericIndex(document_one_id, "price", price_section_id, 10));

  // Issue a numeric query WITHOUT adding kNumericSearchFeature to the spec's
  // enabled_features.
  SearchSpecProto search_spec;
  search_spec.set_query("price < 20");
  search_spec.set_search_type(GetParam());

  libtextclassifier3::StatusOr<QueryResults> parse_result =
      query_processor_->ParseSearch(search_spec,
                                    ScoringSpecProto::RankingStrategy::NONE,
                                    fake_clock_.GetSystemTimeMilliseconds());
  EXPECT_THAT(parse_result,
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
// Run every QueryProcessorTest case under both query parser implementations:
// the raw query parser and the experimental advanced query parser.
INSTANTIATE_TEST_SUITE_P(
    QueryProcessorTest, QueryProcessorTest,
    testing::Values(
        SearchSpecProto::SearchType::ICING_RAW_QUERY,
        SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY));
} // namespace
} // namespace lib
} // namespace icing