| // Copyright (C) 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include <cstdint> |
| #include <limits> |
| #include <memory> |
| #include <string> |
| #include <utility> |
| |
| #include "icing/text_classifier/lib3/utils/base/status.h" |
| #include "gmock/gmock.h" |
| #include "gtest/gtest.h" |
| #include "icing/document-builder.h" |
| #include "icing/file/filesystem.h" |
| #include "icing/file/mock-filesystem.h" |
| #include "icing/file/version-util.h" |
| #include "icing/icing-search-engine.h" |
| #include "icing/index/index-processor.h" |
| #include "icing/index/index.h" |
| #include "icing/index/integer-section-indexing-handler.h" |
| #include "icing/index/numeric/integer-index.h" |
| #include "icing/index/string-section-indexing-handler.h" |
| #include "icing/jni/jni-cache.h" |
| #include "icing/join/doc-join-info.h" |
| #include "icing/join/join-processor.h" |
| #include "icing/join/qualified-id-join-index.h" |
| #include "icing/join/qualified-id-join-indexing-handler.h" |
| #include "icing/legacy/index/icing-filesystem.h" |
| #include "icing/legacy/index/icing-mock-filesystem.h" |
| #include "icing/portable/endian.h" |
| #include "icing/portable/equals-proto.h" |
| #include "icing/portable/platform.h" |
| #include "icing/proto/debug.pb.h" |
| #include "icing/proto/document.pb.h" |
| #include "icing/proto/document_wrapper.pb.h" |
| #include "icing/proto/initialize.pb.h" |
| #include "icing/proto/logging.pb.h" |
| #include "icing/proto/optimize.pb.h" |
| #include "icing/proto/persist.pb.h" |
| #include "icing/proto/reset.pb.h" |
| #include "icing/proto/schema.pb.h" |
| #include "icing/proto/scoring.pb.h" |
| #include "icing/proto/search.pb.h" |
| #include "icing/proto/status.pb.h" |
| #include "icing/proto/storage.pb.h" |
| #include "icing/proto/term.pb.h" |
| #include "icing/proto/usage.pb.h" |
| #include "icing/query/query-features.h" |
| #include "icing/schema-builder.h" |
| #include "icing/schema/schema-store.h" |
| #include "icing/store/document-id.h" |
| #include "icing/store/document-log-creator.h" |
| #include "icing/testing/common-matchers.h" |
| #include "icing/testing/fake-clock.h" |
| #include "icing/testing/icu-data-file-helper.h" |
| #include "icing/testing/jni-test-helpers.h" |
| #include "icing/testing/test-data.h" |
| #include "icing/testing/tmp-directory.h" |
| #include "icing/tokenization/language-segmenter-factory.h" |
| #include "icing/tokenization/language-segmenter.h" |
| #include "icing/transform/normalizer-factory.h" |
| #include "icing/transform/normalizer.h" |
| #include "icing/util/tokenized-document.h" |
| #include "unicode/uloc.h" |
| |
| namespace icing { |
| namespace lib { |
| |
| namespace { |
| |
| using ::icing::lib::portable_equals_proto::EqualsProto; |
| using ::testing::_; |
| using ::testing::AtLeast; |
| using ::testing::DoDefault; |
| using ::testing::EndsWith; |
| using ::testing::Eq; |
| using ::testing::HasSubstr; |
| using ::testing::IsEmpty; |
| using ::testing::Matcher; |
| using ::testing::Ne; |
| using ::testing::Return; |
| using ::testing::SizeIs; |
| |
| // Multi-sentence Latin filler text available to tests in this file. |
| constexpr std::string_view kIpsumText{ |
|     "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis " |
|     "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida " |
|     "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam " |
|     "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo " |
|     "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, " |
|     "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula " |
|     "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et " |
|     "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, " |
|     "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis " |
|     "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. " |
|     "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. " |
|     "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur " |
|     "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh " |
|     "placerat semper."}; |
| |
| // Reads sizeof(Header) bytes from offset 0 of `file_path` into a document-log |
| // Header and returns it. The result of PRead() is not checked. |
| PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( |
|     Filesystem filesystem, const std::string& file_path) { |
|   using Header = PortableFileBackedProtoLog<DocumentWrapper>::Header; |
|   Header header; |
|   filesystem.PRead(file_path.c_str(), &header, sizeof(Header), /*offset=*/0); |
|   return header; |
| } |
| |
| // Writes the raw bytes of `header` (sizeof(Header) of them) to `file_path`. |
| // The result of Write() is not checked. |
| void WriteDocumentLogHeader( |
|     Filesystem filesystem, const std::string& file_path, |
|     PortableFileBackedProtoLog<DocumentWrapper>::Header& header) { |
|   using Header = PortableFileBackedProtoLog<DocumentWrapper>::Header; |
|   filesystem.Write(file_path.c_str(), &header, sizeof(Header)); |
| } |
| |
| // Test-only subclass that forwards a caller-supplied Filesystem, IcingFilesystem, Clock and JniCache to IcingSearchEngine so tests can inject mocks/fakes. |
| class TestIcingSearchEngine : public IcingSearchEngine { |
| public: |
| TestIcingSearchEngine(const IcingSearchEngineOptions& options, |
| std::unique_ptr<const Filesystem> filesystem, |
| std::unique_ptr<const IcingFilesystem> icing_filesystem, |
| std::unique_ptr<Clock> clock, |
| std::unique_ptr<JniCache> jni_cache) |
| : IcingSearchEngine(options, std::move(filesystem), |
| std::move(icing_filesystem), std::move(clock), |
| std::move(jni_cache)) {} |
| }; |
| |
| // Returns the "icing" subdirectory of the test temp dir; all per-test files |
| // in this file live beneath it. |
| std::string GetTestBaseDir() { |
|   std::string base_dir = GetTestTempDir(); |
|   base_dir += "/icing"; |
|   return base_dir; |
| } |
| |
| // Fixture for all tests relating to IcingSearchEngine::Initialize. |
| // |
| // SetUp() loads the ICU data file when needed, creates the test base |
| // directory and builds a language segmenter and a normalizer. TearDown() |
| // releases both and deletes the base directory again. |
| class IcingSearchEngineInitializationTest : public testing::Test { |
|  protected: |
|   void SetUp() override { |
|     // When CFString or reverse-JNI tokenization is selected (i.e. not ICU), |
|     // no ICU data file is bundled, so there is nothing to set up. |
|     // Technically reverse-JNI segmentation could be combined with an ICU data |
|     // file, but that seems unlikely and the current BUILD setup doesn't do it. |
|     const bool needs_icu_data_file = |
|         !(IsCfStringTokenization() || IsReverseJniTokenization()); |
|     if (needs_icu_data_file) { |
|       // File generated via icu_data_file rule in //icing/BUILD. |
|       std::string icu_data_file_path = GetTestFilePath("icing/icu.dat"); |
|       ICING_ASSERT_OK( |
|           icu_data_file_helper::SetUpICUDataFile(icu_data_file_path)); |
|     } |
| |
|     const std::string base_dir = GetTestBaseDir(); |
|     filesystem_.CreateDirectoryRecursively(base_dir.c_str()); |
| |
|     language_segmenter_factory::SegmenterOptions options(ULOC_US); |
|     ICING_ASSERT_OK_AND_ASSIGN( |
|         lang_segmenter_, |
|         language_segmenter_factory::Create(std::move(options))); |
| |
|     ICING_ASSERT_OK_AND_ASSIGN( |
|         normalizer_, |
|         normalizer_factory::Create( |
|             /*max_term_byte_size=*/std::numeric_limits<int32_t>::max())); |
|   } |
| |
|   void TearDown() override { |
|     // Release the helpers before removing the directory tree. |
|     normalizer_.reset(); |
|     lang_segmenter_.reset(); |
| |
|     const std::string base_dir = GetTestBaseDir(); |
|     filesystem_.DeleteDirectoryRecursively(base_dir.c_str()); |
|   } |
| |
|   // Read-only accessors for the fixture-owned filesystem objects. |
|   const Filesystem* filesystem() const { return &filesystem_; } |
| |
|   const IcingFilesystem* icing_filesystem() const { return &icing_filesystem_; } |
| |
|   Filesystem filesystem_; |
|   IcingFilesystem icing_filesystem_; |
|   std::unique_ptr<LanguageSegmenter> lang_segmenter_; |
|   std::unique_ptr<Normalizer> normalizer_; |
| }; |
| |
| // Deliberately non-zero so that it is not overridden with the current time. |
| constexpr int64_t kDefaultCreationTimestampMs = 1'575'492'852'000; |
| |
| // Path of the "version" file directly under the test base dir. |
| std::string GetVersionFilename() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/version"; |
|   return path; |
| } |
| |
| // Path of the "document_dir" directory under the test base dir. |
| std::string GetDocumentDir() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/document_dir"; |
|   return path; |
| } |
| |
| // Path of the "index_dir" directory under the test base dir. |
| std::string GetIndexDir() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/index_dir"; |
|   return path; |
| } |
| |
| // Path of the "integer_index_dir" directory under the test base dir. |
| std::string GetIntegerIndexDir() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/integer_index_dir"; |
|   return path; |
| } |
| |
| // Path of the "qualified_id_join_index_dir" directory under the test base |
| // dir. |
| std::string GetQualifiedIdJoinIndexDir() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/qualified_id_join_index_dir"; |
|   return path; |
| } |
| |
| // Path of the "schema_dir" directory under the test base dir. |
| std::string GetSchemaDir() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/schema_dir"; |
|   return path; |
| } |
| |
| // Path of the "icing_search_engine_header" file under the test base dir. |
| std::string GetHeaderFilename() { |
|   std::string path = GetTestBaseDir(); |
|   path += "/icing_search_engine_header"; |
|   return path; |
| } |
| |
| // Returns engine options whose only explicitly set field is base_dir, which |
| // points at the test base dir. |
| IcingSearchEngineOptions GetDefaultIcingOptions() { |
|   IcingSearchEngineOptions options; |
|   const std::string base_dir = GetTestBaseDir(); |
|   options.set_base_dir(base_dir); |
|   return options; |
| } |
| |
| // Builds a "Message" document keyed by (name_space, uri) with a fixed string |
| // "body", a fixed int64 "indexableInteger" of 123 and the default creation |
| // timestamp. |
| DocumentProto CreateMessageDocument(std::string name_space, std::string uri) { |
|   DocumentProto message = |
|       DocumentBuilder() |
|           .SetKey(std::move(name_space), std::move(uri)) |
|           .SetSchema("Message") |
|           .AddStringProperty("body", "message body") |
|           .AddInt64Property("indexableInteger", 123) |
|           .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
|           .Build(); |
|   return message; |
| } |
| |
| // Builds an "Email" document keyed by (name_space, uri) with the given score |
| // and the given "subject" and "body" string properties. No creation |
| // timestamp is set here. |
| DocumentProto CreateEmailDocument(const std::string& name_space, |
|                                   const std::string& uri, int score, |
|                                   const std::string& subject_content, |
|                                   const std::string& body_content) { |
|   DocumentProto email = DocumentBuilder() |
|                             .SetKey(name_space, uri) |
|                             .SetSchema("Email") |
|                             .SetScore(score) |
|                             .AddStringProperty("subject", subject_content) |
|                             .AddStringProperty("body", body_content) |
|                             .Build(); |
|   return email; |
| } |
| |
| // Schema type "Message": a required prefix-matched plain-tokenized string |
| // "body" and a required range-matched int64 "indexableInteger". |
| SchemaTypeConfigProto CreateMessageSchemaTypeConfig() { |
|   SchemaTypeConfigProto message_type = |
|       SchemaTypeConfigBuilder() |
|           .SetType("Message") |
|           .AddProperty( |
|               PropertyConfigBuilder() |
|                   .SetName("body") |
|                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
|                   .SetCardinality(CARDINALITY_REQUIRED)) |
|           .AddProperty(PropertyConfigBuilder() |
|                            .SetName("indexableInteger") |
|                            .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
|                            .SetCardinality(CARDINALITY_REQUIRED)) |
|           .Build(); |
|   return message_type; |
| } |
| |
| // Schema type "Email": required prefix-matched plain-tokenized string |
| // properties "body" and "subject". |
| SchemaTypeConfigProto CreateEmailSchemaTypeConfig() { |
|   SchemaTypeConfigProto email_type = |
|       SchemaTypeConfigBuilder() |
|           .SetType("Email") |
|           .AddProperty( |
|               PropertyConfigBuilder() |
|                   .SetName("body") |
|                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
|                   .SetCardinality(CARDINALITY_REQUIRED)) |
|           .AddProperty( |
|               PropertyConfigBuilder() |
|                   .SetName("subject") |
|                   .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
|                   .SetCardinality(CARDINALITY_REQUIRED)) |
|           .Build(); |
|   return email_type; |
| } |
| |
| // Schema containing only the "Message" type. |
| SchemaProto CreateMessageSchema() { |
|   SchemaProto schema = |
|       SchemaBuilder().AddType(CreateMessageSchemaTypeConfig()).Build(); |
|   return schema; |
| } |
| |
| // Schema containing only the "Email" type. |
| SchemaProto CreateEmailSchema() { |
|   SchemaProto schema = |
|       SchemaBuilder().AddType(CreateEmailSchemaTypeConfig()).Build(); |
|   return schema; |
| } |
| |
| // Scoring spec that ranks results by DOCUMENT_SCORE. |
| ScoringSpecProto GetDefaultScoringSpec() { |
|   ScoringSpecProto spec; |
|   spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE); |
|   return spec; |
| } |
| |
| // TODO(b/272145329): create SearchSpecBuilder, JoinSpecBuilder, |
| // SearchResultProtoBuilder and ResultProtoBuilder for unit tests and build all |
| // instances by them. |
| |
| // Calls the public API on an engine that was never Initialize()d and expects |
| // each call to report FAILED_PRECONDITION. InvalidateNextPageToken has no |
| // status to inspect, so it is only required not to crash. |
| TEST_F(IcingSearchEngineInitializationTest, UninitializedInstanceFailsSafely) { |
|   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| |
|   // Schema APIs. The schema used here is the Message schema. |
|   SchemaProto message_schema = CreateMessageSchema(); |
|   EXPECT_THAT(icing.SetSchema(message_schema).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   EXPECT_THAT(icing.GetSchema().status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   EXPECT_THAT( |
|       icing.GetSchemaType(message_schema.types(0).schema_type()).status(), |
|       ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
| |
|   // Document APIs. |
|   DocumentProto doc = CreateMessageDocument("namespace", "uri"); |
|   EXPECT_THAT(icing.Put(doc).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   EXPECT_THAT(icing |
|                   .Get(doc.namespace_(), doc.uri(), |
|                        GetResultSpecProto::default_instance()) |
|                   .status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   // Checked with ProtoStatusIs like every other call in this test. |
|   EXPECT_THAT( |
|       icing.DeleteBySchemaType(message_schema.types(0).schema_type()).status(), |
|       ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
| |
|   // Search and paging APIs. |
|   SearchSpecProto search_spec = SearchSpecProto::default_instance(); |
|   ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance(); |
|   ResultSpecProto result_spec = ResultSpecProto::default_instance(); |
|   EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   constexpr int kSomePageToken = 12; |
|   EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   icing.InvalidateNextPageToken(kSomePageToken);  // Verify this doesn't crash. |
| |
|   // Maintenance APIs. |
|   EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
|   EXPECT_THAT(icing.Optimize().status(), |
|               ProtoStatusIs(StatusProto::FAILED_PRECONDITION)); |
| } |
| |
| // Smoke test: initialize, set the Message schema, then Put the same document |
| // twice - once as an lvalue and once as an rvalue copy. |
| TEST_F(IcingSearchEngineInitializationTest, SimpleInitialization) { |
|   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   ASSERT_THAT(init_result.status(), ProtoIsOk()); |
|   ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| |
|   DocumentProto message = CreateMessageDocument("namespace", "uri"); |
|   ASSERT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| |
|   DocumentProto message_copy(message); |
|   ASSERT_THAT(icing.Put(std::move(message_copy)).status(), ProtoIsOk()); |
| } |
| |
| // A document that was Put but not explicitly persisted must still be |
| // retrievable after Initialize() is called a second time on the same |
| // instance. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        InitializingAgainSavesNonPersistedData) { |
|   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
|   ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| |
|   DocumentProto message = CreateMessageDocument("namespace", "uri"); |
|   ASSERT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| |
|   GetResultProto expected_result; |
|   expected_result.mutable_status()->set_code(StatusProto::OK); |
|   *expected_result.mutable_document() = message; |
| |
|   // Sanity check before re-initializing. |
|   GetResultProto result_before = |
|       icing.Get("namespace", "uri", GetResultSpecProto::default_instance()); |
|   ASSERT_THAT(result_before, EqualsProto(expected_result)); |
| |
|   // Re-initialize and read the same document back. |
|   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
|   GetResultProto result_after = |
|       icing.Get("namespace", "uri", GetResultSpecProto::default_instance()); |
|   EXPECT_THAT(result_after, EqualsProto(expected_result)); |
| } |
| |
| // An index_merge_size of INT32_MAX is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        MaxIndexMergeSizeReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_index_merge_size(std::numeric_limits<int32_t>::max()); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // A negative index_merge_size is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        NegativeMergeSizeReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_index_merge_size(-1); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // An index_merge_size of zero is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        ZeroMergeSizeReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_index_merge_size(0); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // The smallest positive index_merge_size is accepted. |
| TEST_F(IcingSearchEngineInitializationTest, GoodIndexMergeSizeReturnsOk) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   // One is fine, if a bit weird. It just means that the lite index will be |
|   // smaller and will request a merge any time content is added to it. |
|   icing_options.set_index_merge_size(1); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| } |
| |
| // A negative max_token_length is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        NegativeMaxTokenLenReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_max_token_length(-1); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // A max_token_length of zero is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        ZeroMaxTokenLenReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_max_token_length(0); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // A negative compression_level is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        NegativeCompressionLevelReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_compression_level(-1); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // A compression_level of 10, which this test treats as above the accepted |
| // maximum, is expected to be rejected. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        GreaterThanMaxCompressionLevelReturnsInvalidArgument) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_compression_level(10); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), |
|               ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); |
| } |
| |
| // A compression_level of zero is accepted. |
| TEST_F(IcingSearchEngineInitializationTest, GoodCompressionLevelReturnsOk) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
|   icing_options.set_compression_level(0); |
| |
|   IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| } |
| |
| // Data written with compression level 3 must still initialize cleanly when |
| // the engine is reopened with level 9 and then with level 0. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        ReinitializingWithDifferentCompressionLevelReturnsOk) { |
|   IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
| |
|   // Populate and persist with the first compression level. |
|   icing_options.set_compression_level(3); |
|   { |
|     IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|     ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| |
|     DocumentProto message = CreateMessageDocument("namespace", "uri"); |
|     ASSERT_THAT(icing.Put(message).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk()); |
|   } |
| |
|   // Reopen with each of the other levels in turn; a fresh engine instance is |
|   // created and destroyed per level. |
|   for (int compression_level : {9, 0}) { |
|     icing_options.set_compression_level(compression_level); |
|     IcingSearchEngine icing(icing_options, GetTestJniCache()); |
|     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
|   } |
| } |
| |
| // If the filesystem cannot create directories, Initialize() must report |
| // INTERNAL with a message mentioning the directory creation failure. |
| TEST_F(IcingSearchEngineInitializationTest, FailToCreateDocStore) { |
|   // Every CreateDirectoryRecursively call returns false by default. |
|   // This fails DocumentStore::Create() |
|   auto failing_filesystem = std::make_unique<MockFilesystem>(); |
|   ON_CALL(*failing_filesystem, CreateDirectoryRecursively(_)) |
|       .WillByDefault(Return(false)); |
| |
|   TestIcingSearchEngine icing( |
|       GetDefaultIcingOptions(), std::move(failing_filesystem), |
|       std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
|       GetTestJniCache()); |
| |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); |
|   EXPECT_THAT(init_result.status().message(), |
|               HasSubstr("Could not create directory")); |
| } |
| |
| // With an init marker recording 5 previous failures, Initialize() is expected |
| // to succeed, report those 5 failures, keep all documents and remove the |
| // marker file. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        InitMarkerFilePreviousFailuresAtThreshold) { |
|   Filesystem fs; |
| |
|   // Explicit creation timestamps so the stored documents compare equal to |
|   // these protos when read back. |
|   DocumentProto email1 = |
|       CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); |
|   email1.set_creation_timestamp_ms(10000); |
|   DocumentProto email2 = |
|       CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); |
|   email2.set_creation_timestamp_ms(10000); |
| |
|   { |
|     // Create an index with a few documents. |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     InitializeResultProto first_init = icing.Initialize(); |
|     ASSERT_THAT(first_init.status(), ProtoIsOk()); |
|     ASSERT_THAT(first_init.initialize_stats().num_previous_init_failures(), |
|                 Eq(0)); |
|     ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); |
|   } |
| |
|   // Write an init marker file with 5 previously failed attempts. |
|   const std::string marker_filepath = GetTestBaseDir() + "/init_marker"; |
|   { |
|     ScopedFd marker_fd(fs.OpenForWrite(marker_filepath.c_str())); |
|     // The count is passed through GHostToNetworkL before being written. |
|     int attempts_to_write = GHostToNetworkL(5); |
|     ASSERT_TRUE(fs.PWrite(marker_fd.get(), 0, &attempts_to_write, |
|                           sizeof(attempts_to_write))); |
|     ASSERT_TRUE(fs.DataSync(marker_fd.get())); |
|   } |
| |
|   { |
|     // Create the index again and verify that initialization succeeds and no |
|     // data is thrown out. |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     InitializeResultProto second_init = icing.Initialize(); |
|     ASSERT_THAT(second_init.status(), ProtoIsOk()); |
|     ASSERT_THAT(second_init.initialize_stats().num_previous_init_failures(), |
|                 Eq(5)); |
| |
|     GetResultProto get_email1 = |
|         icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(get_email1.document(), EqualsProto(email1)); |
|     GetResultProto get_email2 = |
|         icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(get_email2.document(), EqualsProto(email2)); |
|   } |
| |
|   // The successful init should have thrown out the marker file. |
|   ASSERT_FALSE(fs.FileExists(marker_filepath.c_str())); |
| } |
| |
| // With an init marker recording 6 previous failures, Initialize() is expected |
| // to return WARNING_DATA_LOSS, report those 6 failures, drop all documents |
| // and remove the marker file. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        InitMarkerFilePreviousFailuresBeyondThreshold) { |
|   Filesystem fs; |
|   DocumentProto email1 = |
|       CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); |
|   DocumentProto email2 = |
|       CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); |
| |
|   { |
|     // Create an index with a few documents. |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     InitializeResultProto first_init = icing.Initialize(); |
|     ASSERT_THAT(first_init.status(), ProtoIsOk()); |
|     ASSERT_THAT(first_init.initialize_stats().num_previous_init_failures(), |
|                 Eq(0)); |
|     ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); |
|   } |
| |
|   // Write an init marker file with 6 previously failed attempts. |
|   const std::string marker_filepath = GetTestBaseDir() + "/init_marker"; |
|   { |
|     ScopedFd marker_fd(fs.OpenForWrite(marker_filepath.c_str())); |
|     // The count is passed through GHostToNetworkL before being written. |
|     int attempts_to_write = GHostToNetworkL(6); |
|     ASSERT_TRUE(fs.PWrite(marker_fd.get(), 0, &attempts_to_write, |
|                           sizeof(attempts_to_write))); |
|     ASSERT_TRUE(fs.DataSync(marker_fd.get())); |
|   } |
| |
|   { |
|     // Create the index again and verify that initialization succeeds and all |
|     // data is thrown out. |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     InitializeResultProto second_init = icing.Initialize(); |
|     ASSERT_THAT(second_init.status(), |
|                 ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); |
|     ASSERT_THAT(second_init.initialize_stats().num_previous_init_failures(), |
|                 Eq(6)); |
| |
|     GetResultProto get_email1 = |
|         icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(get_email1.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); |
|     GetResultProto get_email2 = |
|         icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(get_email2.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); |
|   } |
| |
|   // The successful init should have thrown out the marker file. |
|   ASSERT_FALSE(fs.FileExists(marker_filepath.c_str())); |
| } |
| |
| // Six consecutive failed Initialize() calls must each report one more |
| // previous failure than the last (0 through 5). The next initialization with |
| // a working filesystem is then expected to report 6 previous failures, |
| // return WARNING_DATA_LOSS, drop all documents and remove the marker file. |
| TEST_F(IcingSearchEngineInitializationTest, |
|        SuccessiveInitFailuresIncrementsInitMarker) { |
|   Filesystem fs; |
|   DocumentProto email1 = |
|       CreateEmailDocument("namespace", "uri1", 100, "subject1", "body1"); |
|   DocumentProto email2 = |
|       CreateEmailDocument("namespace", "uri2", 50, "subject2", "body2"); |
| |
|   { |
|     // 1. Create an index with a few documents. |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     InitializeResultProto first_init = icing.Initialize(); |
|     ASSERT_THAT(first_init.status(), ProtoIsOk()); |
|     ASSERT_THAT(first_init.initialize_stats().num_previous_init_failures(), |
|                 Eq(0)); |
|     ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.Put(email1).status(), ProtoIsOk()); |
|     ASSERT_THAT(icing.Put(email2).status(), ProtoIsOk()); |
|   } |
| |
|   { |
|     // 2. Create an index that will encounter an IO failure when trying to |
|     // create the document log. |
|     IcingSearchEngineOptions icing_options = GetDefaultIcingOptions(); |
| |
|     // Report a bad file size for the document log file. |
|     const std::string document_log_filepath = |
|         icing_options.base_dir() + "/document_dir/document_log_v1"; |
|     auto failing_filesystem = std::make_unique<MockFilesystem>(); |
|     ON_CALL(*failing_filesystem, |
|             GetFileSize(Matcher<const char*>(Eq(document_log_filepath)))) |
|         .WillByDefault(Return(Filesystem::kBadFileSize)); |
| |
|     TestIcingSearchEngine icing( |
|         icing_options, std::move(failing_filesystem), |
|         std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
|         GetTestJniCache()); |
| |
|     // Fail to initialize six times in a row. Attempt N (counting from zero) |
|     // must report N previous failures. |
|     constexpr int kNumFailingAttempts = 6; |
|     for (int attempt = 0; attempt < kNumFailingAttempts; ++attempt) { |
|       InitializeResultProto failed_init = icing.Initialize(); |
|       ASSERT_THAT(failed_init.status(), ProtoStatusIs(StatusProto::INTERNAL)); |
|       ASSERT_THAT(failed_init.initialize_stats().num_previous_init_failures(), |
|                   Eq(attempt)); |
|     } |
|   } |
| |
|   { |
|     // 3. Create the index again and verify that initialization succeeds and all |
|     // data is thrown out. |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     InitializeResultProto recovery_init = icing.Initialize(); |
|     ASSERT_THAT(recovery_init.status(), |
|                 ProtoStatusIs(StatusProto::WARNING_DATA_LOSS)); |
|     ASSERT_THAT(recovery_init.initialize_stats().num_previous_init_failures(), |
|                 Eq(6)); |
| |
|     GetResultProto get_email1 = |
|         icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(get_email1.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); |
|     GetResultProto get_email2 = |
|         icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(get_email2.status(), ProtoStatusIs(StatusProto::NOT_FOUND)); |
|   } |
| |
|   // The successful init should have thrown out the marker file. |
|   const std::string marker_filepath = GetTestBaseDir() + "/init_marker"; |
|   ASSERT_FALSE(fs.FileExists(marker_filepath.c_str())); |
| } |
| |
| // Deletes the engine header file between two engine lifetimes and verifies |
| // that the second Initialize() succeeds and that the document, the term |
| // index, the integer index and the schema are all still usable. |
| TEST_F(IcingSearchEngineInitializationTest, RecoverFromMissingHeaderFile) { |
|   SearchSpecProto search_spec; |
|   search_spec.set_query("message"); |
|   search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
|   SearchResultProto expected_search_result_proto; |
|   expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
|   *expected_search_result_proto.mutable_results()->Add()->mutable_document() = |
|       CreateMessageDocument("namespace", "uri"); |
| |
|   GetResultProto expected_get_result_proto; |
|   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); |
|   *expected_get_result_proto.mutable_document() = |
|       CreateMessageDocument("namespace", "uri"); |
| |
|   { |
|     // Basic initialization/setup |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
|     EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
|     EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), |
|                 ProtoIsOk()); |
|     EXPECT_THAT( |
|         icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), |
|         EqualsProto(expected_get_result_proto)); |
|     SearchResultProto search_result_proto = |
|         icing.Search(search_spec, GetDefaultScoringSpec(), |
|                      ResultSpecProto::default_instance()); |
|     EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
|                                          expected_search_result_proto)); |
|   }  // This should shut down IcingSearchEngine and persist anything it needs to |
| |
|   EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str())); |
| |
|   // We should be able to recover from this and access all our previous data |
|   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| |
|   // Checks that DocumentLog is still ok |
|   EXPECT_THAT( |
|       icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), |
|       EqualsProto(expected_get_result_proto)); |
| |
|   // Checks that the term index is still ok so we can search over it |
|   SearchResultProto search_result_proto = |
|       icing.Search(search_spec, GetDefaultScoringSpec(), |
|                    ResultSpecProto::default_instance()); |
|   EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
|                                        expected_search_result_proto)); |
| |
|   // Checks that the integer index is still ok so we can search over it |
|   SearchSpecProto search_spec2; |
|   search_spec2.set_query("indexableInteger == 123"); |
|   search_spec2.set_search_type( |
|       SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
|   search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
|   // A plain identifier is required for this local; the previous spelling |
|   // contained "::" and did not form a valid variable name. |
|   SearchResultProto search_result_proto2 = |
|       icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
|                    ResultSpecProto::default_instance()); |
|   EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores( |
|                                         expected_search_result_proto)); |
| |
|   // Checks that Schema is still ok since it'll be needed to validate the |
|   // document |
|   EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), |
|               ProtoIsOk()); |
| } |
| |
| // Overwrites the persisted schema file with junk bytes and expects the next |
| // Initialize() to fail with INTERNAL. |
| TEST_F(IcingSearchEngineInitializationTest, UnableToRecoverFromCorruptSchema) { |
|   { |
|     // Basic initialization/setup |
|     IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|     EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
|     EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
|     EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), |
|                 ProtoIsOk()); |
| |
|     GetResultProto expected_result; |
|     expected_result.mutable_status()->set_code(StatusProto::OK); |
|     *expected_result.mutable_document() = |
|         CreateMessageDocument("namespace", "uri"); |
| |
|     GetResultProto actual_result = |
|         icing.Get("namespace", "uri", GetResultSpecProto::default_instance()); |
|     EXPECT_THAT(actual_result, EqualsProto(expected_result)); |
|   }  // This should shut down IcingSearchEngine and persist anything it needs to |
| |
|   // Clobber the schema file. |
|   const std::string schema_path = |
|       absl_ports::StrCat(GetSchemaDir(), "/schema.pb"); |
|   const std::string junk = "1234"; |
|   EXPECT_TRUE(filesystem()->Write(schema_path.c_str(), junk.data(), |
|                                   junk.size())); |
| |
|   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
|   InitializeResultProto init_result = icing.Initialize(); |
|   EXPECT_THAT(init_result.status(), ProtoStatusIs(StatusProto::INTERNAL)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| UnableToRecoverFromCorruptDocumentLog) { |
| { |
| // Basic initialization/setup |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(), |
| ProtoIsOk()); |
| |
| GetResultProto expected_get_result_proto; |
| expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_get_result_proto.mutable_document() = |
| CreateMessageDocument("namespace", "uri"); |
| |
| EXPECT_THAT( |
| icing.Get("namespace", "uri", GetResultSpecProto::default_instance()), |
| EqualsProto(expected_get_result_proto)); |
| } // This should shut down IcingSearchEngine and persist anything it needs to |
| |
| const std::string document_log_file = absl_ports::StrCat( |
| GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); |
| const std::string corrupt_data = "1234"; |
| EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(), |
| corrupt_data.data(), corrupt_data.size())); |
| |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), |
| ProtoStatusIs(StatusProto::INTERNAL)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RecoverFromInconsistentSchemaStore) { |
| DocumentProto document1 = CreateMessageDocument("namespace", "uri1"); |
| DocumentProto document2_with_additional_property = |
| DocumentBuilder() |
| .SetKey("namespace", "uri2") |
| .SetSchema("Message") |
| .AddStringProperty("additional", "content") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| IcingSearchEngineOptions options = GetDefaultIcingOptions(); |
| { |
| // Initializes folder and schema |
| IcingSearchEngine icing(options, GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType( |
| SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig()) |
| // Add non-indexable property "additional" |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("additional") |
| .SetDataType(TYPE_STRING) |
| .SetCardinality(CARDINALITY_OPTIONAL))) |
| .Build(); |
| |
| EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(document2_with_additional_property).status(), |
| ProtoIsOk()); |
| |
| // Won't get us anything because "additional" isn't marked as an indexed |
| // property in the schema |
| SearchSpecProto search_spec; |
| search_spec.set_query("additional:content"); |
| search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| SearchResultProto expected_search_result_proto; |
| expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| SearchResultProto search_result_proto = |
| icing.Search(search_spec, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| } // This should shut down IcingSearchEngine and persist anything it needs to |
| |
| { |
| // This schema will change the SchemaTypeIds from the previous schema_ |
| // (since SchemaTypeIds are assigned based on order of the types, and this |
| // new schema changes the ordering of previous types) |
| SchemaProto new_schema; |
| auto type = new_schema.add_types(); |
| type->set_schema_type("Email"); |
| |
| // Switching a non-indexable property to indexable changes the SectionIds |
| // (since SectionIds are assigned based on alphabetical order of indexed |
| // sections, marking "additional" as an indexed property will push the |
| // "body" and "indexableInteger" property to different SectionIds) |
| *new_schema.add_types() = |
| SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig()) |
| .AddProperty( |
| PropertyConfigBuilder() |
| .SetName("additional") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_OPTIONAL)) |
| .Build(); |
| |
| // Write the marker file |
| std::string marker_filepath = |
| absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); |
| ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); |
| ASSERT_TRUE(sfd.is_valid()); |
| |
| // Write the new schema |
| FakeClock fake_clock; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); |
| ICING_EXPECT_OK(schema_store->SetSchema( |
| new_schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false)); |
| } // Will persist new schema |
| |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| |
| // We can insert a Email document since we kept the new schema |
| DocumentProto email_document = |
| DocumentBuilder() |
| .SetKey("namespace", "email_uri") |
| .SetSchema("Email") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk()); |
| |
| GetResultProto expected_get_result_proto; |
| expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_get_result_proto.mutable_document() = email_document; |
| |
| EXPECT_THAT(icing.Get("namespace", "email_uri", |
| GetResultSpecProto::default_instance()), |
| EqualsProto(expected_get_result_proto)); |
| |
| // Verify term search |
| SearchSpecProto search_spec1; |
| |
| // The section restrict will ensure we are using the correct, updated |
| // SectionId in the Index |
| search_spec1.set_query("additional:content"); |
| |
| // Schema type filter will ensure we're using the correct, updated |
| // SchemaTypeId in the DocumentStore |
| search_spec1.add_schema_type_filters("Message"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| SearchResultProto expected_search_result_proto1; |
| expected_search_result_proto1.mutable_status()->set_code(StatusProto::OK); |
| *expected_search_result_proto1.mutable_results()->Add()->mutable_document() = |
| document2_with_additional_property; |
| |
| SearchResultProto search_result_proto1 = |
| icing.Search(search_spec1, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto1)); |
| |
| // Verify numeric (integer) search |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec1.add_schema_type_filters("Message"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto expected_search_result_google::protobuf; |
| expected_search_result_google::protobuf.mutable_status()->set_code(StatusProto::OK); |
| *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() = |
| document2_with_additional_property; |
| *expected_search_result_google::protobuf.mutable_results()->Add()->mutable_document() = |
| document1; |
| |
| SearchResultProto search_result_google::protobuf = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_google::protobuf)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RecoverFromInconsistentDocumentStore) { |
| // Test the following scenario: document store is ahead of term, integer and |
| // qualified id join index. IcingSearchEngine should be able to recover all |
| // indices. Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| // - Still, we need to replay and reindex documents. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message1 = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body one") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message2 = |
| DocumentBuilder() |
| .SetKey("namespace", "message/2") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body two") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| { |
| // Initializes folder and schema, index one document |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message1).status(), ProtoIsOk()); |
| } // This should shut down IcingSearchEngine and persist anything it needs to |
| |
| { |
| FakeClock fake_clock; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); |
| |
| // Puts message2 into DocumentStore but doesn't index it. |
| ICING_ASSERT_OK_AND_ASSIGN( |
| DocumentStore::CreateResult create_result, |
| DocumentStore::Create( |
| filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(), |
| /*force_recovery_and_revalidate_documents=*/false, |
| /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false, |
| /*use_persistent_hash_map=*/false, |
| PortableFileBackedProtoLog< |
| DocumentWrapper>::kDeflateCompressionLevel, |
| /*initialize_stats=*/nullptr)); |
| std::unique_ptr<DocumentStore> document_store = |
| std::move(create_result.document_store); |
| |
| ICING_EXPECT_OK(document_store->Put(message2)); |
| } |
| |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| EXPECT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Index Restoration should be triggered here and document2 should be |
| // indexed. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| |
| GetResultProto expected_get_result_proto; |
| expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_get_result_proto.mutable_document() = message1; |
| |
| // DocumentStore kept the additional document |
| EXPECT_THAT(icing.Get("namespace", "message/1", |
| GetResultSpecProto::default_instance()), |
| EqualsProto(expected_get_result_proto)); |
| |
| *expected_get_result_proto.mutable_document() = message2; |
| EXPECT_THAT(icing.Get("namespace", "message/2", |
| GetResultSpecProto::default_instance()), |
| EqualsProto(expected_get_result_proto)); |
| |
| SearchResultProto expected_search_result_proto; |
| expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_search_result_proto.mutable_results()->Add()->mutable_document() = |
| message2; |
| *expected_search_result_proto.mutable_results()->Add()->mutable_document() = |
| message1; |
| |
| // We indexed the additional document in all indices. |
| // Verify term search |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("message"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto search_result_proto1 = |
| icing.Search(search_spec1, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| |
| // Verify numeric (integer) search |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto search_result_google::protobuf = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_google::protobuf, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| |
| // Verify join search: join a query for `name:person` with a child query for |
| // `body:message` based on the child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:message"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto expected_join_search_result_proto; |
| expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| SearchResultProto::ResultProto* result_proto = |
| expected_join_search_result_proto.mutable_results()->Add(); |
| *result_proto->mutable_document() = person; |
| *result_proto->mutable_joined_results()->Add()->mutable_document() = message2; |
| *result_proto->mutable_joined_results()->Add()->mutable_document() = message1; |
| |
| SearchResultProto search_result_proto3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( |
| expected_join_search_result_proto)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIndex) { |
| // Test the following scenario: term index is corrupted (e.g. checksum doesn't |
| // match). IcingSearchEngine should be able to recover term index. Several |
| // additional behaviors are also tested: |
| // - Index directory handling: |
| // - Should discard the entire term index directory and start it from |
| // scratch. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect since we start it |
| // from scratch. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| SearchSpecProto search_spec; |
| search_spec.set_query("body:message"); |
| search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| SearchResultProto expected_search_result_proto; |
| expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_search_result_proto.mutable_results()->Add()->mutable_document() = |
| message; |
| |
| { |
| // Initializes folder and schema, index one document |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| SearchResultProto search_result_proto = |
| icing.Search(search_spec, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| } // This should shut down IcingSearchEngine and persist anything it needs to |
| |
| // Manually corrupt term index |
| { |
| const std::string index_hit_buffer_file = GetIndexDir() + "/idx/lite.hb"; |
| ScopedFd fd(filesystem()->OpenForWrite(index_hit_buffer_file.c_str())); |
| ASSERT_TRUE(fd.is_valid()); |
| ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); |
| } |
| |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should be discarded once. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(1); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory "*/integer_index_dir/*" |
| // should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| EXPECT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Check that our index is ok by searching over the restored index |
| SearchResultProto search_result_proto = |
| icing.Search(search_spec, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, RecoverFromCorruptIntegerIndex) { |
| // Test the following scenario: integer index is corrupted (e.g. checksum |
| // doesn't match). IcingSearchEngine should be able to recover integer index. |
| // Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Should discard the entire integer index directory and start it from |
| // scratch. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded, since we start it from scratch. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| SearchSpecProto search_spec; |
| search_spec.set_query("indexableInteger == 123"); |
| search_spec.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto expected_search_result_proto; |
| expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_search_result_proto.mutable_results()->Add()->mutable_document() = |
| message; |
| |
| { |
| // Initializes folder and schema, index one document |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| SearchResultProto search_result_proto = |
| icing.Search(search_spec, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| } // This should shut down IcingSearchEngine and persist anything it needs to |
| |
| // Manually corrupt integer index |
| { |
| const std::string integer_index_metadata_file = |
| GetIntegerIndexDir() + "/integer_index.m"; |
| ScopedFd fd( |
| filesystem()->OpenForWrite(integer_index_metadata_file.c_str())); |
| ASSERT_TRUE(fd.is_valid()); |
| ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4)); |
| } |
| |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should be discarded once, and Clear() |
| // should never be called (i.e. storage sub directory "*/integer_index_dir/*" |
| // should never be discarded) since we start it from scratch. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(1); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| EXPECT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Check that our index is ok by searching over the restored index |
| SearchResultProto search_result_proto = |
| icing.Search(search_spec, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RecoverFromIntegerIndexBucketSplitThresholdChange) { |
| // Scenario: an integer index is built with the default |
| // integer_index_bucket_split_threshold and the engine is then re-opened with |
| // a different threshold. The integer index directory is expected to be |
| // discarded once and rebuilt, while the term index and the qualified id join |
| // index directories stay untouched. |
| |
| // "Message" type with one required, range-searchable int64 property. |
| SchemaProto message_schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto message_doc = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddInt64Property("indexableInteger", 123) |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // Phase 1: populate the indices with one message document, using the |
| // default options and a real filesystem. |
| { |
| TestIcingSearchEngine engine( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(engine.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(engine.SetSchema(message_schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(engine.Put(message_doc).status(), ProtoIsOk()); |
| } |
| |
| // Phase 2: re-open with a changed integer_index_bucket_split_threshold, |
| // which should force the integer index to be restored. |
| { |
| // The mock filesystem records which directories get deleted. The |
| // catch-all expectation is registered first; the path-specific ones that |
| // follow are consulted before it. |
| auto fs_mock = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Term index: the directory must never be deleted. |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Integer index: the directory itself is deleted exactly once. Nothing |
| // below it ("*/integer_index_dir/*") may be deleted, i.e. Clear() is not |
| // used because the index is recreated from scratch. |
| EXPECT_CALL(*fs_mock, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(1); |
| EXPECT_CALL(*fs_mock, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Qualified id join index: neither the directory nor anything below it |
| // ("*/qualified_id_join_index_dir/*", i.e. Clear()) may be deleted. |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *fs_mock, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| // Pick a threshold that is guaranteed to differ from the default one. |
| static constexpr int32_t kChangedBucketSplitThreshold = 1000; |
| IcingSearchEngineOptions changed_options = GetDefaultIcingOptions(); |
| ASSERT_THAT(kChangedBucketSplitThreshold, |
| Ne(changed_options.integer_index_bucket_split_threshold())); |
| changed_options.set_integer_index_bucket_split_threshold( |
| kChangedBucketSplitThreshold); |
| |
| TestIcingSearchEngine engine(changed_options, std::move(fs_mock), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto init_result = engine.Initialize(); |
| ASSERT_THAT(init_result.status(), ProtoIsOk()); |
| |
| // Only the integer index should report a restoration cause. |
| const InitializeStatsProto& init_stats = init_result.initialize_stats(); |
| EXPECT_THAT(init_stats.index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_stats.integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT(init_stats.qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // The rebuilt integer index must still answer a numeric query. |
| SearchSpecProto numeric_query; |
| numeric_query.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| numeric_query.add_enabled_features(std::string(kNumericSearchFeature)); |
| numeric_query.set_query("indexableInteger == 123"); |
| |
| SearchResultProto hits = |
| engine.Search(numeric_query, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(hits.results(), SizeIs(1)); |
| EXPECT_THAT(hits.results(0).document().uri(), Eq("message/1")); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RecoverFromCorruptQualifiedIdJoinIndex) { |
| // Scenario under test: the qualified id join index is corrupted on disk |
| // (e.g. its checksum no longer matches) and IcingSearchEngine has to rebuild |
| // it during initialization. Besides a successful recovery, the test also |
| // covers the following: |
| // - Directory handling: |
| // - The term index directory is left alone. |
| // - The integer index directory is left alone. |
| // - The whole qualified id join index directory is discarded and the |
| // index is rebuilt from scratch. |
| // - Index truncation: |
| // - "TruncateTo()" on the term index has no effect. |
| // - "Clear()" is not called on the integer index, i.e. nothing matching |
| // "*/integer_index_dir/*" is deleted. |
| // - "Clear()" is not called on the qualified id join index, i.e. nothing |
| // matching "*/qualified_id_join_index_dir/*" is deleted, because the |
| // index is recreated from scratch instead. |
| |
| SchemaProto joinable_schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| // Parent document. |
| DocumentProto person_doc = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| // Child document referring to the parent through "senderQualifiedId". |
| DocumentProto message_doc = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // Join query: parents match `name:person`, children match `body:message`, |
| // and the two are joined on the child's `senderQualifiedId` property. |
| SearchSpecProto join_query; |
| join_query.set_query("name:person"); |
| join_query.set_term_match_type(TermMatchType::EXACT_ONLY); |
| JoinSpecProto* join = join_query.mutable_join_spec(); |
| join->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join->set_child_property_expression("senderQualifiedId"); |
| join->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* child_spec = join->mutable_nested_spec(); |
| SearchSpecProto* child_query = child_spec->mutable_search_spec(); |
| child_query->set_query("body:message"); |
| child_query->set_term_match_type(TermMatchType::EXACT_ONLY); |
| *child_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *child_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| // Do not cap the number of joined children returned per parent. |
| ResultSpecProto join_result_spec(ResultSpecProto::default_instance()); |
| join_result_spec.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| // Expected outcome: the person with the message attached as its only child. |
| SearchResultProto expected_result; |
| expected_result.mutable_status()->set_code(StatusProto::OK); |
| SearchResultProto::ResultProto* expected_parent = |
| expected_result.add_results(); |
| *expected_parent->mutable_document() = person_doc; |
| *expected_parent->add_joined_results()->mutable_document() = message_doc; |
| |
| { |
| // Set up the directory and the schema, index both documents and make sure |
| // the join query works before anything gets corrupted. |
| TestIcingSearchEngine engine( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| EXPECT_THAT(engine.Initialize().status(), ProtoIsOk()); |
| EXPECT_THAT(engine.SetSchema(joinable_schema).status(), ProtoIsOk()); |
| EXPECT_THAT(engine.Put(person_doc).status(), ProtoIsOk()); |
| EXPECT_THAT(engine.Put(message_doc).status(), ProtoIsOk()); |
| SearchResultProto actual_result = engine.Search( |
| join_query, GetDefaultScoringSpec(), join_result_spec); |
| EXPECT_THAT(actual_result, |
| EqualsSearchResultIgnoreStatsAndScores(expected_result)); |
| } // Leaving the scope should shut the engine down and persist its state. |
| |
| // Corrupt the qualified id join index by overwriting the beginning of its |
| // metadata file with 4 arbitrary bytes. |
| { |
| const std::string metadata_path = |
| GetQualifiedIdJoinIndexDir() + "/metadata"; |
| ScopedFd metadata_fd(filesystem()->OpenForWrite(metadata_path.c_str())); |
| ASSERT_TRUE(metadata_fd.is_valid()); |
| ASSERT_TRUE(filesystem()->Write(metadata_fd.get(), "1234", 4)); |
| } |
| |
| // The mock filesystem records which directories get deleted. The catch-all |
| // expectation is registered first; the path-specific ones that follow are |
| // consulted before it. |
| auto fs_mock = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Term index: the directory must never be deleted. |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Integer index: neither the directory nor anything below it |
| // ("*/integer_index_dir/*", i.e. Clear()) may be deleted. |
| EXPECT_CALL(*fs_mock, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*fs_mock, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Qualified id join index: the directory itself is deleted exactly once, |
| // but nothing below it ("*/qualified_id_join_index_dir/*", i.e. Clear()) |
| // may be deleted. |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(1); |
| EXPECT_CALL(*fs_mock, DeleteDirectoryRecursively( |
| HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine engine(GetDefaultIcingOptions(), std::move(fs_mock), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto init_result = engine.Initialize(); |
| EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| |
| // Only the qualified id join index should report a restoration cause. |
| const InitializeStatsProto& init_stats = init_result.initialize_stats(); |
| EXPECT_THAT(init_stats.index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_stats.integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_stats.qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| |
| // The join query must return the same result on the restored index. |
| SearchResultProto restored_result = engine.Search( |
| join_query, GetDefaultScoringSpec(), join_result_spec); |
| EXPECT_THAT(restored_result, |
| EqualsSearchResultIgnoreStatsAndScores(expected_result)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseTermIndex) { |
| // Test the following scenario: losing the entire term index. Since we need |
| // flash index magic to determine the version, in this test we will throw out |
| // the entire term index and re-initialize an empty one, to bypass |
| // undetermined version state change and correctly trigger "lose term index" |
| // scenario. |
| // IcingSearchEngine should be able to recover term index. Several additional |
| // behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should not be discarded (but instead just being |
| // rebuilt by replaying all docs). |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect since it is empty. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with 1 person document and 3 message documents. The |
| // messages only differ in their uri. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Delete and re-initialize an empty term index to trigger |
| // RestoreIndexIfNeeded. |
| { |
| std::string idx_subdir = GetIndexDir() + "/idx"; |
| ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str())); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create(Index::Options(GetIndexDir(), |
| /*index_merge_size=*/100, |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/50), |
| filesystem(), icing_filesystem())); |
| ICING_ASSERT_OK(index->PersistToDisk()); |
| } |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded: the (empty) term |
| // index is expected to be rebuilt in place by replaying all docs. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Only the term index should report a restoration cause. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify term index works normally |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // All documents should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| // ASSERT (not EXPECT) on the size: the indexed accesses below would read |
| // the repeated field out of range if fewer children were returned. |
| ASSERT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, RestoreIndexLoseIntegerIndex) { |
| // Test the following scenario: losing the entire integer index directory. |
| // IcingSearchEngine should be able to recover integer index. Several |
| // additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should not be discarded since we've already |
| // lost it. Start it from scratch. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded, since we start it from scratch. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with 1 person document and 3 message documents. The |
| // messages only differ in their uri. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Delete the integer index directory to trigger RestoreIndexIfNeeded. |
| // The deletion must succeed, otherwise the scenario under test is never |
| // set up and the expectations below would be checked against an intact |
| // index. |
| std::string integer_index_dir = GetIntegerIndexDir(); |
| ASSERT_TRUE( |
| filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str())); |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded since we've |
| // already lost it, and Clear() should never be called (i.e. storage sub |
| // directory "*/integer_index_dir/*" should never be discarded) since we |
| // start it from scratch. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Only the integer index should report a restoration cause. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify term index works normally |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // All documents should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| // ASSERT (not EXPECT) on the size: the indexed accesses below would read |
| // the repeated field out of range if fewer children were returned. |
| ASSERT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexLoseQualifiedIdJoinIndex) { |
| // Test the following scenario: losing the entire qualified id join index |
| // directory. IcingSearchEngine should be able to recover qualified id join |
| // index. Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should not be discarded since we've |
| // already lost it. Start it from scratch. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded, since we start |
| // it from scratch. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with 3 message documents. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Delete the qualified id join index file to trigger RestoreIndexIfNeeded. |
| std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir(); |
| filesystem()->DeleteDirectoryRecursively(qualified_id_join_index_dir.c_str()); |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded) |
| // since we start it from scratch. |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| |
| // Verify term index works normally |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // All documents should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateLiteIndexWithoutReindexing) { |
| // Test the following scenario: term lite index is *completely* ahead of |
| // document store. IcingSearchEngine should be able to recover term index. |
| // Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index should take effect and throw out the |
| // entire lite index. This should be sufficient to make term index |
| // consistent with document store, so reindexing should not take place. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with a LiteIndex that will only allow a person and a |
| // message document before needing a merge. |
| { |
| IcingSearchEngineOptions options = GetDefaultIcingOptions(); |
| options.set_index_merge_size(person.ByteSizeLong() + |
| message.ByteSizeLong()); |
| TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| // Add two message documents. These should get merged into the main index. |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into term lite index and increment |
| // last_added_document_id, but don't merge into the main index. This will |
| // cause mismatched last_added_document_id with term index. |
| // - Document store: [0, 1, 2] |
| // - Term index |
| // - Main index: [0, 1, 2] |
| // - Lite index: [3] |
| // - Integer index: [0, 1, 2] |
| // - Qualified id join index: [0, 1, 2] |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create( |
| Index::Options(GetIndexDir(), |
| /*index_merge_size=*/message.ByteSizeLong(), |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/8), |
| filesystem(), icing_filesystem())); |
| DocumentId original_last_added_doc_id = index->last_added_document_id(); |
| index->set_last_added_document_id(original_last_added_doc_id + 1); |
| Index::Editor editor = |
| index->Edit(original_last_added_doc_id + 1, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, /*namespace_id=*/0); |
| ICING_ASSERT_OK(editor.BufferTerm("foo")); |
| ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); |
| } |
| |
| // 3. Create the index again. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. since we only call |
| // TruncateTo for term index. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| IcingSearchEngineOptions options = GetDefaultIcingOptions(); |
| options.set_index_merge_size(message.ByteSizeLong()); |
| TestIcingSearchEngine icing(options, std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Since truncating lite index is sufficient to make term index consistent |
| // with document store, replaying documents or reindex shouldn't take place. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify term index works normally |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // Only the documents that were in the main index should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(2)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(2)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| EXPECT_THAT(results3.results(0).joined_results(), SizeIs(2)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/1")); |
| } |
| |
| // 4. Since document 3 doesn't exist, testing query = "foo" is not enough to |
| // verify the correctness of term index restoration. Instead, we have to check |
| // hits for "foo" should not be found in term index. |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create( |
| Index::Options(GetIndexDir(), |
| /*index_merge_size=*/message.ByteSizeLong(), |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/8), |
| filesystem(), icing_filesystem())); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, |
| index->GetIterator("foo", /*term_start_index=*/0, |
| /*unnormalized_term_length=*/0, kSectionIdMaskAll, |
| TermMatchType::EXACT_ONLY)); |
| EXPECT_THAT(doc_hit_info_iter->Advance(), |
| StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateLiteIndexWithReindexing) { |
| // Test the following scenario: term lite index is *partially* ahead of |
| // document store. IcingSearchEngine should be able to recover term index. |
| // Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index should take effect and throw out the |
| // entire lite index. However, some valid data in term lite index were |
| // discarded together, so reindexing should still take place to recover |
| // them after truncating. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with a LiteIndex that will only allow a person and a |
| // message document before needing a merge. |
| { |
| IcingSearchEngineOptions options = GetDefaultIcingOptions(); |
| options.set_index_merge_size(message.ByteSizeLong()); |
| TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| // Add two message documents. These should get merged into the main index. |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| // Add one document. This one should get remain in the lite index. |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into term lite index and increment |
| // last_added_document_id, but don't merge into the main index. This will |
| // cause mismatched last_added_document_id with term index. |
| // - Document store: [0, 1, 2, 3] |
| // - Term index |
| // - Main index: [0, 1, 2] |
| // - Lite index: [3, 4] |
| // - Integer index: [0, 1, 2, 3] |
| // - Qualified id join index: [0, 1, 2, 3] |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create( |
| Index::Options(GetIndexDir(), |
| /*index_merge_size=*/message.ByteSizeLong(), |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/8), |
| filesystem(), icing_filesystem())); |
| DocumentId original_last_added_doc_id = index->last_added_document_id(); |
| index->set_last_added_document_id(original_last_added_doc_id + 1); |
| Index::Editor editor = |
| index->Edit(original_last_added_doc_id + 1, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, /*namespace_id=*/0); |
| ICING_ASSERT_OK(editor.BufferTerm("foo")); |
| ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); |
| } |
| |
| // 3. Create the index again. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. since we only call |
| // TruncateTo for term index. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| IcingSearchEngineOptions options = GetDefaultIcingOptions(); |
| options.set_index_merge_size(message.ByteSizeLong()); |
| TestIcingSearchEngine icing(options, std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Truncating lite index not only deletes data ahead document store, but |
| // also deletes valid data. Therefore, we still have to replay documents and |
| // reindex. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify term index works normally |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // Only the documents that were in the main index should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| } |
| |
| // 4. Since document 4 doesn't exist, testing query = "foo" is not enough to |
| // verify the correctness of term index restoration. Instead, we have to check |
| // hits for "foo" should not be found in term index. |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create( |
| Index::Options(GetIndexDir(), |
| /*index_merge_size=*/message.ByteSizeLong(), |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/8), |
| filesystem(), icing_filesystem())); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, |
| index->GetIterator("foo", /*term_start_index=*/0, |
| /*unnormalized_term_length=*/0, kSectionIdMaskAll, |
| TermMatchType::EXACT_ONLY)); |
| EXPECT_THAT(doc_hit_info_iter->Advance(), |
| StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateMainIndexWithoutReindexing) { |
| // Test the following scenario: term main index is *completely* ahead of |
| // document store. IcingSearchEngine should be able to recover term index. |
| // Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index should take effect and throw out the |
| // entire lite and main index. This should be sufficient to make term |
| // index consistent with document store (in this case, document store is |
| // empty as well), so reindexing should not take place. |
| // - "Clear()" should be called for integer index. It is a special case when |
| // document store has no document. Since there is no integer index storage |
| // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be |
| // discarded. |
| // - "Clear()" should be called for qualified id join index. It is a special |
| // case when document store has no document. |
| |
| // 1. Create an index with no document. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into term lite index and increment |
| // last_added_document_id. Merge some of them into the main index and keep |
| // others in the lite index. This will cause mismatched document id with |
| // document store. |
| // - Document store: [] |
| // - Term index |
| // - Main index: [0] |
| // - Lite index: [1] |
| // - Integer index: [] |
| // - Qualified id join index: [] |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create( |
| // index merge size is not important here because we will manually |
| // invoke merge below. |
| Index::Options(GetIndexDir(), /*index_merge_size=*/100, |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/50), |
| filesystem(), icing_filesystem())); |
| // Add hits for document 0 and merge. |
| ASSERT_THAT(index->last_added_document_id(), kInvalidDocumentId); |
| index->set_last_added_document_id(0); |
| Index::Editor editor = |
| index->Edit(/*document_id=*/0, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, /*namespace_id=*/0); |
| ICING_ASSERT_OK(editor.BufferTerm("foo")); |
| ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); |
| ICING_ASSERT_OK(index->Merge()); |
| |
| // Add hits for document 1 and don't merge. |
| index->set_last_added_document_id(1); |
| editor = index->Edit(/*document_id=*/1, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, /*namespace_id=*/0); |
| ICING_ASSERT_OK(editor.BufferTerm("bar")); |
| ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); |
| } |
| |
| // 3. Create the index again. This should throw out the lite and main index. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded, since we only call |
| // TruncateTo for term index. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded. Even though |
| // Clear() was called, it shouldn't take effect since there is no storage |
| // sub directory ("*/integer_index_dir/*") and nothing will be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded. |
| // Clear() was called and should discard and reinitialize the underlying |
| // mapper. |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(AtLeast(1)); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Since truncating main index is sufficient to make term index consistent |
| // with document store, replaying documents or reindexing shouldn't take |
| // place. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| } |
| |
| // 4. Since document 0, 1 don't exist, testing queries = "foo", "bar" are not |
| // enough to verify the correctness of term index restoration. Instead, we |
| // have to check hits for "foo", "bar" should not be found in term index. |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100, |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/50), |
| filesystem(), icing_filesystem())); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, |
| index->GetIterator("foo", /*term_start_index=*/0, |
| /*unnormalized_term_length=*/0, kSectionIdMaskAll, |
| TermMatchType::EXACT_ONLY)); |
| EXPECT_THAT(doc_hit_info_iter->Advance(), |
| StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| doc_hit_info_iter, |
| index->GetIterator("bar", /*term_start_index=*/0, |
| /*unnormalized_term_length=*/0, kSectionIdMaskAll, |
| TermMatchType::EXACT_ONLY)); |
| EXPECT_THAT(doc_hit_info_iter->Advance(), |
| StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateMainIndexWithReindexing) { |
| // Test the following scenario: term main index is *partially* ahead of |
| // document store. IcingSearchEngine should be able to recover term index. |
| // Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - In RestoreIndexIfNecessary(): |
| // - "TruncateTo()" for term index should take effect and throw out the |
| // entire lite and main index. However, some valid data in term main index |
| // were discarded together, so reindexing should still take place to |
| // recover them after truncating. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with 1 person document and 3 message documents. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into term lite index and increment |
| // last_added_document_id. Merge some of them into the main index and keep |
| // others in the lite index. This will cause mismatched document id with |
| // document store. |
| // - Document store: [0, 1, 2, 3] |
| // - Term index |
| // - Main index: [0, 1, 2, 3, 4] |
| // - Lite index: [5] |
| // - Integer index: [0, 1, 2, 3] |
| // - Qualified id join index: [0, 1, 2, 3] |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create( |
| Index::Options(GetIndexDir(), |
| /*index_merge_size=*/message.ByteSizeLong(), |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/8), |
| filesystem(), icing_filesystem())); |
| // Add hits for document 4 and merge. |
| DocumentId original_last_added_doc_id = index->last_added_document_id(); |
| index->set_last_added_document_id(original_last_added_doc_id + 1); |
| Index::Editor editor = |
| index->Edit(original_last_added_doc_id + 1, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, /*namespace_id=*/0); |
| ICING_ASSERT_OK(editor.BufferTerm("foo")); |
| ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); |
| ICING_ASSERT_OK(index->Merge()); |
| |
| // Add hits for document 5 and don't merge. |
| index->set_last_added_document_id(original_last_added_doc_id + 2); |
| editor = index->Edit(original_last_added_doc_id + 2, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, /*namespace_id=*/0); |
| ICING_ASSERT_OK(editor.BufferTerm("bar")); |
| ICING_ASSERT_OK(editor.IndexAllBufferedTerms()); |
| } |
| |
| // 3. Create the index again. This should throw out the lite and main index |
| // and trigger index restoration. |
| { |
| // Mock filesystem to observe all indices; later EXPECT_CALLs take precedence. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded, since we only call |
| // TruncateTo for term index. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Truncating main index not only deletes data ahead of document store, but |
| // also deletes valid data. Therefore, we still have to replay documents and |
| // reindex. |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify term index works normally. |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // All 3 message documents should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally. |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| } |
| |
| // 4. Since documents 4 and 5 don't exist, searching for "foo" and "bar" is |
| // not enough to verify the correctness of term index restoration. Instead, |
| // we check directly that no hits for "foo" or "bar" remain in term index. |
| { |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create(Index::Options(GetIndexDir(), /*index_merge_size=*/100, |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/50), |
| filesystem(), icing_filesystem())); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<DocHitInfoIterator> doc_hit_info_iter, |
| index->GetIterator("foo", /*term_start_index=*/0, |
| /*unnormalized_term_length=*/0, kSectionIdMaskAll, |
| TermMatchType::EXACT_ONLY)); |
| EXPECT_THAT(doc_hit_info_iter->Advance(), |
| StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| doc_hit_info_iter, |
| index->GetIterator("bar", /*term_start_index=*/0, |
| /*unnormalized_term_length=*/0, kSectionIdMaskAll, |
| TermMatchType::EXACT_ONLY)); |
| EXPECT_THAT(doc_hit_info_iter->Advance(), |
| StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateIntegerIndexWithoutReindexing) { |
| // Test the following scenario: integer index is *completely* ahead of |
| // document store. IcingSearchEngine should be able to recover integer index. |
| // Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" should be called for integer index and throw out all integer |
| // index storages, i.e. all storage sub directories (path_expr = |
| // "*/integer_index_dir/*") should be discarded. This should be sufficient |
| // to make integer index consistent with document store (in this case, |
| // document store is empty as well), so reindexing should not take place. |
| // - "Clear()" should be called for qualified id join index. It is a special |
| // case when document store has no document. |
| |
| // 1. Create an index with no document. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into integer index and increment |
| // last_added_document_id. This will cause mismatched document id with |
| // document store. |
| // - Document store: [] |
| // - Term index: [] |
| // - Integer index: [0] |
| // - Qualified id join index: [] |
| { |
| Filesystem filesystem; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<IntegerIndex> integer_index, |
| IntegerIndex::Create(filesystem, GetIntegerIndexDir(), |
| /*num_data_threshold_for_bucket_split=*/65536, |
| /*pre_mapping_fbv=*/false)); |
| // Add hits for document 0. |
| ASSERT_THAT(integer_index->last_added_document_id(), kInvalidDocumentId); |
| integer_index->set_last_added_document_id(0); |
| std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit( |
| /*property_path=*/"indexableInteger", /*document_id=*/0, |
| /*section_id=*/0); |
| ICING_ASSERT_OK(editor->BufferKey(123)); |
| ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys()); |
| } |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe all indices; later EXPECT_CALLs take precedence. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| // Clear() should be called to truncate integer index and thus storage sub |
| // directory (path_expr = "*/integer_index_dir/*") should be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(1); |
| // Ensure qualified id join index directory should never be discarded. |
| // Clear() was called and should discard and reinitialize the underlying |
| // mapper. |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(AtLeast(1)); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| // Since truncating integer index is sufficient to make it consistent with |
| // document store, replaying documents or reindexing shouldn't take place. |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify that restoration wiped out the pre-existing integer index hit for |
| // 'indexableInteger' == 123. Add a new document with a different value |
| // (456) that is expected to take docid=0. If the integer index was not |
| // rebuilt correctly, then it will still have the previously added hit for |
| // 'indexableInteger' == 123 for docid 0 and incorrectly return this new |
| // doc in a query. |
| DocumentProto another_message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 456) |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk()); |
| // Query for the stale value (123); no document should match. |
| SearchSpecProto search_spec; |
| search_spec.set_query("indexableInteger == 123"); |
| search_spec.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results = |
| icing.Search(search_spec, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results.results(), IsEmpty()); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateIntegerIndexWithReindexing) { |
| // Test the following scenario: integer index is *partially* ahead of document |
| // store. IcingSearchEngine should be able to recover integer index. Several |
| // additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" should be called for integer index and throw out all integer |
| // index storages, i.e. all storage sub directories (path_expr = |
| // "*/integer_index_dir/*") should be discarded. However, some valid data |
| // in integer index were discarded together, so reindexing should still |
| // take place to recover them after clearing. |
| // - "Clear()" shouldn't be called for qualified id join index, i.e. no |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_OPTIONAL)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with 1 person document and 3 message documents. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into integer index and increment |
| // last_added_document_id. This will cause mismatched document id with |
| // document store. |
| // - Document store: [0, 1, 2, 3] |
| // - Term index: [0, 1, 2, 3] |
| // - Integer index: [0, 1, 2, 3, 4] |
| // - Qualified id join index: [0, 1, 2, 3] |
| { |
| Filesystem filesystem; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<IntegerIndex> integer_index, |
| IntegerIndex::Create(filesystem, GetIntegerIndexDir(), |
| /*num_data_threshold_for_bucket_split=*/65536, |
| /*pre_mapping_fbv=*/false)); |
| // Add hits for document 4. |
| DocumentId original_last_added_doc_id = |
| integer_index->last_added_document_id(); |
| integer_index->set_last_added_document_id(original_last_added_doc_id + 1); |
| std::unique_ptr<NumericIndex<int64_t>::Editor> editor = integer_index->Edit( |
| /*property_path=*/"indexableInteger", |
| /*document_id=*/original_last_added_doc_id + 1, /*section_id=*/0); |
| ICING_ASSERT_OK(editor->BufferKey(456)); |
| ICING_ASSERT_OK(std::move(*editor).IndexAllBufferedKeys()); |
| } |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe all indices; later EXPECT_CALLs take precedence. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| // Clear() should be called to truncate integer index and thus storage sub |
| // directory (path_expr = "*/integer_index_dir/*") should be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(1); |
| // Ensure qualified id join index directory should never be discarded, and |
| // Clear() should never be called (i.e. storage sub directory |
| // "*/qualified_id_join_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(0); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| |
| // Verify term index works normally. |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // All documents should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally. |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| |
| // Verify that restoration wiped out the pre-existing integer index hit for |
| // 'indexableInteger' == 456. Add a new document without any value for |
| // 'indexableInteger' that is expected to take docid=4. If the integer index |
| // was not rebuilt correctly, then it will still have the previously added |
| // hit for 'indexableInteger' == 456 for docid 4 and incorrectly return this |
| // new doc in a query. |
| DocumentProto another_message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/4") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk()); |
| // Query for the stale value (456); no document should match. |
| SearchSpecProto search_spec; |
| search_spec.set_query("indexableInteger == 456"); |
| search_spec.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results = |
| icing.Search(search_spec, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results.results(), IsEmpty()); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateQualifiedIdJoinIndexWithoutReindexing) { |
| // Test the following scenario: qualified id join index is *completely* ahead |
| // of document store. IcingSearchEngine should be able to recover qualified id |
| // join index. Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" should be called for integer index. It is a special case when |
| // document store has no document. Since there is no integer index storage |
| // sub directories (path_expr = "*/integer_index_dir/*"), nothing will be |
| // discarded. |
| // - "Clear()" should be called for qualified id join index and throw out |
| // all data, i.e. discarding the underlying mapper (path_expr = |
| // "*/qualified_id_join_index_dir/*") and reinitialize. This should be |
| // sufficient to make qualified id join index consistent with document |
| // store (in this case, document store is empty as well), so reindexing |
| // should not take place. |
| |
| // 1. Create an index with no document. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| } |
| |
| // 2. Manually add some data into integer index and increment |
| // last_added_document_id. This will cause mismatched document id with |
| // document store. |
| // - Document store: [] |
| // - Term index: [] |
| // - Integer index: [] |
| // - Qualified id join index: [0] |
| { |
| Filesystem filesystem; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index, |
| QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(), |
| /*pre_mapping_fbv=*/false, |
| /*use_persistent_hash_map=*/false)); |
| // Add data for document 0. |
| ASSERT_THAT(qualified_id_join_index->last_added_document_id(), |
| kInvalidDocumentId); |
| qualified_id_join_index->set_last_added_document_id(0); |
| ICING_ASSERT_OK(qualified_id_join_index->Put( |
| DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0), |
| /*ref_qualified_id_str=*/"namespace#person")); |
| } |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded. Even though |
| // Clear() was called, it shouldn't take effect since there is no storage |
| // sub directory ("*/integer_index_dir/*") and nothing will be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| // Clear() should be called to truncate qualified id join index and thus |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(AtLeast(1)); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| // Since truncating qualified id join index is sufficient to make it |
| // consistent with document store, replaying documents or reindexing |
| // shouldn't take place. |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| } |
| |
| // 4. Since document 0 doesn't exist, testing join query is not enough to |
| // verify the correctness of qualified id join index restoration. Instead, we |
| // have to check the previously added data should not be found in qualified id |
| // join index. |
| { |
| Filesystem filesystem; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index, |
| QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(), |
| /*pre_mapping_fbv=*/false, |
| /*use_persistent_hash_map=*/false)); |
| EXPECT_THAT(qualified_id_join_index->Get( |
| DocJoinInfo(/*document_id=*/0, /*joinable_property_id=*/0)), |
| StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| RestoreIndexTruncateQualifiedIdJoinIndexWithReindexing) { |
| // Test the following scenario: qualified id join index is *partially* ahead |
| // of document store. IcingSearchEngine should be able to recover qualified id |
| // join index. Several additional behaviors are also tested: |
| // - Index directory handling: |
| // - Term index directory should be unaffected. |
| // - Integer index directory should be unaffected. |
| // - Qualified id join index directory should be unaffected. |
| // - Truncate indices: |
| // - "TruncateTo()" for term index shouldn't take effect. |
| // - "Clear()" shouldn't be called for integer index, i.e. no integer index |
| // storage sub directories (path_expr = "*/integer_index_dir/*") should be |
| // discarded. |
| // - "Clear()" should be called for qualified id join index and throw out |
| // all data, i.e. discarding the underlying mapper (path_expr = |
| // "*/qualified_id_join_index_dir/*") and reinitialize. However, some |
| // valid data in qualified id join index were discarded together, so |
| // reindexing should still take place to recover them after clearing. |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_OPTIONAL))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| // 1. Create an index with message 3 documents. |
| { |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/2").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| message = DocumentBuilder(message).SetUri("message/3").Build(); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| DocJoinInfo additional_data_key; |
| // 2. Manually add some data into qualified id join index and increment |
| // last_added_document_id. This will cause mismatched document id with |
| // document store. |
| // - Document store: [0, 1, 2, 3] |
| // - Term index: [0, 1, 2, 3] |
| // - Integer index: [0, 1, 2, 3] |
| // - Qualified id join index: [0, 1, 2, 3, 4] |
| { |
| Filesystem filesystem; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index, |
| QualifiedIdJoinIndex::Create(filesystem, GetQualifiedIdJoinIndexDir(), |
| /*pre_mapping_fbv=*/false, |
| /*use_persistent_hash_map=*/false)); |
| // Add data for document 4. |
| DocumentId original_last_added_doc_id = |
| qualified_id_join_index->last_added_document_id(); |
| qualified_id_join_index->set_last_added_document_id( |
| original_last_added_doc_id + 1); |
| additional_data_key = |
| DocJoinInfo(/*document_id=*/original_last_added_doc_id + 1, |
| /*joinable_property_id=*/0); |
| ICING_ASSERT_OK(qualified_id_join_index->Put( |
| additional_data_key, |
| /*ref_qualified_id_str=*/"namespace#person")); |
| } |
| |
| // 3. Create the index again. This should trigger index restoration. |
| { |
| // Mock filesystem to observe and check the behavior of all indices. |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(_)) |
| .WillRepeatedly(DoDefault()); |
| // Ensure term index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/index_dir"))) |
| .Times(0); |
| // Ensure integer index directory should never be discarded, and Clear() |
| // should never be called (i.e. storage sub directory |
| // "*/integer_index_dir/*" should never be discarded). |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(EndsWith("/integer_index_dir"))) |
| .Times(0); |
| EXPECT_CALL(*mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/integer_index_dir/"))) |
| .Times(0); |
| // Ensure qualified id join index directory should never be discarded. |
| EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively( |
| EndsWith("/qualified_id_join_index_dir"))) |
| .Times(0); |
| // Clear() should be called to truncate qualified id join index and thus |
| // underlying storage sub directory (path_expr = |
| // "*/qualified_id_join_index_dir/*") should be discarded. |
| EXPECT_CALL( |
| *mock_filesystem, |
| DeleteDirectoryRecursively(HasSubstr("/qualified_id_join_index_dir/"))) |
| .Times(AtLeast(1)); |
| |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| ASSERT_THAT(initialize_result.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| |
| // Verify term index works normally |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:consectetur"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto results1 = |
| icing.Search(search_spec1, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(results1.status(), ProtoIsOk()); |
| EXPECT_THAT(results1.next_page_token(), Eq(0)); |
| // All documents should be retrievable. |
| ASSERT_THAT(results1.results(), SizeIs(3)); |
| EXPECT_THAT(results1.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results1.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results1.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify integer index works normally |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto results2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| ASSERT_THAT(results2.results(), SizeIs(3)); |
| EXPECT_THAT(results2.results(0).document().uri(), Eq("message/3")); |
| EXPECT_THAT(results2.results(1).document().uri(), Eq("message/2")); |
| EXPECT_THAT(results2.results(2).document().uri(), Eq("message/1")); |
| |
| // Verify qualified id join index works normally: join a query for |
| // `name:person` with a child query for `body:consectetur` based on the |
| // child's `senderQualifiedId` field. |
| |
| // Add document 4 without "senderQualifiedId". If join index is not rebuilt |
| // correctly, then it will still have the previously added senderQualifiedId |
| // for document 4 and include document 4 incorrectly in the right side. |
| DocumentProto another_message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/4") |
| .SetSchema("Message") |
| .AddStringProperty("body", kIpsumText) |
| .AddInt64Property("indexableInteger", 123) |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| EXPECT_THAT(icing.Put(another_message).status(), ProtoIsOk()); |
| |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:consectetur"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto results3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| ASSERT_THAT(results3.results(), SizeIs(1)); |
| EXPECT_THAT(results3.results(0).document().uri(), Eq("person")); |
| EXPECT_THAT(results3.results(0).joined_results(), SizeIs(3)); |
| EXPECT_THAT(results3.results(0).joined_results(0).document().uri(), |
| Eq("message/3")); |
| EXPECT_THAT(results3.results(0).joined_results(1).document().uri(), |
| Eq("message/2")); |
| EXPECT_THAT(results3.results(0).joined_results(2).document().uri(), |
| Eq("message/1")); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| DocumentWithNoIndexedPropertyDoesntCauseRestoreIndex) { |
| // 1. Create an index with a single document in it that has no indexed |
| // content. |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| |
| // Set a schema for a single type that has no indexed properties. |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType( |
| SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("unindexedField") |
| .SetDataTypeString(TERM_MATCH_UNKNOWN, |
| TOKENIZER_NONE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("unindexedInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_UNKNOWN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| // Add a document that contains no indexed properties. |
| DocumentProto document = |
| DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("unindexedField", |
| "Don't you dare search over this!") |
| .AddInt64Property("unindexedInteger", -123) |
| .Build(); |
| EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| // 2. Create the index again. This should NOT trigger a recovery of any kind. |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| InitializeResultProto init_result = icing.Initialize(); |
| EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| init_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) { |
| // 1. Create an index with a single document in it that has no valid indexed |
| // tokens in its content. |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType( |
| SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_OPTIONAL)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_OPTIONAL))) |
| .Build(); |
| // Set a schema for a single type that has no term, integer, join indexed |
| // contents. |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| |
| // Add a document that contains: |
| // - No valid indexed string content - just punctuation |
| // - No integer content - since it is an optional property |
| // - No qualified id content - since it is an optional property |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "?...!") |
| .Build(); |
| EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| // 2. Create the index again. This should NOT trigger a recovery of any kind. |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| InitializeResultProto init_result = icing.Initialize(); |
| EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| init_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(init_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogFunctionLatency) { |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogNumberOfDocuments) { |
| DocumentProto document1 = DocumentBuilder() |
| .SetKey("icing", "fake_type/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| DocumentProto document2 = DocumentBuilder() |
| .SetKey("icing", "fake_type/2") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 456) |
| .Build(); |
| |
| { |
| // Initialize and put a document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), |
| Eq(0)); |
| |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); |
| } |
| |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), |
| Eq(1)); |
| |
| // Put another document. |
| ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); |
| } |
| |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(), |
| Eq(2)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) { |
| // Even though the fake timer will return 10, all the latency numbers related |
| // to recovery / restoration should be 0 during the first-time initialization. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCausePartialDataLoss) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| |
| { |
| // Initialize and put a document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| { |
| // Append a non-checksummed document. This will mess up the checksum of the |
| // proto log, forcing it to rewind and later return a DATA_LOSS error. |
| const std::string serialized_document = document.SerializeAsString(); |
| const std::string document_log_file = absl_ports::StrCat( |
| GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); |
| |
| int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str()); |
| filesystem()->PWrite(document_log_file.c_str(), file_size, |
| serialized_document.data(), |
| serialized_document.size()); |
| } |
| |
| { |
| // Document store will rewind to previous checkpoint. The cause should be |
| // DATA_LOSS and the data status should be PARTIAL_LOSS. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::PARTIAL_LOSS)); |
| // Since document store rewinds to previous checkpoint, last stored doc id |
| // will be consistent with last added document ids in term/integer indices, |
| // so there will be no index restoration. |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseCompleteDataLoss) { |
| DocumentProto document1 = DocumentBuilder() |
| .SetKey("icing", "fake_type/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| |
| const std::string document_log_file = absl_ports::StrCat( |
| GetDocumentDir(), "/", DocumentLogCreator::GetDocumentLogFilename()); |
| int64_t corruptible_offset; |
| |
| { |
| // Initialize and put a document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| |
| // There's some space at the beginning of the file (e.g. header, kmagic, |
| // etc) that is necessary to initialize the FileBackedProtoLog. We can't |
| // corrupt that region, so we need to figure out the offset at which |
| // documents will be written to - which is the file size after |
| // initialization. |
| corruptible_offset = filesystem()->GetFileSize(document_log_file.c_str()); |
| |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk()); |
| } |
| |
| { |
| // "Corrupt" the content written in the log. Make the corrupt document |
| // smaller than our original one so we don't accidentally write past our |
| // file. |
| DocumentProto document = |
| DocumentBuilder().SetKey("invalid_namespace", "invalid_uri").Build(); |
| std::string serialized_document = document.SerializeAsString(); |
| ASSERT_TRUE(filesystem()->PWrite( |
| document_log_file.c_str(), corruptible_offset, |
| serialized_document.data(), serialized_document.size())); |
| |
| PortableFileBackedProtoLog<DocumentWrapper>::Header header = |
| ReadDocumentLogHeader(*filesystem(), document_log_file); |
| |
| // Set dirty bit to true to reflect that something changed in the log. |
| header.SetDirtyFlag(true); |
| header.SetHeaderChecksum(header.CalculateHeaderChecksum()); |
| |
| WriteDocumentLogHeader(*filesystem(), document_log_file, header); |
| } |
| |
| { |
| // Document store will completely rewind. The cause should be DATA_LOSS and |
| // the data status should be COMPLETE_LOSS. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::COMPLETE_LOSS)); |
| // The complete rewind of ground truth causes us to clear the index, but |
| // that's not considered a restoration. |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseIndexInconsistentWithGroundTruth) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| { |
| // Initialize and put a document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| { |
| // Delete and re-initialize an empty index file to trigger |
| // RestoreIndexIfNeeded. |
| std::string idx_subdir = GetIndexDir() + "/idx"; |
| ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str())); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create(Index::Options(GetIndexDir(), |
| /*index_merge_size=*/100, |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/50), |
| filesystem(), icing_filesystem())); |
| ICING_ASSERT_OK(index->PersistToDisk()); |
| } |
| |
| { |
| // Index is empty but ground truth is not. Index should be restored due to |
| // the inconsistency. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| TEST_F( |
| IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseIntegerIndexInconsistentWithGroundTruth) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| { |
| // Initialize and put a document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| { |
| // Delete the integer index file to trigger RestoreIndexIfNeeded. |
| std::string integer_index_dir = GetIntegerIndexDir(); |
| filesystem()->DeleteDirectoryRecursively(integer_index_dir.c_str()); |
| } |
| |
| { |
| // Index is empty but ground truth is not. Index should be restored due to |
| // the inconsistency. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| TEST_F( |
| IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexInconsistentWithGroundTruth) { |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| { |
| // Initialize and put documents. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| { |
| // Delete the qualified id join index file to trigger RestoreIndexIfNeeded. |
| std::string qualified_id_join_index_dir = GetQualifiedIdJoinIndexDir(); |
| filesystem()->DeleteDirectoryRecursively( |
| qualified_id_join_index_dir.c_str()); |
| } |
| |
| { |
| // Index is empty but ground truth is not. Index should be restored due to |
| // the inconsistency. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseSchemaChangesOutOfSync) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| IcingSearchEngineOptions options = GetDefaultIcingOptions(); |
| { |
| // Initialize and put one document. |
| IcingSearchEngine icing(options, GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| { |
| // Simulate a schema change where power is lost after the schema is written. |
| SchemaProto new_schema = |
| SchemaBuilder() |
| .AddType( |
| SchemaTypeConfigBuilder(CreateMessageSchemaTypeConfig()) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("subject") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_OPTIONAL))) |
| .Build(); |
| // Write the marker file |
| std::string marker_filepath = |
| absl_ports::StrCat(options.base_dir(), "/set_schema_marker"); |
| ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str())); |
| ASSERT_TRUE(sfd.is_valid()); |
| |
| // Write the new schema |
| FakeClock fake_clock; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); |
| ICING_EXPECT_OK(schema_store->SetSchema( |
| new_schema, /*ignore_errors_and_delete_documents=*/false, |
| /*allow_circular_schema_definitions=*/false)); |
| } |
| |
| { |
| // Both document store and index should be recovered from checksum mismatch. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| |
| { |
| // No recovery should be needed. |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseIndexIOError) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| { |
| // Initialize and put one document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| std::string lite_index_buffer_file_path = |
| absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb"); |
| auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>(); |
| EXPECT_CALL(*mock_icing_filesystem, OpenForWrite(_)) |
| .WillRepeatedly(DoDefault()); |
| // This fails Index::Create() once. |
| EXPECT_CALL(*mock_icing_filesystem, |
| OpenForWrite(Eq(lite_index_buffer_file_path))) |
| .WillOnce(Return(-1)) |
| .WillRepeatedly(DoDefault()); |
| |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::move(mock_icing_filesystem), |
| std::move(fake_clock), GetTestJniCache()); |
| |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseIntegerIndexIOError) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| { |
| // Initialize and put one document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| std::string integer_index_metadata_file = |
| absl_ports::StrCat(GetIntegerIndexDir(), "/integer_index.m"); |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, OpenForWrite(_)).WillRepeatedly(DoDefault()); |
| // This fails IntegerIndex::Create() once. |
| EXPECT_CALL(*mock_filesystem, OpenForWrite(Eq(integer_index_metadata_file))) |
| .WillOnce(Return(-1)) |
| .WillRepeatedly(DoDefault()); |
| |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseQualifiedIdJoinIndexIOError) { |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person = |
| DocumentBuilder() |
| .SetKey("namespace", "person") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message/1") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| { |
| // Initialize and put documents. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(person).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(message).status(), ProtoIsOk()); |
| } |
| |
| std::string qualified_id_join_index_metadata_file = |
| absl_ports::StrCat(GetQualifiedIdJoinIndexDir(), "/metadata"); |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, PRead(A<const char*>(), _, _, _)) |
| .WillRepeatedly(DoDefault()); |
| // This fails QualifiedIdJoinIndex::Create() once. |
| EXPECT_CALL( |
| *mock_filesystem, |
| PRead(Matcher<const char*>(Eq(qualified_id_join_index_metadata_file)), _, |
| _, _)) |
| .WillOnce(Return(false)) |
| .WillRepeatedly(DoDefault()); |
| |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseDocStoreIOError) { |
| DocumentProto document = DocumentBuilder() |
| .SetKey("icing", "fake_type/0") |
| .SetSchema("Message") |
| .AddStringProperty("body", "message body") |
| .AddInt64Property("indexableInteger", 123) |
| .Build(); |
| { |
| // Initialize and put one document. |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| ASSERT_THAT(icing.Put(document).status(), ProtoIsOk()); |
| } |
| |
| std::string document_store_header_file_path = |
| absl_ports::StrCat(GetDocumentDir(), "/document_store_header"); |
| auto mock_filesystem = std::make_unique<MockFilesystem>(); |
| EXPECT_CALL(*mock_filesystem, Read(A<const char*>(), _, _)) |
| .WillRepeatedly(DoDefault()); |
| // This fails DocumentStore::InitializeDerivedFiles() once. |
| EXPECT_CALL( |
| *mock_filesystem, |
| Read(Matcher<const char*>(Eq(document_store_header_file_path)), _, _)) |
| .WillOnce(Return(false)) |
| .WillRepeatedly(DoDefault()); |
| |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::move(mock_filesystem), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(0)); |
| } |
| |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogRecoveryCauseSchemaStoreIOError) { |
| { |
| IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); |
| ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| } |
| |
| { |
| // Delete the schema store type mapper to trigger an I/O error. |
| std::string schema_store_header_file_path = |
| GetSchemaDir() + "/schema_type_mapper"; |
| ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively( |
| schema_store_header_file_path.c_str())); |
| } |
| |
| { |
| auto fake_clock = std::make_unique<FakeClock>(); |
| fake_clock->SetTimerElapsedMilliseconds(10); |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::move(fake_clock), GetTestJniCache()); |
| InitializeResultProto initialize_result_proto = icing.Initialize(); |
| EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk()); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_cause(), |
| Eq(InitializeStatsProto::IO_ERROR)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .schema_store_recovery_latency_ms(), |
| Eq(10)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .document_store_recovery_latency_ms(), |
| Eq(0)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().document_store_data_status(), |
| Eq(InitializeStatsProto::NO_DATA_LOSS)); |
| EXPECT_THAT( |
| initialize_result_proto.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::NONE)); |
| EXPECT_THAT(initialize_result_proto.initialize_stats() |
| .index_restoration_latency_ms(), |
| Eq(0)); |
| } |
| } |
| |
| // Checks that Initialize() reports a num_schema_types stat matching the schema |
| // left behind by the preceding engine instance: 0 on empty storage, then 1, |
| // then 2. |
| TEST_F(IcingSearchEngineInitializationTest, |
| InitializeShouldLogNumberOfSchemaTypes) { |
| // Round 1: empty storage, so no schema types are expected. |
| { |
| IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache()); |
| const InitializeResultProto init_result = engine.Initialize(); |
| EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(0)); |
| |
| // Leave a schema with one type config behind for the next round. |
| ASSERT_THAT(engine.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); |
| } |
| |
| // Round 2: the schema set in round 1 should be counted as 1 schema type. |
| { |
| IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache()); |
| const InitializeResultProto init_result = engine.Initialize(); |
| EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(1)); |
| |
| // Replace it with a schema holding two type configs: Email and Message. |
| SchemaProto email_and_message_schema = CreateEmailSchema(); |
| *email_and_message_schema.add_types() = CreateMessageSchemaTypeConfig(); |
| ASSERT_THAT(engine.SetSchema(email_and_message_schema).status(), |
| ProtoIsOk()); |
| } |
| |
| // Round 3: there should be 2 schema types. |
| { |
| IcingSearchEngine engine(GetDefaultIcingOptions(), GetTestJniCache()); |
| const InitializeResultProto init_result = engine.Initialize(); |
| EXPECT_THAT(init_result.status(), ProtoIsOk()); |
| EXPECT_THAT(init_result.initialize_stats().num_schema_types(), Eq(2)); |
| } |
| } |
| |
| // Value-parameterized flavor of IcingSearchEngineInitializationTest: each test |
| // instance reads a version_util::VersionInfo through GetParam(). |
| class IcingSearchEngineInitializationVersionChangeTest |
| : public IcingSearchEngineInitializationTest, |
| public testing::WithParamInterface<version_util::VersionInfo> { |
| }; |
| |
| // Parameterized over the VersionInfo that is written to the version file |
| // before the final Initialize(). Expects the document store and all three |
| // indices (term, integer, qualified id join) to report VERSION_CHANGED as their |
| // recovery/restoration cause, the version file to be rewritten to kVersion, and |
| // term, numeric and join searches to return only the correct stored document. |
| TEST_P(IcingSearchEngineInitializationVersionChangeTest, |
| RecoverFromVersionChange) { |
| // TODO(b/280697513): test backup schema migration |
| // Test the following scenario: version change. All derived data should be |
| // rebuilt. We test this by manually adding some invalid derived data and |
| // verifying they're removed due to rebuild. |
| SchemaProto schema = |
| SchemaBuilder() |
| .AddType(SchemaTypeConfigBuilder().SetType("Person").AddProperty( |
| PropertyConfigBuilder() |
| .SetName("name") |
| .SetDataTypeString(TERM_MATCH_PREFIX, TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .AddType(SchemaTypeConfigBuilder() |
| .SetType("Message") |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("body") |
| .SetDataTypeString(TERM_MATCH_PREFIX, |
| TOKENIZER_PLAIN) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("indexableInteger") |
| .SetDataTypeInt64(NUMERIC_MATCH_RANGE) |
| .SetCardinality(CARDINALITY_REQUIRED)) |
| .AddProperty(PropertyConfigBuilder() |
| .SetName("senderQualifiedId") |
| .SetDataTypeJoinableString( |
| JOINABLE_VALUE_TYPE_QUALIFIED_ID) |
| .SetCardinality(CARDINALITY_REQUIRED))) |
| .Build(); |
| |
| DocumentProto person1 = |
| DocumentBuilder() |
| .SetKey("namespace", "person/1") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto person2 = |
| DocumentBuilder() |
| .SetKey("namespace", "person/2") |
| .SetSchema("Person") |
| .AddStringProperty("name", "person") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| DocumentProto message = |
| DocumentBuilder() |
| .SetKey("namespace", "message") |
| .SetSchema("Message") |
| .AddStringProperty("body", "correct message") |
| .AddInt64Property("indexableInteger", 123) |
| .AddStringProperty("senderQualifiedId", "namespace#person/1") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| |
| { |
| // Initializes folder and schema, index person1 and person2 |
| TestIcingSearchEngine icing( |
| GetDefaultIcingOptions(), std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), std::make_unique<FakeClock>(), |
| GetTestJniCache()); |
| EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); |
| EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(person1).status(), ProtoIsOk()); |
| EXPECT_THAT(icing.Put(person2).status(), ProtoIsOk()); |
| } // This should shut down IcingSearchEngine and persist anything it needs to |
| |
| { |
| // Manually: |
| // - Put message into DocumentStore |
| // - But add some incorrect data for message into 3 indices |
| // - Change version file |
| // |
| // These will make sure last_added_document_id is consistent with |
| // last_stored_document_id, so if Icing didn't handle version change |
| // correctly, then the index won't be rebuilt. |
| FakeClock fake_clock; |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock)); |
| |
| // Put message into DocumentStore |
| ICING_ASSERT_OK_AND_ASSIGN( |
| DocumentStore::CreateResult create_result, |
| DocumentStore::Create( |
| filesystem(), GetDocumentDir(), &fake_clock, schema_store.get(), |
| /*force_recovery_and_revalidate_documents=*/false, |
| /*namespace_id_fingerprint=*/false, /*pre_mapping_fbv=*/false, |
| /*use_persistent_hash_map=*/false, |
| PortableFileBackedProtoLog< |
| DocumentWrapper>::kDeflateCompressionLevel, |
| /*initialize_stats=*/nullptr)); |
| std::unique_ptr<DocumentStore> document_store = |
| std::move(create_result.document_store); |
| ICING_ASSERT_OK_AND_ASSIGN(DocumentId doc_id, document_store->Put(message)); |
| |
| // Index doc_id with incorrect data |
| Index::Options options(GetIndexDir(), /*index_merge_size=*/1024 * 1024, |
| /*lite_index_sort_at_indexing=*/true, |
| /*lite_index_sort_size=*/1024 * 8); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<Index> index, |
| Index::Create(options, filesystem(), icing_filesystem())); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<IntegerIndex> integer_index, |
| IntegerIndex::Create(*filesystem(), GetIntegerIndexDir(), |
| /*num_data_threshold_for_bucket_split=*/65536, |
| /*pre_mapping_fbv=*/false)); |
| |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<QualifiedIdJoinIndex> qualified_id_join_index, |
| QualifiedIdJoinIndex::Create( |
| *filesystem(), GetQualifiedIdJoinIndexDir(), |
| /*pre_mapping_fbv=*/false, /*use_persistent_hash_map=*/false)); |
| |
| // One handler per index type, so a single IndexDocument() call below writes |
| // the incorrect content into all three indices. |
| ICING_ASSERT_OK_AND_ASSIGN( |
| std::unique_ptr<StringSectionIndexingHandler> |
| string_section_indexing_handler, |
| StringSectionIndexingHandler::Create(&fake_clock, normalizer_.get(), |
| index.get())); |
| ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<IntegerSectionIndexingHandler> |
| integer_section_indexing_handler, |
| IntegerSectionIndexingHandler::Create( |
| &fake_clock, integer_index.get())); |
| ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<QualifiedIdJoinIndexingHandler> |
| qualified_id_join_indexing_handler, |
| QualifiedIdJoinIndexingHandler::Create( |
| &fake_clock, qualified_id_join_index.get())); |
| std::vector<std::unique_ptr<DataIndexingHandler>> handlers; |
| handlers.push_back(std::move(string_section_indexing_handler)); |
| handlers.push_back(std::move(integer_section_indexing_handler)); |
| handlers.push_back(std::move(qualified_id_join_indexing_handler)); |
| IndexProcessor index_processor(std::move(handlers), &fake_clock); |
| |
| // Same key as `message`, but every indexed property holds a different value. |
| DocumentProto incorrect_message = |
| DocumentBuilder() |
| .SetKey("namespace", "message") |
| .SetSchema("Message") |
| .AddStringProperty("body", "wrong message") |
| .AddInt64Property("indexableInteger", 456) |
| .AddStringProperty("senderQualifiedId", "namespace#person/2") |
| .SetCreationTimestampMs(kDefaultCreationTimestampMs) |
| .Build(); |
| ICING_ASSERT_OK_AND_ASSIGN( |
| TokenizedDocument tokenized_document, |
| TokenizedDocument::Create(schema_store.get(), lang_segmenter_.get(), |
| std::move(incorrect_message))); |
| ICING_ASSERT_OK(index_processor.IndexDocument(tokenized_document, doc_id)); |
| |
| // Change existing data's version file |
| const version_util::VersionInfo& existing_version_info = GetParam(); |
| ICING_ASSERT_OK(version_util::WriteVersion( |
| *filesystem(), GetVersionFilename(), existing_version_info)); |
| } |
| |
| // Re-initialize the engine on top of the manually modified data. Note that |
| // non-mock Filesystem/IcingFilesystem instances are used here. |
| TestIcingSearchEngine icing(GetDefaultIcingOptions(), |
| std::make_unique<Filesystem>(), |
| std::make_unique<IcingFilesystem>(), |
| std::make_unique<FakeClock>(), GetTestJniCache()); |
| InitializeResultProto initialize_result = icing.Initialize(); |
| EXPECT_THAT(initialize_result.status(), ProtoIsOk()); |
| // Index Restoration should be triggered here. Incorrect data should be |
| // deleted and correct data of message should be indexed. |
| EXPECT_THAT( |
| initialize_result.initialize_stats().document_store_recovery_cause(), |
| Eq(InitializeStatsProto::VERSION_CHANGED)); |
| EXPECT_THAT(initialize_result.initialize_stats().index_restoration_cause(), |
| Eq(InitializeStatsProto::VERSION_CHANGED)); |
| EXPECT_THAT( |
| initialize_result.initialize_stats().integer_index_restoration_cause(), |
| Eq(InitializeStatsProto::VERSION_CHANGED)); |
| EXPECT_THAT(initialize_result.initialize_stats() |
| .qualified_id_join_index_restoration_cause(), |
| Eq(InitializeStatsProto::VERSION_CHANGED)); |
| |
| // Manually check version file |
| ICING_ASSERT_OK_AND_ASSIGN( |
| version_util::VersionInfo version_info_after_init, |
| version_util::ReadVersion(*filesystem(), GetVersionFilename(), |
| GetIndexDir())); |
| EXPECT_THAT(version_info_after_init.version, Eq(version_util::kVersion)); |
| EXPECT_THAT(version_info_after_init.max_version, |
| Eq(std::max(version_util::kVersion, GetParam().max_version))); |
| |
| SearchResultProto expected_search_result_proto; |
| expected_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| *expected_search_result_proto.mutable_results()->Add()->mutable_document() = |
| message; |
| |
| // Verify term search |
| SearchSpecProto search_spec1; |
| search_spec1.set_query("body:correct"); |
| search_spec1.set_term_match_type(TermMatchType::EXACT_ONLY); |
| SearchResultProto search_result_proto1 = |
| icing.Search(search_spec1, GetDefaultScoringSpec(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto1, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| |
| // Verify numeric (integer) search |
| SearchSpecProto search_spec2; |
| search_spec2.set_query("indexableInteger == 123"); |
| search_spec2.set_search_type( |
| SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY); |
| search_spec2.add_enabled_features(std::string(kNumericSearchFeature)); |
| |
| SearchResultProto search_result_proto2 = |
| icing.Search(search_spec2, ScoringSpecProto::default_instance(), |
| ResultSpecProto::default_instance()); |
| EXPECT_THAT(search_result_proto2, EqualsSearchResultIgnoreStatsAndScores( |
| expected_search_result_proto)); |
| |
| // Verify join search: join a query for `name:person` with a child query for |
| // `body:message` based on the child's `senderQualifiedId` field. |
| SearchSpecProto search_spec3; |
| search_spec3.set_term_match_type(TermMatchType::EXACT_ONLY); |
| search_spec3.set_query("name:person"); |
| JoinSpecProto* join_spec = search_spec3.mutable_join_spec(); |
| join_spec->set_parent_property_expression( |
| std::string(JoinProcessor::kQualifiedIdExpr)); |
| join_spec->set_child_property_expression("senderQualifiedId"); |
| join_spec->set_aggregation_scoring_strategy( |
| JoinSpecProto::AggregationScoringStrategy::COUNT); |
| JoinSpecProto::NestedSpecProto* nested_spec = |
| join_spec->mutable_nested_spec(); |
| SearchSpecProto* nested_search_spec = nested_spec->mutable_search_spec(); |
| nested_search_spec->set_term_match_type(TermMatchType::EXACT_ONLY); |
| nested_search_spec->set_query("body:message"); |
| *nested_spec->mutable_scoring_spec() = GetDefaultScoringSpec(); |
| *nested_spec->mutable_result_spec() = ResultSpecProto::default_instance(); |
| |
| ResultSpecProto result_spec3 = ResultSpecProto::default_instance(); |
| result_spec3.set_max_joined_children_per_parent_to_return( |
| std::numeric_limits<int32_t>::max()); |
| |
| SearchResultProto expected_join_search_result_proto; |
| expected_join_search_result_proto.mutable_status()->set_code(StatusProto::OK); |
| // Person 1 with message |
| SearchResultProto::ResultProto* result_proto = |
| expected_join_search_result_proto.mutable_results()->Add(); |
| *result_proto->mutable_document() = person1; |
| *result_proto->mutable_joined_results()->Add()->mutable_document() = message; |
| // Person 2 without children |
| *expected_join_search_result_proto.mutable_results() |
| ->Add() |
| ->mutable_document() = person2; |
| |
| SearchResultProto search_result_proto3 = icing.Search( |
| search_spec3, ScoringSpecProto::default_instance(), result_spec3); |
| EXPECT_THAT(search_result_proto3, EqualsSearchResultIgnoreStatsAndScores( |
| expected_join_search_result_proto)); |
| } |
| |
| INSTANTIATE_TEST_SUITE_P( |
| IcingSearchEngineInitializationVersionChangeTest, |
| IcingSearchEngineInitializationVersionChangeTest, |
| testing::Values( |
| // Each value is written to the version file before re-initialization. Here: |
| // version and max_version are kVersion + 1, so it will detect "rollback". |
| version_util::VersionInfo( |
| /*version_in=*/version_util::kVersion + 1, |
| /*max_version_in=*/version_util::kVersion + 1), |
| |
| // Currently there is no "upgrade" that requires rebuilding derived files, |
| // so that case is skipped until one exists. |
| |
| // Manually change the existing data set's version to kVersion - 1 and |
| // max_version to kVersion. When initializing, it will detect "roll |
| // forward". |
| version_util::VersionInfo( |
| /*version_in=*/version_util::kVersion - 1, |
| /*max_version_in=*/version_util::kVersion), |
| |
| // Manually change the existing data set's version to 0 and max_version to |
| // 0. When initializing, it will detect "version 0 upgrade". |
| // |
| // Note: in reality, version 0 won't be written into the version file, but |
| // it is ok here since it is a hack to simulate the version 0 situation. |
| version_util::VersionInfo( |
| /*version_in=*/0, |
| /*max_version_in=*/0), |
| |
| // Manually change the existing data set's version to 0 and max_version to |
| // kVersion. When initializing, it will detect "version 0 roll forward". |
| // |
| // Note: in reality, version 0 won't be written into the version file, but |
| // it is ok here since it is a hack to simulate the version 0 situation. |
| version_util::VersionInfo( |
| /*version_in=*/0, |
| /*max_version_in=*/version_util::kVersion))); |
| |
| } // namespace |
| } // namespace lib |
| } // namespace icing |