blob: c2460ff3be115bb8e222003863513cdf36d4540b [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/index/main/posting-list-hit-accessor.h"
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/flash-index-storage.h"
#include "icing/file/posting_list/posting-list-accessor.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/file/posting_list/posting-list-identifier.h"
#include "icing/index/hit/hit.h"
#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/hit-test-utils.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Lt;
using ::testing::SizeIs;
class PostingListHitAccessorTest : public ::testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/test_dir";
file_name_ = test_dir_ + "/test_file.idx.index";
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
serializer_ = std::make_unique<PostingListHitSerializer>();
ICING_ASSERT_OK_AND_ASSIGN(
FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name_, &filesystem_, serializer_.get()));
flash_index_storage_ =
std::make_unique<FlashIndexStorage>(std::move(flash_index_storage));
}
void TearDown() override {
flash_index_storage_.reset();
serializer_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
}
Filesystem filesystem_;
std::string test_dir_;
std::string file_name_;
std::unique_ptr<PostingListHitSerializer> serializer_;
std::unique_ptr<FlashIndexStorage> flash_index_storage_;
};
TEST_F(PostingListHitAccessorTest, HitsAddAndRetrieveProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
// Add some hits! Any hits!
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
}
PostingListAccessor::FinalizeResult result =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result.status);
EXPECT_THAT(result.id.block_index(), Eq(1));
EXPECT_THAT(result.id.posting_list_index(), Eq(0));
// Retrieve some hits.
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage_->GetPostingList(result.id));
EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
EXPECT_THAT(pl_holder.next_block_index, Eq(kInvalidBlockIndex));
}
TEST_F(PostingListHitAccessorTest, PreexistingPLKeepOnSameBlock) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
// Add a single hit. This will fit in a min-sized posting list.
Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result1.status);
// Should have been allocated to the first block.
EXPECT_THAT(result1.id.block_index(), Eq(1));
EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
// Add one more hit. The minimum size for a posting list must be able to fit
// at least two hits, so this should NOT cause the previous pl to be
// reallocated.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
PostingListHitAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/1);
ICING_ASSERT_OK(pl_accessor->PrependHit(hit2));
PostingListAccessor::FinalizeResult result2 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result2.status);
// Should have been allocated to the same posting list as the first hit.
EXPECT_THAT(result2.id, Eq(result1.id));
// The posting list at result2.id should hold all of the hits that have been
// added.
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage_->GetPostingList(result2.id));
EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAre(hit2, hit1)));
}
TEST_F(PostingListHitAccessorTest, PreexistingPLReallocateToLargerPL) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
// Use a small posting list of 30 bytes. The first 17 hits will be compressed
// to one byte each and will be able to fit in the 18 byte padded region. The
// last hit will fit in one of the special hits. The posting list will be
// ALMOST_FULL and can fit at most 2 more hits.
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/18, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
}
PostingListAccessor::FinalizeResult result1 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result1.status);
// Should have been allocated to the first block.
EXPECT_THAT(result1.id.block_index(), Eq(1));
EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
// Now let's add some more hits!
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
PostingListHitAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
// The current posting list can fit at most 2 more hits.
std::vector<Hit> hits2 = CreateHits(
/*last_hit=*/hits1.back(), /*num_hits=*/2,
/*desired_byte_length=*/1);
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
}
PostingListAccessor::FinalizeResult result2 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result2.status);
// The 2 hits should still fit on the first block
EXPECT_THAT(result1.id.block_index(), Eq(1));
EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
// Add one more hit
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
PostingListHitAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result2.id));
// The current posting list should be FULL. Adding more hits should result in
// these hits being moved to a larger posting list.
Hit single_hit =
CreateHit(/*last_hit=*/hits2.back(), /*desired_byte_length=*/1);
ICING_ASSERT_OK(pl_accessor->PrependHit(single_hit));
PostingListAccessor::FinalizeResult result3 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result3.status);
// Should have been allocated to the second (new) block because the posting
// list should have grown beyond the size that the first block maintains.
EXPECT_THAT(result3.id.block_index(), Eq(2));
EXPECT_THAT(result3.id.posting_list_index(), Eq(0));
// The posting list at result3.id should hold all of the hits that have been
// added.
for (const Hit& hit : hits2) {
hits1.push_back(hit);
}
hits1.push_back(single_hit);
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage_->GetPostingList(result3.id));
EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
}
TEST_F(PostingListHitAccessorTest, MultiBlockChainsBlocksProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
// Add some hits! Any hits!
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5000, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
}
PostingListAccessor::FinalizeResult result1 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result1.status);
PostingListIdentifier second_block_id = result1.id;
// Should have been allocated to the second block, which holds a max-sized
// posting list.
EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
/*block_index=*/2, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0)));
// Now let's retrieve them!
ICING_ASSERT_OK_AND_ASSIGN(
PostingListHolder pl_holder,
flash_index_storage_->GetPostingList(second_block_id));
// This pl_holder will only hold a posting list with the hits that didn't fit
// on the first block.
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> second_block_hits,
serializer_->GetHits(&pl_holder.posting_list));
ASSERT_THAT(second_block_hits, SizeIs(Lt(hits1.size())));
auto first_block_hits_start = hits1.rbegin() + second_block_hits.size();
EXPECT_THAT(second_block_hits,
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0);
ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
flash_index_storage_->GetPostingList(pl_id));
EXPECT_THAT(
serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(first_block_hits_start, hits1.rend())));
}
TEST_F(PostingListHitAccessorTest, PreexistingMultiBlockReusesBlocksProperly) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
// Add some hits! Any hits!
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5000, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
}
PostingListAccessor::FinalizeResult result1 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result1.status);
PostingListIdentifier first_add_id = result1.id;
EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
/*block_index=*/2, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0)));
// Now add a couple more hits. These should fit on the existing, not full
// second block.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor,
PostingListHitAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), first_add_id));
std::vector<Hit> hits2 = CreateHits(
/*start_docid=*/hits1.back().document_id() + 1, /*num_hits=*/50,
/*desired_byte_length=*/1);
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
}
PostingListAccessor::FinalizeResult result2 =
std::move(*pl_accessor).Finalize();
ICING_EXPECT_OK(result2.status);
PostingListIdentifier second_add_id = result2.id;
EXPECT_THAT(second_add_id, Eq(first_add_id));
// We should be able to retrieve all 5050 hits.
for (const Hit& hit : hits2) {
hits1.push_back(hit);
}
ICING_ASSERT_OK_AND_ASSIGN(
PostingListHolder pl_holder,
flash_index_storage_->GetPostingList(second_add_id));
// This pl_holder will only hold a posting list with the hits that didn't fit
// on the first block.
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> second_block_hits,
serializer_->GetHits(&pl_holder.posting_list));
ASSERT_THAT(second_block_hits, SizeIs(Lt(hits1.size())));
auto first_block_hits_start = hits1.rbegin() + second_block_hits.size();
EXPECT_THAT(second_block_hits,
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
uint32_t first_block_id = pl_holder.next_block_index;
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0);
ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
flash_index_storage_->GetPostingList(pl_id));
EXPECT_THAT(
serializer_->GetHits(&pl_holder.posting_list),
IsOkAndHolds(ElementsAreArray(first_block_hits_start, hits1.rend())));
}
TEST_F(PostingListHitAccessorTest, InvalidHitReturnsInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
Hit invalid_hit(Hit::kInvalidValue);
EXPECT_THAT(pl_accessor->PrependHit(invalid_hit),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(PostingListHitAccessorTest, HitsNotDecreasingReturnsInvalidArgument) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(pl_accessor->PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(pl_accessor->PrependHit(hit3),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(PostingListHitAccessorTest, NewPostingListNoHitsAdded) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
PostingListAccessor::FinalizeResult result1 =
std::move(*pl_accessor).Finalize();
EXPECT_THAT(result1.status,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST_F(PostingListHitAccessorTest, PreexistingPostingListNoHitsAdded) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor,
PostingListHitAccessor::Create(flash_index_storage_.get(),
serializer_.get()));
Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
std::move(*pl_accessor).Finalize();
ICING_ASSERT_OK(result1.status);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<PostingListHitAccessor> pl_accessor2,
PostingListHitAccessor::CreateFromExisting(
flash_index_storage_.get(), serializer_.get(), result1.id));
PostingListAccessor::FinalizeResult result2 =
std::move(*pl_accessor2).Finalize();
ICING_ASSERT_OK(result2.status);
}
} // namespace
} // namespace lib
} // namespace icing