blob: d841e793768fcd3e6f096c77c17fb2c38c1fe51f [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/file/posting_list/index-block.h"
#include <memory>
#include <string>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/file/posting_list/posting-list-common.h"
#include "icing/index/hit/hit.h"
#include "icing/index/main/posting-list-hit-serializer.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::IsFalse;
using ::testing::IsTrue;
static constexpr int kBlockSize = 4096;
class IndexBlockTest : public ::testing::Test {
protected:
void SetUp() override {
test_dir_ = GetTestTempDir() + "/flash";
flash_file_ = test_dir_ + "/0";
ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(test_dir_.c_str()));
sfd_ = std::make_unique<ScopedFd>(
filesystem_.OpenForWrite(flash_file_.c_str()));
ASSERT_TRUE(sfd_->is_valid());
// Grow the file by one block for the IndexBlock to use.
ASSERT_TRUE(filesystem_.Grow(sfd_->get(), kBlockSize));
// TODO: test different serializers
serializer_ = std::make_unique<PostingListHitSerializer>();
}
void TearDown() override {
serializer_.reset();
sfd_.reset();
ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()));
}
Filesystem filesystem_;
std::string test_dir_;
std::string flash_file_;
std::unique_ptr<ScopedFd> sfd_;
std::unique_ptr<PostingListHitSerializer> serializer_;
};
TEST_F(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
constexpr int kPostingListBytes = 24;
{
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
}
}
TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
constexpr int kPostingListBytes1 = 24;
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes1));
EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes1));
// There should be (4096 - 12) / 24 = 170 posting lists
// (sizeof(BlockHeader)==12). We can store a PostingListIndex of 170 in only 8
// bits.
EXPECT_THAT(block.max_num_posting_lists(), Eq(170));
EXPECT_THAT(block.posting_list_index_bits(), Eq(8));
constexpr int kPostingListBytes2 = 240;
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
block, IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(), /*offset=*/0,
kBlockSize, kPostingListBytes2));
EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes2));
// There should be (4096 - 12) / 240 = 17 posting lists
// (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5
// bits.
EXPECT_THAT(block.max_num_posting_lists(), Eq(17));
EXPECT_THAT(block.posting_list_index_bits(), Eq(5));
}
TEST_F(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
constexpr int kPostingListBytes = 2004;
std::vector<Hit> test_hits{
Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
};
PostingListIndex allocated_index;
{
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info,
block.AllocatePostingList());
for (const Hit& hit : test_hits) {
ICING_ASSERT_OK(
serializer_->PrependHit(&alloc_info.posting_list_used, hit));
}
EXPECT_THAT(
serializer_->GetHits(&alloc_info.posting_list_used),
IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
ICING_ASSERT_OK(block.WritePostingListToDisk(
alloc_info.posting_list_used, alloc_info.posting_list_index));
allocated_index = alloc_info.posting_list_index;
}
{
// Create an IndexBlock from the previously allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock::PostingListAndBlockInfo pl_block_info,
block.GetAllocatedPostingList(allocated_index));
EXPECT_THAT(
serializer_->GetHits(&pl_block_info.posting_list_used),
IsOkAndHolds(ElementsAreArray(test_hits.rbegin(), test_hits.rend())));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
}
}
TEST_F(IndexBlockTest, IndexBlockMultiplePostingLists) {
constexpr int kPostingListBytes = 2004;
std::vector<Hit> hits_in_posting_list1{
Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
};
std::vector<Hit> hits_in_posting_list2{
Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
};
PostingListIndex allocated_index_1;
PostingListIndex allocated_index_2;
{
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list1) {
ICING_ASSERT_OK(
serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
}
EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
// Add hits to the second posting list.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list2) {
ICING_ASSERT_OK(
serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
}
EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
// Write both posting lists to disk.
ICING_ASSERT_OK(block.WritePostingListToDisk(
alloc_info_1.posting_list_used, alloc_info_1.posting_list_index));
ICING_ASSERT_OK(block.WritePostingListToDisk(
alloc_info_2.posting_list_used, alloc_info_2.posting_list_index));
allocated_index_1 = alloc_info_1.posting_list_index;
allocated_index_2 = alloc_info_2.posting_list_index;
}
{
// Create an IndexBlock from the previously allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock::PostingListAndBlockInfo pl_block_info_1,
block.GetAllocatedPostingList(allocated_index_1));
EXPECT_THAT(serializer_->GetHits(&pl_block_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock::PostingListAndBlockInfo pl_block_info_2,
block.GetAllocatedPostingList(allocated_index_2));
EXPECT_THAT(serializer_->GetHits(&pl_block_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
}
}
TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
constexpr int kPostingListBytes = 2004;
// Create an IndexBlock from this newly allocated file block.
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
// Add hits to the first posting list.
std::vector<Hit> hits_in_posting_list1{
Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
};
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list1) {
ICING_ASSERT_OK(
serializer_->PrependHit(&alloc_info_1.posting_list_used, hit));
}
EXPECT_THAT(serializer_->GetHits(&alloc_info_1.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list1.rbegin(),
hits_in_posting_list1.rend())));
// Add hits to the second posting list.
std::vector<Hit> hits_in_posting_list2{
Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
};
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
block.AllocatePostingList());
for (const Hit& hit : hits_in_posting_list2) {
ICING_ASSERT_OK(
serializer_->PrependHit(&alloc_info_2.posting_list_used, hit));
}
EXPECT_THAT(serializer_->GetHits(&alloc_info_2.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list2.rbegin(),
hits_in_posting_list2.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
// Now free the first posting list. Then, reallocate it and fill it with a
// different set of hits.
ICING_ASSERT_OK(block.FreePostingList(alloc_info_1.posting_list_index));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
std::vector<Hit> hits_in_posting_list3{
Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
};
ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_3,
block.AllocatePostingList());
EXPECT_THAT(alloc_info_3.posting_list_index,
Eq(alloc_info_3.posting_list_index));
for (const Hit& hit : hits_in_posting_list3) {
ICING_ASSERT_OK(
serializer_->PrependHit(&alloc_info_3.posting_list_used, hit));
}
EXPECT_THAT(serializer_->GetHits(&alloc_info_3.posting_list_used),
IsOkAndHolds(ElementsAreArray(hits_in_posting_list3.rbegin(),
hits_in_posting_list3.rend())));
EXPECT_THAT(block.AllocatePostingList(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsFalse()));
}
TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
constexpr int kPostingListBytes = 2004;
constexpr int kSomeBlockIndex = 22;
{
// Create an IndexBlock from this newly allocated file block and set the
// next block index.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
EXPECT_THAT(block.SetNextBlockIndex(kSomeBlockIndex), IsOk());
EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
}
{
// Create an IndexBlock from this previously allocated file block and make
// sure that next_block_index is still set properly.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromPreexistingIndexBlockRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize));
EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kSomeBlockIndex));
}
{
// Create an IndexBlock, treating this file block as uninitialized. This
// reset the next_block_index to kInvalidBlockIndex.
ICING_ASSERT_OK_AND_ASSIGN(
IndexBlock block, IndexBlock::CreateFromUninitializedRegion(
&filesystem_, serializer_.get(), sfd_->get(),
/*offset=*/0, kBlockSize, kPostingListBytes));
EXPECT_THAT(block.GetNextBlockIndex(), IsOkAndHolds(kInvalidBlockIndex));
}
}
} // namespace
} // namespace lib
} // namespace icing