blob: ea135efd661a555b3461b9b2e72940d2dddcc94d [file] [log] [blame]
// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/index/main/posting-list-hit-serializer.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <iterator>
#include <limits>
#include <vector>
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/posting_list/posting-list-used.h"
#include "icing/index/hit/hit.h"
#include "icing/legacy/index/icing-bit-util.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/hit-test-utils.h"
using testing::ElementsAre;
using testing::ElementsAreArray;
using testing::Eq;
using testing::Gt;
using testing::IsEmpty;
using testing::IsFalse;
using testing::IsTrue;
using testing::Le;
using testing::Lt;
namespace icing {
namespace lib {
namespace {
struct HitElt {
HitElt() = default;
explicit HitElt(const Hit &hit_in) : hit(hit_in) {}
static Hit get_hit(const HitElt &hit_elt) { return hit_elt.hit; }
Hit hit;
};
TEST(PostingListHitSerializerTest, PostingListUsedPrependHitNotFull) {
PostingListHitSerializer serializer;
static const int kNumHits = 2551;
static const size_t kHitsSize = kNumHits * sizeof(Hit);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// Make used.
Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/56,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0));
// Size = sizeof(uncompressed hit0::Value)
// + sizeof(hit0::Flags)
// + sizeof(hit0::TermFrequency)
int expected_size =
sizeof(Hit::Value) + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
Hit hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
uint8_t delta_buf[VarInt::kMaxEncodedLen64];
size_t delta_len = PostingListHitSerializer::EncodeNextHitValue(
/*next_hit_value=*/hit1.value(),
/*curr_hit_value=*/hit0.value(), delta_buf);
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1));
// Size = sizeof(uncompressed hit1::Value)
// + sizeof(hit0-hit1)
// + sizeof(hit0::Flags)
// + sizeof(hit0::TermFrequency)
expected_size += delta_len;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit1, hit0)));
Hit hit2(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/56,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
delta_len = PostingListHitSerializer::EncodeNextHitValue(
/*next_hit_value=*/hit2.value(),
/*curr_hit_value=*/hit1.value(), delta_buf);
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2));
// Size = sizeof(uncompressed hit2::Value) + sizeof(hit2::Flags)
// + sizeof(hit2::TermFrequency)
// + sizeof(hit1-hit2)
// + sizeof(hit0-hit1)
// + sizeof(hit0::flags)
// + sizeof(hit0::term_frequency)
expected_size += delta_len + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
Hit hit3(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
delta_len = PostingListHitSerializer::EncodeNextHitValue(
/*next_hit_value=*/hit3.value(),
/*curr_hit_value=*/hit2.value(), delta_buf);
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3));
// Size = sizeof(uncompressed hit3::Value)
// + sizeof(hit2-hit3) + sizeof(hit2::Flags)
// + sizeof(hit2::TermFrequency)
// + sizeof(hit1-hit2)
// + sizeof(hit0-hit1)
// + sizeof(hit0::flags)
// + sizeof(hit0::term_frequency)
expected_size += delta_len;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
}
TEST(PostingListHitSerializerTest,
PostingListUsedPrependHitAlmostFull_withFlags) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Fill up the compressed region.
// Transitions:
// Adding hit0: EMPTY -> NOT_FULL
// Adding hit1: NOT_FULL -> NOT_FULL
// Adding hit2: NOT_FULL -> NOT_FULL
Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2, /*term_frequency=*/57,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/true);
EXPECT_THAT(hit2.has_flags(), IsTrue());
EXPECT_THAT(hit2.has_term_frequency(), IsTrue());
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
// Size used will be 4 (hit2::Value) + 1 (hit2::Flags) + 1
// (hit2::TermFrequency) + 2 (hit1-hit2) + 3 (hit0-hit1) = 11 bytes
int expected_size = sizeof(Hit::Value) + sizeof(Hit::Flags) +
sizeof(Hit::TermFrequency) + 2 + 3;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
// Add one more hit to transition NOT_FULL -> ALMOST_FULL
Hit hit3 =
CreateHit(hit2, /*desired_byte_length=*/3, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(hit3.has_flags(), IsFalse());
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
// Storing them in the compressed region requires 4 (hit3::Value) + 3
// (hit2-hit3) + 1 (hit2::Flags) + 1 (hit2::TermFrequency) + 2 (hit1-hit2) + 3
// (hit0-hit1) = 14 bytes, but there are only 12 bytes in the compressed
// region. So instead, the posting list will transition to ALMOST_FULL.
// The in-use compressed region will actually shrink from 11 bytes to 10 bytes
// because the uncompressed version of hit2 will be overwritten with the
// compressed delta of hit2. hit3 will be written to one of the special hits.
// Because we're in ALMOST_FULL, the expected size is the size of the pl minus
// the one hit used to mark the posting list as ALMOST_FULL.
expected_size = pl_size - sizeof(Hit);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
// Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4));
// There are currently 10 bytes in use in the compressed region. hit3 will
// have a 2-byte delta, which fits in the compressed region. Hit3 will be
// moved from the special hit to the compressed region (which will have 12
// bytes in use after adding hit3). Hit4 will be placed in one of the special
// hits and the posting list will remain in ALMOST_FULL.
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0)));
// Add one more hit to transition ALMOST_FULL -> FULL
Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5));
// There are currently 12 bytes in use in the compressed region. hit4 will
// have a 2-byte delta which will not fit in the compressed region. So hit4
// will remain in one of the special hits and hit5 will occupy the other,
// making the posting list FULL.
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)));
// The posting list is FULL. Adding another hit should fail.
Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1);
EXPECT_THAT(serializer.PrependHit(&pl_used, hit6),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Fill up the compressed region.
// Transitions:
// Adding hit0: EMPTY -> NOT_FULL
// Adding hit1: NOT_FULL -> NOT_FULL
// Adding hit2: NOT_FULL -> NOT_FULL
Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/3);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
// Size used will be 4 (hit2::Value) + 3 (hit1-hit2) + 3 (hit0-hit1)
// = 10 bytes
int expected_size = sizeof(Hit::Value) + 3 + 3;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
// Add one more hit to transition NOT_FULL -> ALMOST_FULL
Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
// Storing them in the compressed region requires 4 (hit3::Value) + 3
// (hit2-hit3) + 3 (hit1-hit2) + 3 (hit0-hit1) = 13 bytes, but there are only
// 12 bytes in the compressed region. So instead, the posting list will
// transition to ALMOST_FULL.
// The in-use compressed region will actually shrink from 10 bytes to 9 bytes
// because the uncompressed version of hit2 will be overwritten with the
// compressed delta of hit2. hit3 will be written to one of the special hits.
// Because we're in ALMOST_FULL, the expected size is the size of the pl minus
// the one hit used to mark the posting list as ALMOST_FULL.
expected_size = pl_size - sizeof(Hit);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
// Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4));
// There are currently 9 bytes in use in the compressed region. Hit3 will
// have a 2-byte delta, which fits in the compressed region. Hit3 will be
// moved from the special hit to the compressed region (which will have 11
// bytes in use after adding hit3). Hit4 will be placed in one of the special
// hits and the posting list will remain in ALMOST_FULL.
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0)));
// Add one more hit to transition ALMOST_FULL -> FULL
Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5));
// There are currently 11 bytes in use in the compressed region. Hit4 will
// have a 2-byte delta which will not fit in the compressed region. So hit4
// will remain in one of the special hits and hit5 will occupy the other,
// making the posting list FULL.
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)));
// The posting list is FULL. Adding another hit should fail.
Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1);
EXPECT_THAT(serializer.PrependHit(&pl_used, hit6),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
TEST(PostingListHitSerializerTest, PrependHitsWithSameValue) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Fill up the compressed region.
Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2, /*term_frequency=*/57,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/true);
// Create hit3 with the same value but different flags as hit2 (hit3_flags
// is set to have all currently-defined flags enabled)
Hit::Flags hit3_flags = 0;
for (int i = 0; i < Hit::kNumFlagsInFlagsField; ++i) {
hit3_flags |= (1 << i);
}
Hit hit3(hit2.value(), /*term_frequency=*/hit2.term_frequency(),
/*flags=*/hit3_flags);
// hit3 is larger than hit2 (its flag value is larger), and so needs to be
// prepended first
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
// Posting list is now ALMOST_FULL
// ----------------------
// 23-21 delta(Hit #0)
// 20-19 delta(Hit #1)
// 18 term-frequency(Hit #2)
// 17 flags(Hit #2)
// 16 delta(Hit #2) = 0
// 15-12 <unused padding = 0>
// 11-6 Hit #3
// 5-0 kSpecialHit
// ----------------------
int bytes_used = pl_size - sizeof(Hit);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit2, hit3, hit1, hit0)));
}
TEST(PostingListHitSerializerTest, PostingListUsedMinSize) {
PostingListHitSerializer serializer;
// Min size = 12
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
&serializer, serializer.GetMinPostingListSize()));
// PL State: EMPTY
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
// Add a hit, PL should shift to ALMOST_FULL state
Hit hit0(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/0,
/*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
// Size = sizeof(uncompressed hit0)
int expected_size = sizeof(Hit);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
// Add the smallest hit possible - no term_frequency, non-prefix hit and a
// delta of 0b10. PL should shift to FULL state.
Hit hit1(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/0,
/*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false);
ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
// Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
expected_size += sizeof(Hit);
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit1, hit0)));
// Try to add the smallest hit possible. Should fail
Hit hit2(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/0,
/*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false);
EXPECT_THAT(serializer.PrependHit(&pl_used, hit2),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAre(hit1, hit0)));
}
TEST(PostingListHitSerializerTest,
PostingListPrependHitArrayMinSizePostingList) {
PostingListHitSerializer serializer;
// Min Size = 12
int pl_size = serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<HitElt> hits_in;
hits_in.emplace_back(Hit(/*section_id=*/1, /*document_id=*/0,
Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
std::reverse(hits_in.begin(), hits_in.end());
// Add five hits. The PL is in the empty state and an empty min size PL can
// only fit two hits. So PrependHitArray should fail.
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t num_can_prepend,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
EXPECT_THAT(num_can_prepend, Eq(2));
int can_fit_hits = num_can_prepend;
// The PL has room for 2 hits. We should be able to add them without any
// problem, transitioning the PL from EMPTY -> ALMOST_FULL -> FULL
const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2);
ICING_ASSERT_OK_AND_ASSIGN(
num_can_prepend,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, hits_in_ptr, can_fit_hits, /*keep_prepended=*/false)));
EXPECT_THAT(num_can_prepend, Eq(can_fit_hits));
EXPECT_THAT(pl_size, Eq(serializer.GetBytesUsed(&pl_used)));
std::deque<Hit> hits_pushed;
std::transform(hits_in.rbegin(),
hits_in.rend() - hits_in.size() + can_fit_hits,
std::front_inserter(hits_pushed), HitElt::get_hit);
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
PostingListHitSerializer serializer;
// Size = 36
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<HitElt> hits_in;
hits_in.emplace_back(Hit(/*section_id=*/1, /*document_id=*/0,
Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
std::reverse(hits_in.begin(), hits_in.end());
// The last hit is uncompressed and the four before it should only take one
// byte. Total use = 8 bytes.
// ----------------------
// 35 delta(Hit #0)
// 34 delta(Hit #1)
// 33 delta(Hit #2)
// 32 delta(Hit #3)
// 31-28 Hit #4
// 27-12 <unused>
// 11-6 kSpecialHit
// 5-0 Offset=28
// ----------------------
int byte_size = sizeof(Hit::Value) + hits_in.size() - 1;
// Add five hits. The PL is in the empty state and should be able to fit all
// five hits without issue, transitioning the PL from EMPTY -> NOT_FULL.
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t num_could_fit,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
std::deque<Hit> hits_pushed;
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
Hit first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
hits_in.clear();
hits_in.emplace_back(first_hit);
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
std::reverse(hits_in.begin(), hits_in.end());
// Size increased by the deltas of these hits (1+2+1+2+3+2+3) = 14 bytes
// ----------------------
// 35 delta(Hit #0)
// 34 delta(Hit #1)
// 33 delta(Hit #2)
// 32 delta(Hit #3)
// 31 delta(Hit #4)
// 30-29 delta(Hit #5)
// 28 delta(Hit #6)
// 27-26 delta(Hit #7)
// 25-23 delta(Hit #8)
// 22-21 delta(Hit #9)
// 20-18 delta(Hit #10)
// 17-14 Hit #11
// 13-12 <unused>
// 11-6 kSpecialHit
// 5-0 Offset=14
// ----------------------
byte_size += 14;
// Add these 7 hits. The PL is currently in the NOT_FULL state and should
// remain in the NOT_FULL state.
ICING_ASSERT_OK_AND_ASSIGN(
num_could_fit,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
// All hits from hits_in were added.
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/3);
hits_in.clear();
hits_in.emplace_back(first_hit);
// ----------------------
// 35 delta(Hit #0)
// 34 delta(Hit #1)
// 33 delta(Hit #2)
// 32 delta(Hit #3)
// 31 delta(Hit #4)
// 30-29 delta(Hit #5)
// 28 delta(Hit #6)
// 27-26 delta(Hit #7)
// 25-23 delta(Hit #8)
// 22-21 delta(Hit #9)
// 20-18 delta(Hit #10)
// 17-15 delta(Hit #11)
// 14-12 <unused>
// 11-6 Hit #12
// 5-0 kSpecialHit
// ----------------------
byte_size = 30; // 36 - 6
// Add this 1 hit. The PL is currently in the NOT_FULL state and should
// transition to the ALMOST_FULL state - even though there is still some
// unused space. This is because the unused space (3 bytes) is less than
// the size of a uncompressed Hit.
ICING_ASSERT_OK_AND_ASSIGN(
num_could_fit,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
// All hits from hits_in were added.
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
first_hit = CreateHit(hits_in.begin()->hit, /*desired_byte_length=*/1);
hits_in.clear();
hits_in.emplace_back(first_hit);
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
std::reverse(hits_in.begin(), hits_in.end());
// ----------------------
// 35 delta(Hit #0)
// 34 delta(Hit #1)
// 33 delta(Hit #2)
// 32 delta(Hit #3)
// 31 delta(Hit #4)
// 30-29 delta(Hit #5)
// 28 delta(Hit #6)
// 27-26 delta(Hit #7)
// 25-23 delta(Hit #8)
// 22-21 delta(Hit #9)
// 20-18 delta(Hit #10)
// 17-15 delta(Hit #11)
// 14 delta(Hit #12)
// 13-12 <unused>
// 11-6 Hit #13
// 5-0 Hit #14
// ----------------------
// Add these 2 hits.
// - The PL is currently in the ALMOST_FULL state. Adding the first hit should
// keep the PL in ALMOST_FULL because the delta between
// Hit #13 and Hit #14 (1 byte) can fit in the unused area (3 bytes).
// - Adding the second hit should transition to the FULL state because the
// delta between Hit #14 and Hit #15 (3 bytes) is larger than the remaining
// unused area (2 byte).
ICING_ASSERT_OK_AND_ASSIGN(
num_could_fit,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
EXPECT_THAT(pl_size, Eq(serializer.GetBytesUsed(&pl_used)));
// All hits from hits_in were added.
std::transform(hits_in.rbegin(), hits_in.rend(),
std::front_inserter(hits_pushed), HitElt::get_hit);
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
TEST(PostingListHitSerializerTest, PostingListPrependHitArrayTooManyHits) {
PostingListHitSerializer serializer;
static constexpr int kNumHits = 128;
static constexpr int kDeltaSize = 1;
static constexpr size_t kHitsSize =
((kNumHits - 2) * kDeltaSize + (2 * sizeof(Hit))) / sizeof(Hit) *
sizeof(Hit);
// Create an array with one too many hits
std::vector<Hit> hits_in_too_many =
CreateHits(kNumHits + 1, /*desired_byte_length=*/1);
std::vector<HitElt> hit_elts_in_too_many;
for (const Hit &hit : hits_in_too_many) {
hit_elts_in_too_many.emplace_back(hit);
}
// Reverse so that hits are inserted in descending order
std::reverse(hit_elts_in_too_many.begin(), hit_elts_in_too_many.end());
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(
&serializer, serializer.GetMinPostingListSize()));
// PrependHitArray should fail because hit_elts_in_too_many is far too large
// for the minimum size pl.
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t num_could_fit,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(),
/*keep_prepended=*/false)));
ASSERT_THAT(num_could_fit, Eq(2));
ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
ICING_ASSERT_OK_AND_ASSIGN(
pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
// PrependHitArray should fail because hit_elts_in_too_many is one hit too
// large for this pl.
ICING_ASSERT_OK_AND_ASSIGN(
num_could_fit,
(serializer.PrependHitArray<HitElt, HitElt::get_hit>(
&pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(),
/*keep_prepended=*/false)));
ASSERT_THAT(num_could_fit, Eq(hit_elts_in_too_many.size() - 1));
ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
}
TEST(PostingListHitSerializerTest,
PostingListStatusJumpFromNotFullToFullAndBack) {
PostingListHitSerializer serializer;
// Size = 18
const uint32_t pl_size = 3 * sizeof(Hit);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
Hit max_valued_hit(kMaxSectionId, kMinDocumentId, Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
ICING_ASSERT_OK(serializer.PrependHit(&pl, max_valued_hit));
uint32_t bytes_used = serializer.GetBytesUsed(&pl);
ASSERT_THAT(bytes_used, sizeof(Hit::Value) + sizeof(Hit::Flags) +
sizeof(Hit::TermFrequency));
// Status not full.
ASSERT_THAT(bytes_used,
Le(pl_size - PostingListHitSerializer::kSpecialHitsSize));
Hit min_valued_hit(kMinSectionId, kMaxDocumentId, Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint8_t delta_buf[VarInt::kMaxEncodedLen64];
size_t delta_len = PostingListHitSerializer::EncodeNextHitValue(
/*next_hit_value=*/min_valued_hit.value(),
/*curr_hit_value=*/max_valued_hit.value(), delta_buf);
// The compressed region available is pl_size - 2 * sizeof(specialHits) = 6
// We need to also fit max_valued_hit's flags and term-frequency fields, which
// each take 1 byte
// So we'll jump directly to FULL if the varint-encoded delta of the 2 hits >
// 6 - sizeof(Hit::Flags) - sizeof(Hit::TermFrequency) = 4
ASSERT_THAT(delta_len, Gt(4));
ICING_ASSERT_OK(serializer.PrependHit(
&pl, Hit(kMinSectionId, kMaxDocumentId, Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true)));
// Status should jump to full directly.
ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(pl_size));
ICING_ASSERT_OK(serializer.PopFrontHits(&pl, 1));
// Status should return to not full as before.
ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(bytes_used));
}
TEST(PostingListHitSerializerTest, DeltaOverflow) {
PostingListHitSerializer serializer;
const uint32_t pl_size = 4 * sizeof(Hit);
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
static const Hit::Value kMaxHitValue = std::numeric_limits<Hit::Value>::max();
static const Hit::Value kOverflow[4] = {
kMaxHitValue >> 2,
(kMaxHitValue >> 2) * 2,
(kMaxHitValue >> 2) * 3,
kMaxHitValue - 1,
};
// Fit at least 4 ordinary values.
for (Hit::Value v = 0; v < 4; v++) {
ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(4 - v)));
}
// Cannot fit 4 overflow values.
ICING_ASSERT_OK_AND_ASSIGN(
pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
Hit::Flags has_term_frequency_flags = 0b1;
ICING_EXPECT_OK(serializer.PrependHit(
&pl, Hit(/*value=*/kOverflow[3], has_term_frequency_flags,
/*term_frequency=*/8)));
ICING_EXPECT_OK(serializer.PrependHit(
&pl, Hit(/*value=*/kOverflow[2], has_term_frequency_flags,
/*term_frequency=*/8)));
// Can fit only one more.
ICING_EXPECT_OK(serializer.PrependHit(
&pl, Hit(/*value=*/kOverflow[1], has_term_frequency_flags,
/*term_frequency=*/8)));
EXPECT_THAT(serializer.PrependHit(
&pl, Hit(/*value=*/kOverflow[0], has_term_frequency_flags,
/*term_frequency=*/8)),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
}
TEST(PostingListHitSerializerTest, GetMinPostingListToFitForNotFullPL) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Create and add some hits to make pl_used NOT_FULL
std::vector<Hit> hits_in =
CreateHits(/*num_hits=*/7, /*desired_byte_length=*/1);
for (const Hit &hit : hits_in) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
}
// ----------------------
// 23 delta(Hit #0)
// 22 delta(Hit #1)
// 21 delta(Hit #2)
// 20 delta(Hit #3)
// 19 delta(Hit #4)
// 18 delta(Hit #5)
// 17-14 Hit #6
// 13-12 <unused>
// 11-6 kSpecialHit
// 5-0 Offset=14
// ----------------------
int bytes_used = 10;
// Check that all hits have been inserted
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
// Get the min size to fit for the hits in pl_used. Moving the hits in pl_used
// into a posting list with this min size should make it ALMOST_FULL, which we
// can see should have size = 18.
// ----------------------
// 17 delta(Hit #0)
// 16 delta(Hit #1)
// 15 delta(Hit #2)
// 14 delta(Hit #3)
// 13 delta(Hit #4)
// 12 delta(Hit #5)
// 11-6 Hit #6
// 5-0 kSpecialHit
// ----------------------
int expected_min_size = 18;
uint32_t min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used);
EXPECT_THAT(min_size_to_fit, Eq(expected_min_size));
// Also check that this min size to fit posting list actually does fit all the
// hits and can only hit one more hit in the ALMOST_FULL state.
ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed min_size_to_fit_pl,
PostingListUsed::CreateFromUnitializedRegion(
&serializer, min_size_to_fit));
for (const Hit &hit : hits_in) {
ICING_ASSERT_OK(serializer.PrependHit(&min_size_to_fit_pl, hit));
}
// Adding another hit to the min-size-to-fit posting list should succeed
Hit hit = CreateHit(hits_in.back(), /*desired_byte_length=*/1);
ICING_ASSERT_OK(serializer.PrependHit(&min_size_to_fit_pl, hit));
// Adding any other hits should fail with RESOURCE_EXHAUSTED error.
EXPECT_THAT(serializer.PrependHit(&min_size_to_fit_pl,
CreateHit(hit, /*desired_byte_length=*/1)),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
// Check that all hits have been inserted and the min-fit posting list is now
// FULL.
EXPECT_THAT(serializer.GetBytesUsed(&min_size_to_fit_pl),
Eq(min_size_to_fit));
hits_pushed.emplace_front(hit);
EXPECT_THAT(serializer.GetHits(&min_size_to_fit_pl),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
}
TEST(PostingListHitSerializerTest, GetMinPostingListToFitForTwoHits) {
PostingListHitSerializer serializer;
// Size = 36
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Create and add 2 hits
Hit first_hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
std::vector<Hit> hits_in =
CreateHits(first_hit, /*num_hits=*/2, /*desired_byte_length=*/4);
for (const Hit &hit : hits_in) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
}
// ----------------------
// 35 term-frequency(Hit #0)
// 34 flags(Hit #0)
// 33-30 delta(Hit #0)
// 29 term-frequency(Hit #1)
// 28 flags(Hit #1)
// 27-24 Hit #1
// 23-12 <unused>
// 11-6 kSpecialHit
// 5-0 Offset=24
// ----------------------
int bytes_used = 12;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
// GetMinPostingListSizeToFit should return min posting list size.
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
Eq(serializer.GetMinPostingListSize()));
}
TEST(PostingListHitSerializerTest, GetMinPostingListToFitForThreeSmallHits) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Create and add 3 small hits that fit in the size range where we should be
// checking for whether the PL has only 2 hits
std::vector<Hit> hits_in =
CreateHits(/*num_hits=*/3, /*desired_byte_length=*/1);
for (const Hit &hit : hits_in) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
}
// ----------------------
// 23 delta(Hit #0)
// 22 delta(Hit #1)
// 21-18 Hit #2
// 17-12 <unused>
// 11-6 kSpecialHit
// 5-0 Offset=18
// ----------------------
int bytes_used = 6;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
// Get the min size to fit for the hits in pl_used. Moving the hits in pl_used
// into a posting list with this min size should make it ALMOST_FULL, which we
// can see should have size = 14. This should not return the min posting list
// size.
// ----------------------
// 13 delta(Hit #0)
// 12 delta(Hit #1)
// 11-6 Hit #2
// 5-0 kSpecialHit
// ----------------------
int expected_min_size = 14;
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
Gt(serializer.GetMinPostingListSize()));
EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
Eq(expected_min_size));
}
TEST(PostingListHitSerializerTest,
GetMinPostingListToFitForAlmostFullAndFullPLReturnsSameSize) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// Create and add some hits to make pl_used ALMOST_FULL
std::vector<Hit> hits_in =
CreateHits(/*num_hits=*/7, /*desired_byte_length=*/2);
for (const Hit &hit : hits_in) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
}
// ----------------------
// 23-22 delta(Hit #0)
// 21-20 delta(Hit #1)
// 19-18 delta(Hit #2)
// 17-16 delta(Hit #3)
// 15-14 delta(Hit #4)
// 13-12 delta(Hit #5)
// 11-6 Hit #6
// 5-0 kSpecialHit
// ----------------------
int bytes_used = 18;
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
// GetMinPostingListSizeToFit should return the same size as pl_used.
uint32_t min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used);
EXPECT_THAT(min_size_to_fit, Eq(pl_size));
// Add another hit to make the posting list FULL
Hit hit = CreateHit(hits_in.back(), /*desired_byte_length=*/1);
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size));
hits_pushed.emplace_front(hit);
EXPECT_THAT(serializer.GetHits(&pl_used),
IsOkAndHolds(ElementsAreArray(hits_pushed)));
// GetMinPostingListSizeToFit should still be the same size as pl_used.
min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used);
EXPECT_THAT(min_size_to_fit, Eq(pl_size));
}
TEST(PostingListHitSerializerTest, MoveFrom) {
PostingListHitSerializer serializer;
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
}
ICING_ASSERT_OK(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1));
EXPECT_THAT(serializer.GetHits(&pl_used2),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
EXPECT_THAT(serializer.GetHits(&pl_used1), IsOkAndHolds(IsEmpty()));
}
TEST(PostingListHitSerializerTest, MoveFromNullArgumentReturnsInvalidArgument) {
PostingListHitSerializer serializer;
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used1, /*src=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(serializer.GetHits(&pl_used1),
IsOkAndHolds(ElementsAreArray(hits.rbegin(), hits.rend())));
}
TEST(PostingListHitSerializerTest,
MoveFromInvalidPostingListReturnsInvalidArgument) {
PostingListHitSerializer serializer;
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
}
// Write invalid hits to the beginning of pl_used1 to make it invalid.
Hit invalid_hit(Hit::kInvalidValue);
Hit *first_hit = reinterpret_cast<Hit *>(pl_used1.posting_list_buffer());
*first_hit = invalid_hit;
++first_hit;
*first_hit = invalid_hit;
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(serializer.GetHits(&pl_used2),
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
}
TEST(PostingListHitSerializerTest,
MoveToInvalidPostingListReturnsFailedPrecondition) {
PostingListHitSerializer serializer;
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
}
// Write invalid hits to the beginning of pl_used2 to make it invalid.
Hit invalid_hit(Hit::kInvalidValue);
Hit *first_hit = reinterpret_cast<Hit *>(pl_used2.posting_list_buffer());
*first_hit = invalid_hit;
++first_hit;
*first_hit = invalid_hit;
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(serializer.GetHits(&pl_used1),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
}
TEST(PostingListHitSerializerTest, MoveToPostingListTooSmall) {
PostingListHitSerializer serializer;
int pl_size = 3 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used1,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit &hit : hits1) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
}
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used2,
PostingListUsed::CreateFromUnitializedRegion(
&serializer, serializer.GetMinPostingListSize()));
std::vector<Hit> hits2 =
CreateHits(/*num_hits=*/1, /*desired_byte_length=*/2);
for (const Hit &hit : hits2) {
ICING_ASSERT_OK(serializer.PrependHit(&pl_used2, hit));
}
EXPECT_THAT(serializer.MoveFrom(/*dst=*/&pl_used2, /*src=*/&pl_used1),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
EXPECT_THAT(serializer.GetHits(&pl_used1),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
EXPECT_THAT(serializer.GetHits(&pl_used2),
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
}
TEST(PostingListHitSerializerTest, PopHitsWithTermFrequenciesAndFlags) {
PostingListHitSerializer serializer;
// Size = 24
int pl_size = 2 * serializer.GetMinPostingListSize();
ICING_ASSERT_OK_AND_ASSIGN(
PostingListUsed pl_used,
PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
// This posting list is 24-bytes. Create four hits that will have deltas of
// two bytes each and all of whom will have a non-default term-frequency. This
// posting list will be almost_full.
//
// ----------------------
// 23 term-frequency(Hit #0)
// 22 flags(Hit #0)
// 21-20 delta(Hit #0)
// 19 term-frequency(Hit #1)
// 18 flags(Hit #1)
// 17-16 delta(Hit #1)
// 15 term-frequency(Hit #2)
// 14 flags(Hit #2)
// 13-12 delta(Hit #2)
// 11-6 Hit #3
// 5-0 kInvalidHit
// ----------------------
int bytes_used = 18;
Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/5,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2);
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0));
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1));
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2));
ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3));
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> hits_out,
serializer.GetHits(&pl_used));
EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
// Now, pop the last hit. The posting list should contain the first three
// hits.
//
// ----------------------
// 23 term-frequency(Hit #0)
// 22 flags(Hit #0)
// 21-20 delta(Hit #0)
// 19 term-frequency(Hit #1)
// 18 flags(Hit #1)
// 17-16 delta(Hit #1)
// 15-12 <unused>
// 11-6 Hit #2
// 5-0 kInvalidHit
// ----------------------
ICING_ASSERT_OK(serializer.PopFrontHits(&pl_used, 1));
ICING_ASSERT_OK_AND_ASSIGN(hits_out, serializer.GetHits(&pl_used));
EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0));
EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
}
} // namespace
} // namespace lib
} // namespace icing