Update Icing from upstream.

Descriptions:
========================================================================
Allow adding duplicate hits with the same value into the posting list.
========================================================================
Support an extension to the encoded Hit in Icing's posting list.
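A minimal sketch of the extended layout, assuming the new encoding in
icing/index/hit/hit.h below (the packing helper here is hypothetical and
skips the document_id inversion the real Hit performs):

  #include <cassert>
  #include <cstdint>

  // Value bits: 1 unused + 22 document_id + 6 section_id +
  //             1 is_prefix_hit + 1 is_in_prefix_section + 1 has_flags.
  // Flags bits: 1 reserved + 6 unused + 1 has_term_frequency.
  constexpr int kNumFlagsInValueField = 3;
  constexpr int kSectionIdBits = 6;
  constexpr uint8_t kDefaultTermFrequency = 1;

  uint32_t PackValue(uint32_t document_id, uint32_t section_id,
                     bool is_prefix_hit, bool is_in_prefix_section,
                     bool has_flags) {
    return (document_id << (kSectionIdBits + kNumFlagsInValueField)) |
           (section_id << kNumFlagsInValueField) |
           (uint32_t{is_prefix_hit} << 2) |         // kPrefixHit
           (uint32_t{is_in_prefix_section} << 1) |  // kInPrefixSection
           (uint32_t{has_flags} << 0);              // kHasFlags
  }

  int main() {
    uint8_t term_frequency = 42;  // != kDefaultTermFrequency
    uint8_t flags = (term_frequency != kDefaultTermFrequency) << 0;
    uint32_t value =
        PackValue(/*document_id=*/5, /*section_id=*/8,
                  /*is_prefix_hit=*/false, /*is_in_prefix_section=*/false,
                  /*has_flags=*/flags != 0);
    // The has_flags bit marks that the trailing flags byte carries
    // information, letting readers skip it when no flags are set.
    assert((value & 1u) == 1u);
    return 0;
  }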
========================================================================
Add description fields to SchemaTypeConfigProto and PropertyConfigProto.
========================================================================
Support polymorphism in type property filters.
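With this, a parent type's property filter also applies to its child types.
The fragment below is modeled on the new SearchWithPropertyFiltersPolymorphism
test in this change (not a complete program):

  // "name" is filtered on the parent type Person; because Artist declares
  // Person as a parent type, the filter also applies to Artist documents.
  SearchSpecProto search_spec;
  search_spec.set_term_match_type(TermMatchType::PREFIX);
  TypePropertyMask* person_mask = search_spec.add_type_property_filters();
  person_mask->set_schema_type("Person");
  person_mask->add_paths("name");
  // "emailAddress" is filtered on Artist directly and applies only there.
  TypePropertyMask* artist_mask = search_spec.add_type_property_filters();
  artist_mask->set_schema_type("Artist");
  artist_mask->add_paths("emailAddress");
  search_spec.set_query("Meg");  // Matches "name" in both Person and Artist.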
========================================================================
Fix posting list GetMinPostingListSizeToFit size calculation bug.
========================================================================
Add instructions to error message for advanced query backward compat.
========================================================================

BUG: 294274922
BUG: 321107391
BUG: 324908653
BUG: 326987971
Change-Id: I9e8e0589a9682ee14ac34ded32b959c1ef64d84d
diff --git a/icing/file/posting_list/flash-index-storage-header.h b/icing/file/posting_list/flash-index-storage-header.h
index 6bbf1ba..f7b331c 100644
--- a/icing/file/posting_list/flash-index-storage-header.h
+++ b/icing/file/posting_list/flash-index-storage-header.h
@@ -33,7 +33,7 @@
   // The class used to access the actual header.
   struct Header {
     // A magic used to mark the beginning of a valid header.
-    static constexpr int kMagic = 0xb0780cf4;
+    static constexpr int kMagic = 0xd1b7b293;
     int magic;
     int block_size;
     int last_indexed_docid;
diff --git a/icing/file/posting_list/flash-index-storage_test.cc b/icing/file/posting_list/flash-index-storage_test.cc
index ef60037..203041e 100644
--- a/icing/file/posting_list/flash-index-storage_test.cc
+++ b/icing/file/posting_list/flash-index-storage_test.cc
@@ -16,9 +16,9 @@
 
 #include <unistd.h>
 
-#include <algorithm>
-#include <cstdlib>
-#include <limits>
+#include <cstdint>
+#include <memory>
+#include <string>
 #include <utility>
 #include <vector>
 
@@ -27,6 +27,7 @@
 #include "gtest/gtest.h"
 #include "icing/file/filesystem.h"
 #include "icing/file/posting_list/flash-index-storage-header.h"
+#include "icing/file/posting_list/posting-list-identifier.h"
 #include "icing/index/hit/hit.h"
 #include "icing/index/main/posting-list-hit-serializer.h"
 #include "icing/store/document-id.h"
@@ -213,10 +214,14 @@
     EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
     std::vector<Hit> hits1 = {
-        Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
-        Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
-        Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
-        Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+        Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits1) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
@@ -237,10 +242,14 @@
     EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
     std::vector<Hit> hits2 = {
-        Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
-        Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
-        Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
-        Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+        Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits2) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
@@ -273,10 +282,14 @@
     EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
                 IsOkAndHolds(IsEmpty()));
     std::vector<Hit> hits3 = {
-        Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
-        Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
-        Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
-        Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
+        Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits3) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder3.posting_list, hit));
@@ -314,10 +327,14 @@
     EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
     std::vector<Hit> hits1 = {
-        Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
-        Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
-        Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
-        Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+        Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits1) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
@@ -338,10 +355,14 @@
     EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
     std::vector<Hit> hits2 = {
-        Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
-        Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
-        Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
-        Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+        Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits2) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
@@ -374,10 +395,14 @@
     EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
                 IsOkAndHolds(IsEmpty()));
     std::vector<Hit> hits3 = {
-        Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
-        Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
-        Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
-        Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
+        Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits3) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder3.posting_list, hit));
@@ -417,10 +442,14 @@
       EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
       std::vector<Hit> hits1 = {
-          Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
-          Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
-          Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
-          Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+          Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
       for (const Hit& hit : hits1) {
         ICING_ASSERT_OK(
             serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
@@ -441,10 +470,14 @@
       EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
       std::vector<Hit> hits2 = {
-          Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
-          Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
-          Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
-          Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+          Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
       for (const Hit& hit : hits2) {
         ICING_ASSERT_OK(
             serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
@@ -492,10 +525,14 @@
       EXPECT_THAT(serializer_->GetHits(&posting_list_holder3.posting_list),
                   IsOkAndHolds(IsEmpty()));
       std::vector<Hit> hits3 = {
-          Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
-          Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
-          Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
-          Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
+          Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+          Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
       for (const Hit& hit : hits3) {
         ICING_ASSERT_OK(
             serializer_->PrependHit(&posting_list_holder3.posting_list, hit));
@@ -534,10 +571,14 @@
     EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
     std::vector<Hit> hits1 = {
-        Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
-        Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
-        Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
-        Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
+        Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits1) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder1.posting_list, hit));
@@ -561,10 +602,14 @@
     EXPECT_THAT(flash_index_storage.empty(), IsFalse());
 
     std::vector<Hit> hits2 = {
-        Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
-        Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
-        Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
-        Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
+        Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+        Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false)};
     for (const Hit& hit : hits2) {
       ICING_ASSERT_OK(
           serializer_->PrependHit(&posting_list_holder2.posting_list, hit));
diff --git a/icing/file/posting_list/index-block_test.cc b/icing/file/posting_list/index-block_test.cc
index ebc9ba4..d841e79 100644
--- a/icing/file/posting_list/index-block_test.cc
+++ b/icing/file/posting_list/index-block_test.cc
@@ -14,11 +14,16 @@
 
 #include "icing/file/posting_list/index-block.h"
 
+#include <memory>
+#include <string>
+#include <vector>
+
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "icing/file/filesystem.h"
-#include "icing/file/posting_list/posting-list-used.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/index/hit/hit.h"
 #include "icing/index/main/posting-list-hit-serializer.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/tmp-directory.h"
@@ -67,7 +72,7 @@
 };
 
 TEST_F(IndexBlockTest, CreateFromUninitializedRegionProducesEmptyBlock) {
-  constexpr int kPostingListBytes = 20;
+  constexpr int kPostingListBytes = 24;
 
   {
     // Create an IndexBlock from this newly allocated file block.
@@ -80,7 +85,7 @@
 }
 
 TEST_F(IndexBlockTest, SizeAccessorsWorkCorrectly) {
-  constexpr int kPostingListBytes1 = 20;
+  constexpr int kPostingListBytes1 = 24;
 
   // Create an IndexBlock from this newly allocated file block.
   ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
@@ -88,13 +93,13 @@
                                  &filesystem_, serializer_.get(), sfd_->get(),
                                  /*offset=*/0, kBlockSize, kPostingListBytes1));
   EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes1));
-  // There should be (4096 - 12) / 20 = 204 posting lists
-  // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 203 in only 8
+  // There should be (4096 - 12) / 24 = 170 posting lists
+  // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 169 in only 8
   // bits.
-  EXPECT_THAT(block.max_num_posting_lists(), Eq(204));
+  EXPECT_THAT(block.max_num_posting_lists(), Eq(170));
   EXPECT_THAT(block.posting_list_index_bits(), Eq(8));
 
-  constexpr int kPostingListBytes2 = 200;
+  constexpr int kPostingListBytes2 = 240;
 
   // Create an IndexBlock from this newly allocated file block.
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -102,22 +107,27 @@
                  &filesystem_, serializer_.get(), sfd_->get(), /*offset=*/0,
                  kBlockSize, kPostingListBytes2));
   EXPECT_THAT(block.posting_list_bytes(), Eq(kPostingListBytes2));
-  // There should be (4096 - 12) / 200 = 20 posting lists
+  // There should be (4096 - 12) / 240 = 17 posting lists
   // (sizeof(BlockHeader)==12). We can store a PostingListIndex of 19 in only 5
   // bits.
-  EXPECT_THAT(block.max_num_posting_lists(), Eq(20));
+  EXPECT_THAT(block.max_num_posting_lists(), Eq(17));
   EXPECT_THAT(block.posting_list_index_bits(), Eq(5));
 }
 
 TEST_F(IndexBlockTest, IndexBlockChangesPersistAcrossInstances) {
-  constexpr int kPostingListBytes = 2000;
+  constexpr int kPostingListBytes = 2004;
 
   std::vector<Hit> test_hits{
-      Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
-      Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
-      Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
+      Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
   };
   PostingListIndex allocated_index;
   {
@@ -158,21 +168,31 @@
 }
 
 TEST_F(IndexBlockTest, IndexBlockMultiplePostingLists) {
-  constexpr int kPostingListBytes = 2000;
+  constexpr int kPostingListBytes = 2004;
 
   std::vector<Hit> hits_in_posting_list1{
-      Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
-      Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
-      Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
+      Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
   };
   std::vector<Hit> hits_in_posting_list2{
-      Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
-      Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
-      Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
-      Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
+      Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
   };
   PostingListIndex allocated_index_1;
   PostingListIndex allocated_index_2;
@@ -242,7 +262,7 @@
 }
 
 TEST_F(IndexBlockTest, IndexBlockReallocatingPostingLists) {
-  constexpr int kPostingListBytes = 2000;
+  constexpr int kPostingListBytes = 2004;
 
   // Create an IndexBlock from this newly allocated file block.
   ICING_ASSERT_OK_AND_ASSIGN(IndexBlock block,
@@ -252,11 +272,16 @@
 
   // Add hits to the first posting list.
   std::vector<Hit> hits_in_posting_list1{
-      Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
-      Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
-      Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
+      Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
   };
   ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_1,
                              block.AllocatePostingList());
@@ -270,11 +295,16 @@
 
   // Add hits to the second posting list.
   std::vector<Hit> hits_in_posting_list2{
-      Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
-      Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
-      Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
-      Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
+      Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
   };
   ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_2,
                              block.AllocatePostingList());
@@ -296,9 +326,12 @@
   EXPECT_THAT(block.HasFreePostingLists(), IsOkAndHolds(IsTrue()));
 
   std::vector<Hit> hits_in_posting_list3{
-      Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88),
-      Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency),
-      Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2),
+      Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
+      Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2,
+            /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false),
   };
   ICING_ASSERT_OK_AND_ASSIGN(IndexBlock::PostingListAndBlockInfo alloc_info_3,
                              block.AllocatePostingList());
@@ -317,7 +350,7 @@
 }
 
 TEST_F(IndexBlockTest, IndexBlockNextBlockIndex) {
-  constexpr int kPostingListBytes = 2000;
+  constexpr int kPostingListBytes = 2004;
   constexpr int kSomeBlockIndex = 22;
 
   {
diff --git a/icing/icing-search-engine_search_test.cc b/icing/icing-search-engine_search_test.cc
index 22f42e7..d815f61 100644
--- a/icing/icing-search-engine_search_test.cc
+++ b/icing/icing-search-engine_search_test.cc
@@ -3716,6 +3716,109 @@
   EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
 }
 
+TEST_P(IcingSearchEngineSearchTest, SearchWithPropertyFiltersPolymorphism) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Person")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("name")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("emailAddress")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .AddType(SchemaTypeConfigBuilder()
+                       .SetType("Artist")
+                       .AddParentType("Person")
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("name")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("emailAddress")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL))
+                       .AddProperty(PropertyConfigBuilder()
+                                        .SetName("company")
+                                        .SetDataTypeString(TERM_MATCH_PREFIX,
+                                                           TOKENIZER_PLAIN)
+                                        .SetCardinality(CARDINALITY_OPTIONAL)))
+          .Build();
+  ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+  // Add a person document and an artist document
+  DocumentProto document_person =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Person")
+          .AddStringProperty("name", "Meg Ryan")
+          .AddStringProperty("emailAddress", "shopgirl@aol.com")
+          .Build();
+  DocumentProto document_artist =
+      DocumentBuilder()
+          .SetKey("namespace", "uri2")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Artist")
+          .AddStringProperty("name", "Meg Artist")
+          .AddStringProperty("emailAddress", "artist@aol.com")
+          .AddStringProperty("company", "company")
+          .Build();
+  ASSERT_THAT(icing.Put(document_person).status(), ProtoIsOk());
+  ASSERT_THAT(icing.Put(document_artist).status(), ProtoIsOk());
+
+  // Set a query with property filters of "name" in Person and "emailAddress"
+  // in Artist. By polymorphism, "name" should also apply to Artist.
+  auto search_spec = std::make_unique<SearchSpecProto>();
+  search_spec->set_term_match_type(TermMatchType::PREFIX);
+  search_spec->set_search_type(GetParam());
+  TypePropertyMask* person_type_property_mask =
+      search_spec->add_type_property_filters();
+  person_type_property_mask->set_schema_type("Person");
+  person_type_property_mask->add_paths("name");
+  TypePropertyMask* artist_type_property_mask =
+      search_spec->add_type_property_filters();
+  artist_type_property_mask->set_schema_type("Artist");
+  artist_type_property_mask->add_paths("emailAddress");
+
+  auto result_spec = std::make_unique<ResultSpecProto>();
+  auto scoring_spec = std::make_unique<ScoringSpecProto>();
+  *scoring_spec = GetDefaultScoringSpec();
+
+  // Verify that the property filter for "name" in Person is also applied to
+  // Artist.
+  search_spec->set_query("Meg");
+  SearchResultProto results =
+      icing.Search(*search_spec, *scoring_spec, *result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(2));
+  EXPECT_THAT(results.results(1).document(), EqualsProto(document_person));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document_artist));
+
+  // Verify that the property filter for "emailAddress" in Artist is only
+  // applied to Artist.
+  search_spec->set_query("aol");
+  results = icing.Search(*search_spec, *scoring_spec, *result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), SizeIs(1));
+  EXPECT_THAT(results.results(0).document(), EqualsProto(document_artist));
+
+  // Verify that the "company" property is filtered out, since it is not
+  // specified in the property filter.
+  search_spec->set_query("company");
+  results = icing.Search(*search_spec, *scoring_spec, *result_spec);
+  EXPECT_THAT(results.status(), ProtoIsOk());
+  EXPECT_THAT(results.results(), IsEmpty());
+}
+
 TEST_P(IcingSearchEngineSearchTest, EmptySearchWithPropertyFilter) {
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
index 493e62b..7b7f2f6 100644
--- a/icing/index/hit/hit.cc
+++ b/icing/index/hit/hit.cc
@@ -14,42 +14,19 @@
 
 #include "icing/index/hit/hit.h"
 
+#include <cstring>
+#include <limits>
+
+#include "icing/schema/section.h"
 #include "icing/store/document-id.h"
 #include "icing/util/bit-util.h"
+#include "icing/util/logging.h"
 
 namespace icing {
 namespace lib {
 
 namespace {
 
-enum FlagOffset {
-  // This hit, whether exact or not, came from a prefixed section and will
-  // need to be backfilled into branching posting lists if/when those are
-  // created.
-  kInPrefixSection = 0,
-  // This hit represents a prefix of a longer term. If exact matches are
-  // required, then this hit should be ignored.
-  kPrefixHit = 1,
-  // Whether or not the hit has a term_frequency  other than
-  // kDefaultTermFrequency.
-  kHasTermFrequency = 2,
-  kNumFlags = 3,
-};
-
-static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <
-                  sizeof(Hit::Value) * 8,
-              "Hit::kInvalidValue contains risky value and we should have at "
-              "least one unused bit to avoid potential bugs. Please follow the "
-              "process mentioned in hit.h to correct the value of "
-              "Hit::kInvalidValue and remove this static_assert afterwards.");
-
-static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <=
-                  sizeof(Hit::Value) * 8,
-              "HitOverflow");
-static_assert(kDocumentIdBits == 22, "");
-static_assert(kSectionIdBits == 6, "");
-static_assert(kNumFlags == 3, "");
-
 inline DocumentId InvertDocumentId(DocumentId document_id) {
   static_assert(kMaxDocumentId <= (std::numeric_limits<DocumentId>::max() - 1),
                 "(kMaxDocumentId + 1) must not overflow.");
@@ -88,10 +65,28 @@
                                /*len=*/kSectionIdBits);
 }
 
+Hit::Hit(Value value, Flags flags, TermFrequency term_frequency)
+    : flags_(flags), term_frequency_(term_frequency) {
+  memcpy(value_.data(), &value, sizeof(value));
+  if (!CheckFlagsAreConsistent()) {
+    ICING_VLOG(1)
+        << "Creating Hit that has inconsistent flag values across its fields: "
+        << "Hit(value=" << value << ", flags=" << flags
+        << "term_frequency=" << term_frequency << ")";
+  }
+}
+
 Hit::Hit(SectionId section_id, DocumentId document_id,
          Hit::TermFrequency term_frequency, bool is_in_prefix_section,
          bool is_prefix_hit)
     : term_frequency_(term_frequency) {
+  // We compute flags first as the value's has_flags bit depends on the flags_
+  // field.
+  Flags temp_flags = 0;
+  bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
+                        kHasTermFrequency, /*len=*/1, &temp_flags);
+  flags_ = temp_flags;
+
   // Values are stored so that when sorted, they appear in document_id
   // descending, section_id ascending, order. Also, all else being
   // equal, non-prefix hits sort before prefix hits. So inverted
@@ -99,30 +94,29 @@
   // (uninverted) section_id.
   Value temp_value = 0;
   bit_util::BitfieldSet(InvertDocumentId(document_id),
-                        kSectionIdBits + kNumFlags, kDocumentIdBits,
+                        kSectionIdBits + kNumFlagsInValueField, kDocumentIdBits,
                         &temp_value);
-  bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
-  bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
-                        kHasTermFrequency, /*len=*/1, &temp_value);
+  bit_util::BitfieldSet(section_id, kNumFlagsInValueField, kSectionIdBits,
+                        &temp_value);
   bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value);
   bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection,
                         /*len=*/1, &temp_value);
-  value_ = temp_value;
+
+  bool has_flags = flags_ != kNoEnabledFlags;
+  bit_util::BitfieldSet(has_flags, kHasFlags, /*len=*/1, &temp_value);
+
+  memcpy(value_.data(), &temp_value, sizeof(temp_value));
 }
 
 DocumentId Hit::document_id() const {
   DocumentId inverted_document_id = bit_util::BitfieldGet(
-      value(), kSectionIdBits + kNumFlags, kDocumentIdBits);
+      value(), kSectionIdBits + kNumFlagsInValueField, kDocumentIdBits);
   // Undo the document_id inversion.
   return InvertDocumentId(inverted_document_id);
 }
 
 SectionId Hit::section_id() const {
-  return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits);
-}
-
-bool Hit::has_term_frequency() const {
-  return bit_util::BitfieldGet(value(), kHasTermFrequency, 1);
+  return bit_util::BitfieldGet(value(), kNumFlagsInValueField, kSectionIdBits);
 }
 
 bool Hit::is_prefix_hit() const {
@@ -133,6 +127,27 @@
   return bit_util::BitfieldGet(value(), kInPrefixSection, 1);
 }
 
+bool Hit::has_flags() const {
+  return bit_util::BitfieldGet(value(), kHasFlags, 1);
+}
+
+bool Hit::has_term_frequency() const {
+  return bit_util::BitfieldGet(flags(), kHasTermFrequency, 1);
+}
+
+bool Hit::CheckFlagsAreConsistent() const {
+  bool has_flags = flags_ != kNoEnabledFlags;
+  bool has_flags_enabled_in_value =
+      bit_util::BitfieldGet(value(), kHasFlags, /*len=*/1);
+
+  bool has_term_frequency = term_frequency_ != kDefaultTermFrequency;
+  bool has_term_frequency_enabled_in_flags =
+      bit_util::BitfieldGet(flags(), kHasTermFrequency, /*len=*/1);
+
+  return has_flags == has_flags_enabled_in_value &&
+         has_term_frequency == has_term_frequency_enabled_in_flags;
+}
+
 Hit Hit::TranslateHit(Hit old_hit, DocumentId new_document_id) {
   return Hit(old_hit.section_id(), new_document_id, old_hit.term_frequency(),
              old_hit.is_in_prefix_section(), old_hit.is_prefix_hit());
@@ -140,7 +155,8 @@
 
 bool Hit::EqualsDocumentIdAndSectionId::operator()(const Hit& hit1,
                                                    const Hit& hit2) const {
-  return (hit1.value() >> kNumFlags) == (hit2.value() >> kNumFlags);
+  return (hit1.value() >> kNumFlagsInValueField) ==
+         (hit2.value() >> kNumFlagsInValueField);
 }
 
 }  // namespace lib
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
index 111b320..83fff12 100644
--- a/icing/index/hit/hit.h
+++ b/icing/index/hit/hit.h
@@ -17,6 +17,7 @@
 
 #include <array>
 #include <cstdint>
+#include <cstring>
 #include <limits>
 
 #include "icing/legacy/core/icing-packed-pod.h"
@@ -70,6 +71,9 @@
 
  private:
   // Value bits layout: 4 unused + 22 document_id + 6 section id.
+  //
+  // The Value is guaranteed to be an unsigned integer, but its size and the
+  // information it stores may change if the Hit's encoding format is changed.
   Value value_;
 } __attribute__((packed));
 static_assert(sizeof(BasicHit) == 4, "");
@@ -78,51 +82,83 @@
 // consists of:
 // - a DocumentId
 // - a SectionId
-// referring to the document and section that the hit corresponds to, as well as
-// metadata about the hit:
-// - whether the Hit has a TermFrequency other than the default value
-// - whether the Hit does not appear exactly in the document, but instead
-//   represents a term that is a prefix of a term in the document
-// - whether the Hit came from a section that has prefix expansion enabled
-//   and a term frequency for the hit.
+//   - referring to the document and section that the hit corresponds to
+// - Metadata about the hit:
+//   - whether the Hit does not appear exactly in the document, but instead
+//     represents a term that is a prefix of a term in the document
+//     (is_prefix_hit)
+//   - whether the Hit came from a section that has prefix expansion enabled
+//     (is_in_prefix_section)
+//   - whether the Hit has set any bitmask flags (has_flags)
+//   - bitmasks in flags fields:
+//     - whether the Hit has a TermFrequency other than the default value
+//       (has_term_frequency)
+//   - a term frequency for the hit
 //
 // The hit is the most basic unit of the index and, when grouped together by
 // term, can be used to encode what terms appear in what documents.
 class Hit {
  public:
-  // The datatype used to encode Hit information: the document_id, section_id
-  // and the has_term_frequency, prefix hit and in prefix section flags.
+  // The datatype used to encode Hit information: the document_id, section_id,
+  // and 3 flags: is_prefix_hit, is_in_prefix_section and has_flags.
+  //
+  // The Value is guaranteed to be an unsigned integer, but its size and the
+  // information it stores may change if the Hit's encoding format is changed.
   using Value = uint32_t;
 
   // WARNING: Changing this value will invalidate any pre-existing posting lists
   // on user devices.
   //
-  // WARNING:
-  // - Hit::kInvalidValue should contain inverted kInvalidDocumentId, which is
-  //   b'00...0. However, currently we set it as UINT32_MAX and actually it
-  //   contains b'11...1, which is the inverted document_id 0.
-  // - It means Hit::kInvalidValue contains valid (document_id, section_id,
-  //   flags), so we potentially cannot distinguish if a Hit is invalid or not.
-  //   The invalidity is an essential feature for posting list since we use it
-  //   to determine the state of the posting list.
-  // - The reason why it won't break the current posting list is because the
-  //   unused bit(s) are set as 1 for Hit::kInvalidValue and 0 for all valid
-  //   Hits. In other words, the unused bit(s) are actually serving as "invalid
-  //   flag".
-  // - If we want to exhaust all unused bits in the future, then we have to
-  //   change Hit::kInvalidValue to set the inverted document_id section
-  //   correctly (b'00...0, refer to BasicHit::kInvalidValue as an example).
-  // - Also this problem is guarded by static_assert in hit.cc. If exhausting
-  //   all unused bits, then the static_assert will detect and fail. We can
-  //   safely remove the static_assert check after following the above process
-  //   to resolve the incorrect Hit::kInvalidValue issue.
-  static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
+  // kInvalidValue contains:
+  // - 0 for unused bits. Note that unused bits are always 0 for both valid and
+  //   invalid Hit values.
+  // - Inverted kInvalidDocumentId
+  // - SectionId 0 (valid), which is ok because inverted kInvalidDocumentId has
+  //   already invalidated the value. In fact, we currently use all 2^6 section
+  //   ids and there is no "invalid section id", so it doesn't matter what
+  //   SectionId we set for kInvalidValue.
+  static constexpr Value kInvalidValue = 0;
   // Docs are sorted in reverse, and 0 is never used as the inverted
   // DocumentId (because it is the inverse of kInvalidValue), so it is always
   // the max in a descending sort.
   static constexpr Value kMaxDocumentIdSortValue = 0;
 
+  enum FlagOffsetsInFlagsField {
+    // Whether or not the hit has a term_frequency other than
+    // kDefaultTermFrequency.
+    kHasTermFrequency = 0,
+    kNumFlagsInFlagsField = 1,
+  };
+
+  enum FlagOffsetsInValueField {
+    // Whether or not the hit has a flags value other than kNoEnabledFlags (i.e.
+    // it has flags enabled in the flags field)
+    kHasFlags = 0,
+    // This hit, whether exact or not, came from a prefixed section and will
+    // need to be backfilled into branching posting lists if/when those are
+    // created.
+    kInPrefixSection = 1,
+    // This hit represents a prefix of a longer term. If exact matches are
+    // required, then this hit should be ignored.
+    kPrefixHit = 2,
+    kNumFlagsInValueField = 3,
+  };
+  static_assert(kDocumentIdBits + kSectionIdBits + kNumFlagsInValueField <=
+                    sizeof(Hit::Value) * 8,
+                "HitOverflow");
+  static_assert(kDocumentIdBits == 22, "");
+  static_assert(kSectionIdBits == 6, "");
+  static_assert(kNumFlagsInValueField == 3, "");
+
+  // The datatype used to encode additional bit-flags in the Hit.
+  // This is guaranteed to be an unsigned integer, but its size may change if
+  // more flags are introduced in the future and require more bits to encode.
+  using Flags = uint8_t;
+  static constexpr Flags kNoEnabledFlags = 0;
+
   // The Term Frequency of a Hit.
+  // This is guaranteed to be an unsigned integer, but its size may change if we
+  // need to expand the max term-frequency.
   using TermFrequency = uint8_t;
   using TermFrequencyArray = std::array<Hit::TermFrequency, kTotalNumSections>;
   // Max TermFrequency is 255.
@@ -131,40 +167,67 @@
   static constexpr TermFrequency kDefaultTermFrequency = 1;
   static constexpr TermFrequency kNoTermFrequency = 0;
 
-  explicit Hit(Value value = kInvalidValue,
-               TermFrequency term_frequency = kDefaultTermFrequency)
-      : value_(value), term_frequency_(term_frequency) {}
-  Hit(SectionId section_id, DocumentId document_id,
-      TermFrequency term_frequency, bool is_in_prefix_section = false,
-      bool is_prefix_hit = false);
+  explicit Hit(Value value)
+      : Hit(value, kNoEnabledFlags, kDefaultTermFrequency) {}
+  explicit Hit(Value value, Flags flags, TermFrequency term_frequency);
+  explicit Hit(SectionId section_id, DocumentId document_id,
+               TermFrequency term_frequency, bool is_in_prefix_section,
+               bool is_prefix_hit);
 
   bool is_valid() const { return value() != kInvalidValue; }
-  Value value() const { return value_; }
+
+  Value value() const {
+    Value value;
+    memcpy(&value, value_.data(), sizeof(value));
+    return value;
+  }
+
   DocumentId document_id() const;
   SectionId section_id() const;
-  // Whether or not the hit contains a valid term frequency.
-  bool has_term_frequency() const;
-  TermFrequency term_frequency() const { return term_frequency_; }
   bool is_prefix_hit() const;
   bool is_in_prefix_section() const;
+  // Whether or not the hit has any flags set to true.
+  bool has_flags() const;
+
+  Flags flags() const { return flags_; }
+  // Whether or not the hit contains a valid term frequency.
+  bool has_term_frequency() const;
+
+  TermFrequency term_frequency() const { return term_frequency_; }
+
+  // Returns true if the flags values across the Hit's value_, term_frequency_
+  // and flags_ fields are consistent.
+  bool CheckFlagsAreConsistent() const;
 
   // Creates a new hit based on old_hit but with new_document_id set.
   static Hit TranslateHit(Hit old_hit, DocumentId new_document_id);
 
-  bool operator<(const Hit& h2) const { return value() < h2.value(); }
-  bool operator==(const Hit& h2) const { return value() == h2.value(); }
+  bool operator<(const Hit& h2) const {
+    if (value() != h2.value()) {
+      return value() < h2.value();
+    }
+    return flags() < h2.flags();
+  }
+  bool operator==(const Hit& h2) const {
+    return value() == h2.value() && flags() == h2.flags();
+  }
 
   struct EqualsDocumentIdAndSectionId {
     bool operator()(const Hit& hit1, const Hit& hit2) const;
   };
 
  private:
-  // Value and TermFrequency must be in this order.
-  // Value bits layout: 1 unused + 22 document_id + 6 section id + 3 flags.
-  Value value_;
+  // Value, Flags and TermFrequency must be in this order.
+  // Value bits layout: 1 unused + 22 document_id + 6 section_id + 1
+  // is_prefix_hit + 1 is_in_prefix_section + 1 has_flags.
+  std::array<char, sizeof(Value)> value_;
+  // Flags bits layout: 1 reserved + 6 unused + 1 has_term_frequency.
+  // The left-most bit is reserved for chaining additional fields in case of
+  // future hit expansions.
+  Flags flags_;
   TermFrequency term_frequency_;
-} __attribute__((packed));
-static_assert(sizeof(Hit) == 5, "");
+};
+static_assert(sizeof(Hit) == 6, "");
 // TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
 static_assert(icing_is_packed_pod<Hit>::value, "go/icing-ubsan");
 
diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc
index 0086d91..1233e00 100644
--- a/icing/index/hit/hit_test.cc
+++ b/icing/index/hit/hit_test.cc
@@ -14,6 +14,9 @@
 
 #include "icing/index/hit/hit.h"
 
+#include <algorithm>
+#include <vector>
+
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "icing/schema/section.h"
@@ -94,77 +97,127 @@
 }
 
 TEST(HitTest, HasTermFrequencyFlag) {
-  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(h1.has_term_frequency(), IsFalse());
   EXPECT_THAT(h1.term_frequency(), Eq(Hit::kDefaultTermFrequency));
 
-  Hit h2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency);
+  Hit h2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(h2.has_term_frequency(), IsTrue());
   EXPECT_THAT(h2.term_frequency(), Eq(kSomeTermFrequency));
 }
 
 TEST(HitTest, IsPrefixHitFlag) {
-  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(h1.is_prefix_hit(), IsFalse());
 
   Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
-         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+         /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   EXPECT_THAT(h2.is_prefix_hit(), IsFalse());
 
   Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
          /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
   EXPECT_THAT(h3.is_prefix_hit(), IsTrue());
+
+  Hit h4(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
+  EXPECT_THAT(h4.is_prefix_hit(), IsFalse());
 }
 
 TEST(HitTest, IsInPrefixSectionFlag) {
-  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
 
   Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
-         /*is_in_prefix_section=*/false);
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(h2.is_in_prefix_section(), IsFalse());
 
   Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
-         /*is_in_prefix_section=*/true);
+         /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   EXPECT_THAT(h3.is_in_prefix_section(), IsTrue());
 }
 
+TEST(HitTest, HasFlags) {
+  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  EXPECT_THAT(h1.has_flags(), IsFalse());
+
+  Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
+  EXPECT_THAT(h2.has_flags(), IsFalse());
+
+  Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
+         /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
+  EXPECT_THAT(h3.has_flags(), IsFalse());
+
+  Hit h4(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  EXPECT_THAT(h4.has_flags(), IsTrue());
+
+  Hit h5(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+  EXPECT_THAT(h5.has_flags(), IsTrue());
+
+  Hit h6(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
+  EXPECT_THAT(h6.has_flags(), IsTrue());
+
+  Hit h7(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
+  EXPECT_THAT(h7.has_flags(), IsTrue());
+}
+
 TEST(HitTest, Accessors) {
-  Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+  Hit h1(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+         /*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
   EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId));
   EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
+  EXPECT_THAT(h1.term_frequency(), Eq(kSomeTermFrequency));
+  EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
+  EXPECT_THAT(h1.is_prefix_hit(), IsTrue());
 }
 
 TEST(HitTest, Valid) {
-  Hit def;
+  Hit def(Hit::kInvalidValue);
   EXPECT_THAT(def.is_valid(), IsFalse());
-
-  Hit explicit_invalid(Hit::kInvalidValue);
+  Hit explicit_invalid(Hit::kInvalidValue, Hit::kNoEnabledFlags,
+                       Hit::kDefaultTermFrequency);
   EXPECT_THAT(explicit_invalid.is_valid(), IsFalse());
 
   static constexpr Hit::Value kSomeValue = 65372;
-  Hit explicit_valid(kSomeValue);
+  Hit explicit_valid(kSomeValue, Hit::kNoEnabledFlags,
+                     Hit::kDefaultTermFrequency);
   EXPECT_THAT(explicit_valid.is_valid(), IsTrue());
 
-  Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId,
-                              kSomeTermFrequency);
+  Hit maximum_document_id_hit(
+      kSomeSectionid, kMaxDocumentId, kSomeTermFrequency,
+      /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue());
 
-  Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId,
-                             kSomeTermFrequency);
+  Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, kSomeTermFrequency,
+                             /*is_in_prefix_section=*/false,
+                             /*is_prefix_hit=*/false);
   EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue());
 
-  Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeTermFrequency);
+  Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeTermFrequency,
+                              /*is_in_prefix_section=*/false,
+                              /*is_prefix_hit=*/false);
   EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue());
 
-  Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeTermFrequency);
+  Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeTermFrequency,
+                             /*is_in_prefix_section=*/false,
+                             /*is_prefix_hit=*/false);
   EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue());
 
-  // We use Hit with value Hit::kMaxDocumentIdSortValue for std::lower_bound in
-  // the lite index. Verify that the value of the smallest valid Hit (which
+  // We use Hit with value Hit::kMaxDocumentIdSortValue for std::lower_bound
+  // in the lite index. Verify that the value of the smallest valid Hit (which
   // contains kMinSectionId, kMaxDocumentId and 3 flags = false) is >=
   // Hit::kMaxDocumentIdSortValue.
-  Hit smallest_hit(kMinSectionId, kMaxDocumentId, Hit::kDefaultTermFrequency);
+  Hit smallest_hit(kMinSectionId, kMaxDocumentId, Hit::kDefaultTermFrequency,
+                   /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ASSERT_THAT(smallest_hit.is_valid(), IsTrue());
   ASSERT_THAT(smallest_hit.has_term_frequency(), IsFalse());
   ASSERT_THAT(smallest_hit.is_prefix_hit(), IsFalse());
@@ -173,39 +226,78 @@
 }
 
 TEST(HitTest, Comparison) {
-  Hit hit(1, 243, Hit::kDefaultTermFrequency);
+  Hit hit(/*section_id=*/1, /*document_id=*/243, Hit::kDefaultTermFrequency,
+          /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // DocumentIds are sorted in ascending order. So a hit with a lower
   // document_id should be considered greater than one with a higher
   // document_id.
-  Hit higher_document_id_hit(1, 2409, Hit::kDefaultTermFrequency);
-  Hit higher_section_id_hit(15, 243, Hit::kDefaultTermFrequency);
+  Hit higher_document_id_hit(
+      /*section_id=*/1, /*document_id=*/2409, Hit::kDefaultTermFrequency,
+      /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit higher_section_id_hit(/*section_id=*/15, /*document_id=*/243,
+                            Hit::kDefaultTermFrequency,
+                            /*is_in_prefix_section=*/false,
+                            /*is_prefix_hit=*/false);
   // Whether or not a term frequency was set is considered, but the term
   // frequency itself is not.
-  Hit term_frequency_hit(1, 243, 12);
-  Hit prefix_hit(1, 243, Hit::kDefaultTermFrequency,
+  Hit term_frequency_hit(/*section_id=*/1, 243, /*term_frequency=*/12,
+                         /*is_in_prefix_section=*/false,
+                         /*is_prefix_hit=*/false);
+  Hit prefix_hit(/*section_id=*/1, 243, Hit::kDefaultTermFrequency,
                  /*is_in_prefix_section=*/false,
                  /*is_prefix_hit=*/true);
-  Hit hit_in_prefix_section(1, 243, Hit::kDefaultTermFrequency,
+  Hit hit_in_prefix_section(/*section_id=*/1, 243, Hit::kDefaultTermFrequency,
                             /*is_in_prefix_section=*/true,
                             /*is_prefix_hit=*/false);
+  Hit hit_with_all_flags_enabled(/*section_id=*/1, /*document_id=*/243,
+                                 /*term_frequency=*/56,
+                                 /*is_in_prefix_section=*/true,
+                                 /*is_prefix_hit=*/true);
 
   std::vector<Hit> hits{hit,
                         higher_document_id_hit,
                         higher_section_id_hit,
                         term_frequency_hit,
                         prefix_hit,
-                        hit_in_prefix_section};
+                        hit_in_prefix_section,
+                        hit_with_all_flags_enabled};
   std::sort(hits.begin(), hits.end());
-  EXPECT_THAT(
-      hits, ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
-                        prefix_hit, term_frequency_hit, higher_section_id_hit));
+  EXPECT_THAT(hits,
+              ElementsAre(higher_document_id_hit, hit, term_frequency_hit,
+                          hit_in_prefix_section, prefix_hit,
+                          hit_with_all_flags_enabled, higher_section_id_hit));
 
-  Hit higher_term_frequency_hit(1, 243, 108);
+  Hit higher_term_frequency_hit(/*section_id=*/1, /*document_id=*/243,
+                                /*term_frequency=*/108,
+                                /*is_in_prefix_section=*/false,
+                                /*is_prefix_hit=*/false);
   // The term frequency value is not considered when comparing hits.
   EXPECT_THAT(term_frequency_hit, Not(Lt(higher_term_frequency_hit)));
   EXPECT_THAT(higher_term_frequency_hit, Not(Lt(term_frequency_hit)));
 }
 
+TEST(HitTest, CheckFlagsAreConsistent) {
+  Hit::Value value_without_flags = 1 << 30;
+  Hit::Value value_with_flags = value_without_flags + 1;
+  Hit::Flags flags_with_term_freq = 1;
+
+  Hit consistent_hit_no_flags(value_without_flags, Hit::kNoEnabledFlags,
+                              Hit::kDefaultTermFrequency);
+  Hit consistent_hit_with_term_frequency(value_with_flags,
+                                         flags_with_term_freq,
+                                         kSomeTermFrequency);
+  EXPECT_THAT(consistent_hit_no_flags.CheckFlagsAreConsistent(), IsTrue());
+  EXPECT_THAT(consistent_hit_with_term_frequency.CheckFlagsAreConsistent(),
+              IsTrue());
+
+  Hit inconsistent_hit_1(value_with_flags, Hit::kNoEnabledFlags,
+                         Hit::kDefaultTermFrequency);
+  Hit inconsistent_hit_2(value_with_flags, Hit::kNoEnabledFlags,
+                         kSomeTermFrequency);
+  Hit inconsistent_hit_3(value_with_flags, flags_with_term_freq,
+                         Hit::kDefaultTermFrequency);
+  EXPECT_THAT(inconsistent_hit_1.CheckFlagsAreConsistent(), IsFalse());
+  EXPECT_THAT(inconsistent_hit_2.CheckFlagsAreConsistent(), IsFalse());
+  EXPECT_THAT(inconsistent_hit_3.CheckFlagsAreConsistent(), IsFalse());
+}
+
 }  // namespace
 
 }  // namespace lib
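
A note on what the new CheckFlagsAreConsistent test above encodes: its
constants suggest that bit 0 of the hit value records whether a flags byte is
in use, and that bit 0 of the flags byte records whether a non-default term
frequency is stored. A minimal sketch of that invariant, assuming exactly
those bit positions and that Hit::kDefaultTermFrequency is 1 (the helper name
is invented here, not from hit.h):

// Sketch only; the bit positions and the default term frequency of 1 are
// assumptions read off the test constants, not the actual hit.h layout.
bool FlagsLookConsistent(uint32_t value, uint8_t flags,
                         uint8_t term_frequency) {
  bool value_has_flags = (value & 0x1) != 0;     // value_with_flags = v + 1
  bool any_flag_set = flags != 0;                // Hit::kNoEnabledFlags == 0
  bool term_frequency_flag = (flags & 0x1) != 0; // flags_with_term_freq = 1
  bool custom_term_frequency = term_frequency != 1;  // default assumed == 1
  return value_has_flags == any_flag_set &&
         term_frequency_flag == custom_term_frequency;
}

All five hits in the test agree with this predicate: the two consistent hits
return true, and each inconsistent hit violates exactly one of the two
equalities.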
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 31dcc7e..f917aa6 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -65,10 +65,9 @@
         "Requested hit buffer size %d is too large.",
         options.index_merge_size));
   }
-  return LiteIndex::Options(options.base_dir + "/idx/lite.",
-                            options.index_merge_size,
-                            options.lite_index_sort_at_indexing,
-                            options.lite_index_sort_size);
+  return LiteIndex::Options(
+      options.base_dir + "/idx/lite.", options.index_merge_size,
+      options.lite_index_sort_at_indexing, options.lite_index_sort_size);
 }
 
 std::string MakeMainIndexFilepath(const std::string& base_dir) {
@@ -345,7 +344,8 @@
 libtextclassifier3::Status Index::Editor::IndexAllBufferedTerms() {
   for (auto itr = seen_tokens_.begin(); itr != seen_tokens_.end(); itr++) {
     Hit hit(section_id_, document_id_, /*term_frequency=*/itr->second,
-            term_match_type_ == TermMatchType::PREFIX);
+            /*is_in_prefix_section=*/term_match_type_ == TermMatchType::PREFIX,
+            /*is_prefix_hit=*/false);
     ICING_ASSIGN_OR_RETURN(
         uint32_t term_id, term_id_codec_->EncodeTvi(itr->first, TviType::LITE));
     ICING_RETURN_IF_ERROR(lite_index_->AddHit(term_id, hit));
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index ab550e1..50b65ad 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -2736,7 +2736,8 @@
   EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0));
   EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0));
   EXPECT_THAT(storage_info.main_index_block_size(), Ge(0));
-  // There should be 1 block for the header and 1 block for two posting lists.
+  // There should be 1 block for the header and 1 block for three posting lists
+  // ("fo", "foo", "foul").
   EXPECT_THAT(storage_info.num_blocks(), Eq(2));
   EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
 }
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 35dc0b9..682b752 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -113,12 +113,12 @@
     const DocumentStore* document_store, const SchemaStore* schema_store,
     const SearchSpecProto& search_spec, int64_t current_time_ms) {
   std::unordered_map<std::string, std::set<std::string>> type_property_filters;
-  // TODO(b/294274922): Add support for polymorphism in type property filters.
-  for (const TypePropertyMask& type_property_mask :
-       search_spec.type_property_filters()) {
-    type_property_filters[type_property_mask.schema_type()] =
-        std::set<std::string>(type_property_mask.paths().begin(),
-                              type_property_mask.paths().end());
+  for (const SchemaStore::ExpandedTypePropertyMask& type_property_mask :
+       schema_store->ExpandTypePropertyMasks(
+           search_spec.type_property_filters())) {
+    type_property_filters[type_property_mask.schema_type] =
+        std::set<std::string>(type_property_mask.paths.begin(),
+                              type_property_mask.paths.end());
   }
   auto data = std::make_unique<SectionRestrictData>(
       document_store, schema_store, current_time_ms, type_property_filters);
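
The loop rewrite above is where polymorphism enters the type property
filters: instead of consuming each TypePropertyMask verbatim, the filters are
built from SchemaStore::ExpandTypePropertyMasks, which fans a mask out across
a type hierarchy. A hypothetical illustration (the schema names are invented;
only the schema_type and paths fields come from the hunk):

// input : TypePropertyMask{schema_type: "Message", paths: ["subject"]}
// output: ExpandedTypePropertyMask{schema_type: "Message",
//                                  paths: {"subject"}}
//         ExpandedTypePropertyMask{schema_type: "EmailMessage",
//                                  paths: {"subject"}}
// A section restrict on "Message" thus also restricts documents of the
// (hypothetical) child type "EmailMessage" to the same property paths.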
diff --git a/icing/index/lite/lite-index-header.h b/icing/index/lite/lite-index-header.h
index 6208cb6..741d173 100644
--- a/icing/index/lite/lite-index-header.h
+++ b/icing/index/lite/lite-index-header.h
@@ -53,7 +53,7 @@
 class LiteIndex_HeaderImpl : public LiteIndex_Header {
  public:
   struct HeaderData {
-    static const uint32_t kMagic = 0x01c61418;
+    static const uint32_t kMagic = 0xC2EAD682;
 
     uint32_t lite_index_crc;
     uint32_t magic;
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 4c5cb37..3aed7e4 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -74,7 +74,7 @@
 }  // namespace
 
 const TermIdHitPair::Value TermIdHitPair::kInvalidValue =
-    TermIdHitPair(0, Hit()).value();
+    TermIdHitPair(0, Hit(Hit::kInvalidValue)).value();
 
 libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> LiteIndex::Create(
     const LiteIndex::Options& options, const IcingFilesystem* filesystem) {
@@ -517,7 +517,8 @@
 
   // Do binary search over the sorted section and repeat the above steps.
   TermIdHitPair target_term_id_hit_pair(
-      term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
+      term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kNoEnabledFlags,
+                   Hit::kDefaultTermFrequency));
   for (const TermIdHitPair* ptr = std::lower_bound(
            array, array + header_->searchable_end(), target_term_id_hit_pair);
        ptr < array + header_->searchable_end(); ++ptr) {
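
Why Hit::kMaxDocumentIdSortValue still works as the probe after the flags
change: hit_test.cc above asserts that the smallest valid hit's value is >=
Hit::kMaxDocumentIdSortValue, so pairing term_id with that value and an empty
flags byte yields a TermIdHitPair that sorts at or before every real pair for
that term. A sketch of the search this sets up (names as in the hunk):

// Sketch; `array` and `header_->searchable_end()` are as in the hunk above.
TermIdHitPair target(term_id, Hit(Hit::kMaxDocumentIdSortValue,
                                  Hit::kNoEnabledFlags,
                                  Hit::kDefaultTermFrequency));
const TermIdHitPair* first = std::lower_bound(
    array, array + header_->searchable_end(), target);
// `first` now points at the first pair whose term id is >= term_id, i.e. the
// start of term_id's hits in the sorted section, if any were indexed.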
diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc
index 7ff006e..f8ea94a 100644
--- a/icing/index/lite/lite-index_test.cc
+++ b/icing/index/lite/lite-index_test.cc
@@ -27,7 +27,6 @@
 #include "icing/index/hit/hit.h"
 #include "icing/index/iterator/doc-hit-info-iterator.h"
 #include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
-#include "icing/index/lite/lite-index-header.h"
 #include "icing/index/lite/term-id-hit-pair.h"
 #include "icing/index/term-id-codec.h"
 #include "icing/legacy/index/icing-dynamic-trie.h"
@@ -73,15 +72,25 @@
 
 constexpr NamespaceId kNamespace0 = 0;
 
+TEST_F(LiteIndexTest, TermIdHitPairInvalidValue) {
+  TermIdHitPair invalid_term_hit_pair(TermIdHitPair::kInvalidValue);
+
+  EXPECT_THAT(invalid_term_hit_pair.term_id(), Eq(0));
+  EXPECT_THAT(invalid_term_hit_pair.hit().value(), Eq(Hit::kInvalidValue));
+  EXPECT_THAT(invalid_term_hit_pair.hit().term_frequency(),
+              Eq(Hit::kDefaultTermFrequency));
+}
+
 TEST_F(LiteIndexTest,
        LiteIndexFetchHits_sortAtQuerying_unsortedHitsBelowSortThreshold) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/false,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/false,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -97,9 +106,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
   Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
 
@@ -109,9 +118,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
                              term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
   Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
   ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
 
@@ -154,15 +163,16 @@
        LiteIndexFetchHits_sortAtIndexing_unsortedHitsBelowSortThreshold) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
   // However, note that in these tests we're unable to sort hits after
   // indexing, as sorting is performed by the string-section-indexing-handler
   // after all hits in an entire document have been indexed, rather than
   // after each AddHits() operation.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/true,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/true,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -178,9 +188,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
   Hit foo_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit foo_hit1(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit0));
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, foo_hit1));
 
@@ -190,9 +200,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t bar_term_id,
                              term_id_codec_->EncodeTvi(bar_tvi, TviType::LITE));
   Hit bar_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit bar_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit0));
   ICING_ASSERT_OK(lite_index->AddHit(bar_term_id, bar_hit1));
 
@@ -237,15 +247,16 @@
     LiteIndexFetchHits_sortAtQuerying_unsortedHitsExceedingSortAtIndexThreshold) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
   // However, note that in these tests we're unable to sort hits after
   // indexing, as sorting is performed by the string-section-indexing-handler
   // after all hits in an entire document have been indexed, rather than
   // after each AddHits() operation.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/false,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/false,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -257,36 +268,36 @@
   // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
   // Doc 0
   Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 1
   Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 2
   Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 3
   Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
 
   // Create terms
   // Foo
@@ -398,11 +409,12 @@
     LiteIndexFetchHits_sortAtIndexing_unsortedHitsExceedingSortAtIndexThreshold) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/true,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/true,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -414,49 +426,49 @@
   // Create 4 hits for docs 0-2, and 2 hits for doc 3 -- 14 in total
   // Doc 0
   Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit1(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit2(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 1
   Hit doc1_hit0(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit2(/*section_id=*/1, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit3(/*section_id=*/2, /*document_id=*/1, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 2
   Hit doc2_hit0(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc2_hit1(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc2_hit2(/*section_id=*/1, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc2_hit3(/*section_id=*/2, /*document_id=*/2, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 3
   Hit doc3_hit0(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc3_hit1(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc3_hit2(/*section_id=*/1, /*document_id=*/3, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc3_hit3(/*section_id=*/2, /*document_id=*/3, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   // Doc 4
   Hit doc4_hit0(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc4_hit1(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc4_hit2(/*section_id=*/1, /*document_id=*/4, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc4_hit3(/*section_id=*/2, /*document_id=*/4, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
 
   // Create terms
   // Foo
@@ -590,11 +602,12 @@
 TEST_F(LiteIndexTest, LiteIndexIterator) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/true,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/true,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -610,9 +623,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
   Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   SectionIdMask doc0_section_id_mask = 0b11;
   std::unordered_map<SectionId, Hit::TermFrequency>
       expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
@@ -620,9 +633,9 @@
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
 
   Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   SectionIdMask doc1_section_id_mask = 0b110;
   std::unordered_map<SectionId, Hit::TermFrequency>
       expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
@@ -658,11 +671,12 @@
 TEST_F(LiteIndexTest, LiteIndexIterator_sortAtIndexingDisabled) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/false,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/false,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -678,9 +692,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
   Hit doc0_hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc0_hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   SectionIdMask doc0_section_id_mask = 0b11;
   std::unordered_map<SectionId, Hit::TermFrequency>
       expected_section_ids_tf_map0 = {{0, 3}, {1, 5}};
@@ -688,9 +702,9 @@
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, doc0_hit1));
 
   Hit doc1_hit1(/*section_id=*/1, /*document_id=*/1, /*term_frequency=*/7,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit doc1_hit2(/*section_id=*/2, /*document_id=*/1, /*term_frequency=*/11,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   SectionIdMask doc1_section_id_mask = 0b110;
   std::unordered_map<SectionId, Hit::TermFrequency>
       expected_section_ids_tf_map1 = {{1, 7}, {2, 11}};
@@ -726,11 +740,12 @@
 TEST_F(LiteIndexTest, LiteIndexHitBufferSize) {
   // Set up LiteIndex and TermIdCodec
   std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index";
-  // At 64 bytes the unsorted tail can contain a max of 8 TermHitPairs.
-  LiteIndex::Options options(lite_index_file_name,
-                             /*hit_buffer_want_merge_bytes=*/1024 * 1024,
-                             /*hit_buffer_sort_at_indexing=*/true,
-                             /*hit_buffer_sort_threshold_bytes=*/64);
+  // The unsorted tail can contain a max of 8 TermIdHitPairs.
+  LiteIndex::Options options(
+      lite_index_file_name,
+      /*hit_buffer_want_merge_bytes=*/1024 * 1024,
+      /*hit_buffer_sort_at_indexing=*/true,
+      /*hit_buffer_sort_threshold_bytes=*/sizeof(TermIdHitPair) * 8);
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LiteIndex> lite_index,
                              LiteIndex::Create(options, &icing_filesystem_));
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -746,9 +761,9 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
   Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/3,
-           /*is_in_prefix_section=*/false);
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit hit1(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
-           /*is_in_prefix_section=*/false);
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, hit0));
   ICING_ASSERT_OK(lite_index->AddHit(foo_term_id, hit1));
 
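The repeated 64 -> sizeof(TermIdHitPair) * 8 change throughout this file
follows from the widened encoding: with the extra flags byte, a TermIdHitPair
occupies 9 bytes instead of 8, so a hard-coded 64-byte threshold would now
hold only 7 pairs and the "max of 8 TermIdHitPairs" comments would be wrong.
Spelled out (the sizes assume the 9-byte std::array layout from
term-id-hit-pair.h, whose only member is the byte array, so no padding):

// 24-bit term id + 32-bit hit value + 8-bit flags + 8-bit term frequency
// = 72 bits = 9 bytes per pair.
static_assert(sizeof(TermIdHitPair) == 9, "9-byte encoded pair");
constexpr int kUnsortedTailPairs = 8;
constexpr int kSortThresholdBytes =
    sizeof(TermIdHitPair) * kUnsortedTailPairs;  // 72 bytes, not 64
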
diff --git a/icing/index/lite/lite-index_thread-safety_test.cc b/icing/index/lite/lite-index_thread-safety_test.cc
index 53aa6cd..a73ca28 100644
--- a/icing/index/lite/lite-index_thread-safety_test.cc
+++ b/icing/index/lite/lite-index_thread-safety_test.cc
@@ -13,15 +13,25 @@
 // limitations under the License.
 
 #include <array>
+#include <cstdint>
+#include <memory>
 #include <string>
+#include <string_view>
 #include <thread>
 #include <vector>
 
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
 #include "icing/index/lite/lite-index.h"
 #include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
 #include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/namespace-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/tmp-directory.h"
 
@@ -109,9 +119,11 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
   Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
-               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+               /*is_prefix_hit=*/false);
   Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
-               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+               /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1));
 
@@ -155,9 +167,11 @@
     ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
                                term_id_codec_->EncodeTvi(tvi, TviType::LITE));
     Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
-                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                 /*is_prefix_hit=*/false);
     Hit doc_hit1(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId1,
-                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                 /*is_prefix_hit=*/false);
     ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
     ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
   }
@@ -208,7 +222,8 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
   Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
-               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+               /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
 
   // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
@@ -228,7 +243,8 @@
     } else {
       // Odd-numbered thread calls AddHit.
       Hit doc_hit(/*section_id=*/thread_id / 2, /*document_id=*/kDocumentId0,
-                  Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+                  Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                  /*is_prefix_hit=*/false);
       ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
     }
   };
@@ -273,7 +289,8 @@
   ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
   Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
-               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+               Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+               /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0));
 
   // Create kNumThreads threads. Every even-numbered thread calls FetchHits and
@@ -302,7 +319,8 @@
       // Odd-numbered thread calls AddHit.
       // AddHit to section 0 of a new doc.
       Hit doc_hit(/*section_id=*/kSectionId0, /*document_id=*/thread_id / 2,
-                  Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+                  Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                  /*is_prefix_hit=*/false);
       ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
     }
   };
@@ -335,9 +353,11 @@
     ICING_ASSERT_OK_AND_ASSIGN(uint32_t term_id,
                                term_id_codec_->EncodeTvi(tvi, TviType::LITE));
     Hit doc_hit0(/*section_id=*/kSectionId0, /*document_id=*/kDocumentId0,
-                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                 /*is_prefix_hit=*/false);
     Hit doc_hit1(/*section_id=*/kSectionId1, /*document_id=*/kDocumentId0,
-                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false);
+                 Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                 /*is_prefix_hit=*/false);
     ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit0));
     ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit1));
   }
@@ -370,7 +390,7 @@
       // AddHit to section (thread_id % 5 + 1) of doc 0.
       Hit doc_hit(/*section_id=*/thread_id % 5 + 1,
                   /*document_id=*/kDocumentId0, Hit::kDefaultTermFrequency,
-                  /*is_in_prefix_section=*/false);
+                  /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
       ICING_ASSERT_OK(lite_index_->AddHit(term_id, doc_hit));
     }
   };
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
index 82bd010..760aa76 100644
--- a/icing/index/lite/term-id-hit-pair.h
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -15,25 +15,22 @@
 #ifndef ICING_INDEX_TERM_ID_HIT_PAIR_H_
 #define ICING_INDEX_TERM_ID_HIT_PAIR_H_
 
+#include <array>
 #include <cstdint>
-#include <limits>
-#include <memory>
-#include <string>
-#include <vector>
 
 #include "icing/index/hit/hit.h"
-#include "icing/util/bit-util.h"
 
 namespace icing {
 namespace lib {
 
 class TermIdHitPair {
  public:
-  // Layout bits: 24 termid + 32 hit value + 8 hit term frequency.
-  using Value = uint64_t;
+  // Layout bits: 24 termid + 32 hit value + 8 hit flags + 8 hit term frequency.
+  using Value = std::array<uint8_t, 9>;
 
   static constexpr int kTermIdBits = 24;
   static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
+  static constexpr int kHitFlagsBits = sizeof(Hit::Flags) * 8;
   static constexpr int kHitTermFrequencyBits = sizeof(Hit::TermFrequency) * 8;
 
   static const Value kInvalidValue;
@@ -41,33 +38,48 @@
   explicit TermIdHitPair(Value v = kInvalidValue) : value_(v) {}
 
   TermIdHitPair(uint32_t term_id, const Hit& hit) {
-    static_assert(kTermIdBits + kHitValueBits + kHitTermFrequencyBits <=
-                      sizeof(Value) * 8,
-                  "TermIdHitPairTooBig");
+    static_assert(
+        kTermIdBits + kHitValueBits + kHitFlagsBits + kHitTermFrequencyBits <=
+            sizeof(Value) * 8,
+        "TermIdHitPairTooBig");
 
-    value_ = 0;
-    // Term id goes into the most significant bits because it takes
-    // precedent in sorts.
-    bit_util::BitfieldSet(term_id, kHitValueBits + kHitTermFrequencyBits,
-                          kTermIdBits, &value_);
-    bit_util::BitfieldSet(hit.value(), kHitTermFrequencyBits, kHitValueBits,
-                          &value_);
-    bit_util::BitfieldSet(hit.term_frequency(), 0, kHitTermFrequencyBits,
-                          &value_);
+    // Set term id. The term id takes 3 bytes and goes into value_[0:2] (most
+    // significant bits) because it takes precedence in sorts.
+    value_[0] = static_cast<uint8_t>((term_id >> 16) & 0xff);
+    value_[1] = static_cast<uint8_t>((term_id >> 8) & 0xff);
+    value_[2] = static_cast<uint8_t>((term_id >> 0) & 0xff);
+
+    // Set hit value. The hit value takes 4 bytes and goes into value_[3:6].
+    value_[3] = static_cast<uint8_t>((hit.value() >> 24) & 0xff);
+    value_[4] = static_cast<uint8_t>((hit.value() >> 16) & 0xff);
+    value_[5] = static_cast<uint8_t>((hit.value() >> 8) & 0xff);
+    value_[6] = static_cast<uint8_t>((hit.value() >> 0) & 0xff);
+
+    // Set flags in value_[7].
+    value_[7] = hit.flags();
+
+    // Set term frequency in value_[8].
+    value_[8] = hit.term_frequency();
   }
 
   uint32_t term_id() const {
-    return bit_util::BitfieldGet(value_, kHitValueBits + kHitTermFrequencyBits,
-                                 kTermIdBits);
+    return (static_cast<uint32_t>(value_[0]) << 16) |
+           (static_cast<uint32_t>(value_[1]) << 8) |
+           (static_cast<uint32_t>(value_[2]) << 0);
   }
 
   Hit hit() const {
-    return Hit(
-        bit_util::BitfieldGet(value_, kHitTermFrequencyBits, kHitValueBits),
-        bit_util::BitfieldGet(value_, 0, kHitTermFrequencyBits));
+    Hit::Value hit_value = (static_cast<uint32_t>(value_[3]) << 24) |
+                           (static_cast<uint32_t>(value_[4]) << 16) |
+                           (static_cast<uint32_t>(value_[5]) << 8) |
+                           (static_cast<uint32_t>(value_[6]) << 0);
+    Hit::Flags hit_flags = value_[7];
+    Hit::TermFrequency term_frequency = value_[8];
+
+    return Hit(hit_value, hit_flags, term_frequency);
   }
 
-  Value value() const { return value_; }
+  const Value& value() const { return value_; }
 
   bool operator==(const TermIdHitPair& rhs) const {
     return value_ == rhs.value_;
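
The switch above from a packed uint64_t to std::array<uint8_t, 9> is forced
by the added flags byte: 24 + 32 + 8 + 8 = 72 bits no longer fit in 64.
Storing the fields most-significant-first keeps std::array's lexicographic
comparison equivalent to the old integer comparison, so sorting and
lower_bound behavior carry over unchanged. A small round-trip sketch (the
values are arbitrary):

TermIdHitPair pair(/*term_id=*/0x012345,
                   Hit(/*section_id=*/1, /*document_id=*/243,
                       /*term_frequency=*/12,
                       /*is_in_prefix_section=*/false,
                       /*is_prefix_hit=*/false));
// pair.term_id() == 0x012345          (decoded from value_[0..2])
// pair.hit().term_frequency() == 12   (decoded from value_[8])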
diff --git a/icing/index/lite/term-id-hit-pair_test.cc b/icing/index/lite/term-id-hit-pair_test.cc
new file mode 100644
index 0000000..28855b4
--- /dev/null
+++ b/icing/index/lite/term-id-hit-pair_test.cc
@@ -0,0 +1,95 @@
+// Copyright (C) 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/lite/term-id-hit-pair.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/hit/hit.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+
+static constexpr DocumentId kSomeDocumentId = 24;
+static constexpr SectionId kSomeSectionid = 5;
+static constexpr Hit::TermFrequency kSomeTermFrequency = 57;
+static constexpr uint32_t kSomeTermId = 129;
+static constexpr uint32_t kSomeSmallerTermId = 1;
+static constexpr uint32_t kSomeLargerTermId = 0b101010101111111100000001;
+
+TEST(TermIdHitPairTest, Accessors) {
+  Hit hit1(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit hit2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+           /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+  Hit hit3(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit invalid_hit(Hit::kInvalidValue);
+
+  TermIdHitPair term_id_hit_pair_1(kSomeTermId, hit1);
+  EXPECT_THAT(term_id_hit_pair_1.term_id(), Eq(kSomeTermId));
+  EXPECT_THAT(term_id_hit_pair_1.hit(), Eq(hit1));
+
+  TermIdHitPair term_id_hit_pair_2(kSomeLargerTermId, hit2);
+  EXPECT_THAT(term_id_hit_pair_2.term_id(), Eq(kSomeLargerTermId));
+  EXPECT_THAT(term_id_hit_pair_2.hit(), Eq(hit2));
+
+  TermIdHitPair term_id_hit_pair_3(kSomeTermId, invalid_hit);
+  EXPECT_THAT(term_id_hit_pair_3.term_id(), Eq(kSomeTermId));
+  EXPECT_THAT(term_id_hit_pair_3.hit(), Eq(invalid_hit));
+}
+
+TEST(TermIdHitPairTest, Comparison) {
+  Hit hit(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency,
+          /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit smaller_hit(/*section_id=*/1, /*document_id=*/100, /*term_frequency=*/1,
+                  /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+
+  TermIdHitPair term_id_hit_pair(kSomeTermId, hit);
+  TermIdHitPair term_id_hit_pair_equal(kSomeTermId, hit);
+  TermIdHitPair term_id_hit_pair_smaller_hit(kSomeTermId, smaller_hit);
+  TermIdHitPair term_id_hit_pair_smaller_term_id(kSomeSmallerTermId, hit);
+  TermIdHitPair term_id_hit_pair_larger_term_id(kSomeLargerTermId, hit);
+  TermIdHitPair term_id_hit_pair_smaller_term_id_and_hit(kSomeSmallerTermId,
+                                                         smaller_hit);
+
+  std::vector<TermIdHitPair> term_id_hit_pairs{
+      term_id_hit_pair,
+      term_id_hit_pair_equal,
+      term_id_hit_pair_smaller_hit,
+      term_id_hit_pair_smaller_term_id,
+      term_id_hit_pair_larger_term_id,
+      term_id_hit_pair_smaller_term_id_and_hit};
+  std::sort(term_id_hit_pairs.begin(), term_id_hit_pairs.end());
+  EXPECT_THAT(term_id_hit_pairs,
+              ElementsAre(term_id_hit_pair_smaller_term_id_and_hit,
+                          term_id_hit_pair_smaller_term_id,
+                          term_id_hit_pair_smaller_hit, term_id_hit_pair_equal,
+                          term_id_hit_pair, term_id_hit_pair_larger_term_id));
+}
+
+}  // namespace
+
+}  // namespace lib
+}  // namespace icing
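
One property the new Comparison test pins down: because the term id occupies
the most significant bytes of the encoded value, it dominates ordering, and
the hit value only breaks ties between pairs carrying the same term id. For
example (reusing the `hit` and `smaller_hit` objects from the test above):

TermIdHitPair a(/*term_id=*/1, hit);          // any hit
TermIdHitPair b(/*term_id=*/2, smaller_hit);  // "smaller" hit, larger term id
// a < b: value_[0..2] of a is {0, 0, 1} vs {0, 0, 2} for b, so the byte-wise
// comparison is decided before the hit bytes are ever looked at.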
diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc
index c26a6d7..cc130c2 100644
--- a/icing/index/main/main-index-merger.cc
+++ b/icing/index/main/main-index-merger.cc
@@ -14,14 +14,20 @@
 
 #include "icing/index/main/main-index-merger.h"
 
+#include <algorithm>
 #include <cstdint>
 #include <cstring>
-#include <memory>
 #include <unordered_map>
+#include <utility>
+#include <vector>
 
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "icing/absl_ports/canonical_errors.h"
-#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-common.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/lite/lite-index.h"
 #include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/main/main-index.h"
 #include "icing/index/term-id-codec.h"
 #include "icing/legacy/core/icing-string-util.h"
 #include "icing/util/logging.h"
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 37e14fc..333e338 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -13,19 +13,23 @@
 // limitations under the License.
 #include "icing/index/main/main-index-merger.h"
 
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include "icing/absl_ports/canonical_errors.h"
 #include "icing/file/filesystem.h"
-#include "icing/index/iterator/doc-hit-info-iterator.h"
-#include "icing/index/main/doc-hit-info-iterator-term-main.h"
-#include "icing/index/main/main-index-merger.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/lite/lite-index.h"
+#include "icing/index/lite/term-id-hit-pair.h"
 #include "icing/index/main/main-index.h"
 #include "icing/index/term-id-codec.h"
-#include "icing/index/term-property-id.h"
 #include "icing/legacy/index/icing-dynamic-trie.h"
 #include "icing/legacy/index/icing-filesystem.h"
-#include "icing/schema/section.h"
 #include "icing/store/namespace-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/tmp-directory.h"
@@ -89,10 +93,10 @@
       term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
 
   // 2. Build up a fake LexiconMergeOutputs
@@ -128,10 +132,10 @@
       term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
 
   // 2. Build up a fake LexiconMergeOutputs
@@ -191,11 +195,11 @@
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
 
   Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
-                    /*is_in_prefix_section=*/true);
+                    /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
   Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
-                   Hit::kDefaultTermFrequency,
-                   /*is_in_prefix_section=*/true);
+                   Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/true,
+                   /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
 
   // 2. Build up a fake LexiconMergeOutputs
@@ -255,10 +259,10 @@
                              term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
 
   Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
-                    /*is_in_prefix_section=*/true);
+                    /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
   Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
-                   /*is_in_prefix_section=*/true);
+                   /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
   // The prefix hit should take the sum, 114, as its term_frequency.
   Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
@@ -320,11 +324,11 @@
 
   Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0,
                     /*term_frequency=*/Hit::kMaxTermFrequency,
-                    /*is_in_prefix_section=*/true);
+                    /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
   Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0,
                     Hit::kDefaultTermFrequency,
-                    /*is_in_prefix_section=*/true);
+                    /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, fool_doc0_hit));
 
   // 2. Build up a fake LexiconMergeOutputs
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index aae60c6..85ee4dc 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -651,7 +651,7 @@
     ICING_ASSIGN_OR_RETURN(tmp, backfill_accessor->GetNextHitsBatch());
   }
 
-  Hit last_added_hit;
+  Hit last_added_hit(Hit::kInvalidValue);
   // The hits in backfill_hits are in the reverse order of how they were added.
   // Iterate in reverse to add them to this new posting list in the correct
   // order.
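
The one-line change above, together with the TermIdHitPair(0,
Hit(Hit::kInvalidValue)) change in lite-index.cc, suggests that a Hit is no
longer default-constructed into its invalid state; the sentinel is now
spelled out. A hedged sketch of the surrounding pattern (the container name
and the duplicate check are hypothetical; only the sentinel construction
comes from the diff):

Hit last_added_hit(Hit::kInvalidValue);  // "no hit added yet" sentinel
for (const Hit& hit : backfill_hits_in_order) {  // hypothetical container
  // is_valid() is false until the first hit is recorded, so the very first
  // iteration never compares equal to the sentinel.
  if (last_added_hit.is_valid() && hit == last_added_hit) {
    continue;  // skip duplicates (illustrative policy, not from the diff)
  }
  last_added_hit = hit;
}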
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index fa96e6c..db9dbe2 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -14,23 +14,33 @@
 
 #include "icing/index/main/main-index.h"
 
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include "icing/absl_ports/canonical_errors.h"
 #include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
 #include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite/lite-index.h"
 #include "icing/index/lite/term-id-hit-pair.h"
 #include "icing/index/main/doc-hit-info-iterator-term-main.h"
 #include "icing/index/main/main-index-merger.h"
 #include "icing/index/term-id-codec.h"
-#include "icing/index/term-property-id.h"
 #include "icing/legacy/index/icing-dynamic-trie.h"
 #include "icing/legacy/index/icing-filesystem.h"
 #include "icing/legacy/index/icing-mock-filesystem.h"
 #include "icing/schema/section.h"
+#include "icing/store/document-id.h"
 #include "icing/store/namespace-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/tmp-directory.h"
+#include "icing/util/status-macros.h"
 
 namespace icing {
 namespace lib {
@@ -152,7 +162,7 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
 
   // 2. Create the main index. It should have no entries in its lexicon.
@@ -178,7 +188,7 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
 
   // 2. Create the main index. It should have no entries in its lexicon.
@@ -217,7 +227,7 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
 
   // 2. Create the main index. It should have no entries in its lexicon.
@@ -254,18 +264,18 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
 
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
 
   Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
 
@@ -332,18 +342,18 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
 
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
 
   Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
 
@@ -387,14 +397,14 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc3_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc3_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc3_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(fall_term_id, doc3_hit));
 
   Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc4_hit));
   ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc4_hit));
 
@@ -449,15 +459,15 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
 
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
 
   Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc2_hit));
 
   // 2. Create the main index. It should have no entries in its lexicon.
@@ -500,15 +510,15 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
 
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
 
   Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc2_hit));
 
   // 2. Create the main index. It should have no entries in its lexicon.
@@ -554,17 +564,17 @@
         document_id % Hit::kMaxTermFrequency + 1);
     Hit doc_hit0(
         /*section_id=*/0, /*document_id=*/document_id, term_frequency,
-        /*is_in_prefix_section=*/false);
+        /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
     ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
 
     Hit doc_hit1(
         /*section_id=*/1, /*document_id=*/document_id, term_frequency,
-        /*is_in_prefix_section=*/false);
+        /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
     ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
 
     Hit doc_hit2(
         /*section_id=*/2, /*document_id=*/document_id, term_frequency,
-        /*is_in_prefix_section=*/false);
+        /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
     ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
   }
 
@@ -619,7 +629,7 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/true);
+               /*is_in_prefix_section=*/true, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
 
   // 2. Create the main index. It should have no entries in its lexicon.
@@ -648,7 +658,7 @@
                              term_id_codec_->EncodeTvi(tvi, TviType::LITE));
 
   Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
-               /*is_in_prefix_section=*/false);
+               /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
 
   // 5. Merge the index. The main index should now contain "fool", "foot"
@@ -682,7 +692,7 @@
   uint32_t num_docs = 2038;
   for (DocumentId document_id = 0; document_id < num_docs; ++document_id) {
     Hit doc_hit(section_id, document_id, Hit::kDefaultTermFrequency,
-                /*is_in_prefix_section=*/false);
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
     ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit));
   }
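
Aside: every updated call site in this test file now spells out the full five-argument Hit constructor. A minimal standalone sketch of the shape those call sites assume (illustration only; the field widths here are not Icing's real bit layout):

#include <cstdint>

// Hypothetical stand-in for icing::lib::Hit, mirroring the five-argument
// constructor used above. The two trailing booleans are the bits that now
// feed the new flags byte.
struct HitSketch {
  HitSketch(uint8_t section_id, uint32_t document_id, uint8_t term_frequency,
            bool is_in_prefix_section, bool is_prefix_hit)
      : section_id(section_id),
        document_id(document_id),
        term_frequency(term_frequency),
        is_in_prefix_section(is_in_prefix_section),
        is_prefix_hit(is_prefix_hit) {}

  uint8_t section_id;
  uint32_t document_id;
  uint8_t term_frequency;
  bool is_in_prefix_section;
  bool is_prefix_hit;
};

// Usage mirroring the updated test code:
HitSketch doc_hit(/*section_id=*/0, /*document_id=*/1, /*term_frequency=*/1,
                  /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
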
 
diff --git a/icing/index/main/posting-list-hit-accessor_test.cc b/icing/index/main/posting-list-hit-accessor_test.cc
index 1127814..c2460ff 100644
--- a/icing/index/main/posting-list-hit-accessor_test.cc
+++ b/icing/index/main/posting-list-hit-accessor_test.cc
@@ -15,14 +15,19 @@
 #include "icing/index/main/posting-list-hit-accessor.h"
 
 #include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
 
+#include "icing/text_classifier/lib3/utils/base/status.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "icing/file/filesystem.h"
 #include "icing/file/posting_list/flash-index-storage.h"
-#include "icing/file/posting_list/index-block.h"
+#include "icing/file/posting_list/posting-list-accessor.h"
+#include "icing/file/posting_list/posting-list-common.h"
 #include "icing/file/posting_list/posting-list-identifier.h"
-#include "icing/file/posting_list/posting-list-used.h"
 #include "icing/index/hit/hit.h"
 #include "icing/index/main/posting-list-hit-serializer.h"
 #include "icing/testing/common-matchers.h"
@@ -102,7 +107,8 @@
       PostingListHitAccessor::Create(flash_index_storage_.get(),
                                      serializer_.get()));
   // Add a single hit. This will fit in a min-sized posting list.
-  Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency);
+  Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
   PostingListAccessor::FinalizeResult result1 =
       std::move(*pl_accessor).Finalize();
@@ -139,12 +145,12 @@
       std::unique_ptr<PostingListHitAccessor> pl_accessor,
       PostingListHitAccessor::Create(flash_index_storage_.get(),
                                      serializer_.get()));
-  // The smallest posting list size is 15 bytes. The first four hits will be
-  // compressed to one byte each and will be able to fit in the 5 byte padded
-  // region. The last hit will fit in one of the special hits. The posting list
-  // will be ALMOST_FULL and can fit at most 2 more hits.
+  // Use a small posting list of 30 bytes. The first 17 hits will be compressed
+  // to one byte each and will be able to fit in the 18 byte padded region. The
+  // last hit will fit in one of the special hits. The posting list will be
+  // ALMOST_FULL and can fit at most 2 more hits.
   std::vector<Hit> hits1 =
-      CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
+      CreateHits(/*num_hits=*/18, /*desired_byte_length=*/1);
   for (const Hit& hit : hits1) {
     ICING_ASSERT_OK(pl_accessor->PrependHit(hit));
   }
@@ -160,10 +166,9 @@
       pl_accessor,
       PostingListHitAccessor::CreateFromExisting(
           flash_index_storage_.get(), serializer_.get(), result1.id));
-  // The current posting list can fit at most 2 more hits. Adding 12 more hits
-  // should result in these hits being moved to a larger posting list.
+  // The current posting list can fit at most 2 more hits.
   std::vector<Hit> hits2 = CreateHits(
-      /*start_docid=*/hits1.back().document_id() + 1, /*num_hits=*/12,
+      /*last_hit=*/hits1.back(), /*num_hits=*/2,
       /*desired_byte_length=*/1);
 
   for (const Hit& hit : hits2) {
@@ -172,18 +177,36 @@
   PostingListAccessor::FinalizeResult result2 =
       std::move(*pl_accessor).Finalize();
   ICING_EXPECT_OK(result2.status);
+  // The 2 hits should still fit on the first block.
+  EXPECT_THAT(result2.id.block_index(), Eq(1));
+  EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+
+  // Add one more hit
+  ICING_ASSERT_OK_AND_ASSIGN(
+      pl_accessor,
+      PostingListHitAccessor::CreateFromExisting(
+          flash_index_storage_.get(), serializer_.get(), result2.id));
+  // The current posting list should be FULL. Adding more hits should result in
+  // these hits being moved to a larger posting list.
+  Hit single_hit =
+      CreateHit(/*last_hit=*/hits2.back(), /*desired_byte_length=*/1);
+  ICING_ASSERT_OK(pl_accessor->PrependHit(single_hit));
+  PostingListAccessor::FinalizeResult result3 =
+      std::move(*pl_accessor).Finalize();
+  ICING_EXPECT_OK(result3.status);
   // Should have been allocated to the second (new) block because the posting
   // list should have grown beyond the size that the first block maintains.
-  EXPECT_THAT(result2.id.block_index(), Eq(2));
-  EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
+  EXPECT_THAT(result3.id.block_index(), Eq(2));
+  EXPECT_THAT(result3.id.posting_list_index(), Eq(0));
 
-  // The posting list at result2.id should hold all of the hits that have been
+  // The posting list at result3.id should hold all of the hits that have been
   // added.
   for (const Hit& hit : hits2) {
     hits1.push_back(hit);
   }
+  hits1.push_back(single_hit);
   ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
-                             flash_index_storage_->GetPostingList(result2.id));
+                             flash_index_storage_->GetPostingList(result3.id));
   EXPECT_THAT(serializer_->GetHits(&pl_holder.posting_list),
               IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
 }
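
The sizing asserted by this test can be sanity-checked in isolation. A small sketch, assuming the post-change encoded widths (a full Hit serializes to 6 bytes: 4-byte value, 1 flags byte, 1 term-frequency byte) and the 30-byte posting list the test requests:

#include <cassert>

int main() {
  const int kHitBytes = 6;           // assumed serialized size of a full Hit
  const int kPostingListBytes = 30;  // size requested by the test
  const int kSpecialRegion = 2 * kHitBytes;  // two special hits = 12 bytes
  const int kPaddedRegion = kPostingListBytes - kSpecialRegion;
  assert(kPaddedRegion == 18);  // the "18 byte padded region" quoted above

  // 17 one-byte deltas fit in the padded region and the 18th (front) hit
  // occupies special hit 1: ALMOST_FULL, with room for one more delta byte
  // plus one hit in special hit 0 -- i.e. "at most 2 more hits".
  assert(17 * 1 <= kPaddedRegion);
  return 0;
}
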
@@ -307,7 +330,7 @@
       std::unique_ptr<PostingListHitAccessor> pl_accessor,
       PostingListHitAccessor::Create(flash_index_storage_.get(),
                                      serializer_.get()));
-  Hit invalid_hit;
+  Hit invalid_hit(Hit::kInvalidValue);
   EXPECT_THAT(pl_accessor->PrependHit(invalid_hit),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 }
@@ -317,14 +340,17 @@
       std::unique_ptr<PostingListHitAccessor> pl_accessor,
       PostingListHitAccessor::Create(flash_index_storage_.get(),
                                      serializer_.get()));
-  Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
+  Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
 
-  Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency);
+  Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(pl_accessor->PrependHit(hit2),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 
-  Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency);
+  Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   EXPECT_THAT(pl_accessor->PrependHit(hit3),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 }
@@ -345,7 +371,8 @@
       std::unique_ptr<PostingListHitAccessor> pl_accessor,
       PostingListHitAccessor::Create(flash_index_storage_.get(),
                                      serializer_.get()));
-  Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
+  Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(pl_accessor->PrependHit(hit1));
   PostingListAccessor::FinalizeResult result1 =
       std::move(*pl_accessor).Finalize();
diff --git a/icing/index/main/posting-list-hit-serializer.cc b/icing/index/main/posting-list-hit-serializer.cc
index e14a0c0..88c0754 100644
--- a/icing/index/main/posting-list-hit-serializer.cc
+++ b/icing/index/main/posting-list-hit-serializer.cc
@@ -19,8 +19,11 @@
 #include <limits>
 #include <vector>
 
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "icing/absl_ports/canonical_errors.h"
 #include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/hit/hit.h"
 #include "icing/legacy/core/icing-string-util.h"
 #include "icing/legacy/index/icing-bit-util.h"
 #include "icing/util/logging.h"
@@ -35,6 +38,10 @@
   return hit.has_term_frequency() ? sizeof(Hit::TermFrequency) : 0;
 }
 
+uint32_t GetFlagsByteSize(const Hit& hit) {
+  return hit.has_flags() ? sizeof(Hit::Flags) : 0;
+}
+
 }  // namespace
 
 uint32_t PostingListHitSerializer::GetBytesUsed(
@@ -55,14 +62,45 @@
     return posting_list_used->size_in_bytes();
   }
 
-  // In NOT_FULL status BytesUsed contains no special hits. The minimum sized
-  // posting list that would be guaranteed to fit these hits would be
-  // ALMOST_FULL, with kInvalidHit in special_hit(0), the uncompressed Hit in
-  // special_hit(1) and the n compressed hits in the compressed region.
-  // BytesUsed contains one uncompressed Hit and n compressed hits. Therefore,
-  // fitting these hits into a posting list would require BytesUsed plus one
-  // extra hit.
-  return GetBytesUsed(posting_list_used) + sizeof(Hit);
+  // Edge case: if the number of hits is <= 2 and posting_list_used is
+  // NOT_FULL, we should return kMinPostingListSize, as we know that two hits
+  // are able to fit in the 2 special hit positions of a min-sized posting
+  // list.
+  // - Checking this scenario requires deserializing posting_list_used, which
+  //   is an expensive operation. However, having <= 2 hits is only possible
+  //   when GetBytesUsed(posting_list_used) <=
+  //   sizeof(uncompressed hit) + max(delta encoded value size) +
+  //   (sizeof(Hit) - sizeof(Hit::Value)), so we don't need to do this when the
+  //   size exceeds this number.
+  if (GetBytesUsed(posting_list_used) <=
+      2 * sizeof(Hit) + 5 - sizeof(Hit::Value)) {
+    // Check if we're able to get more than 2 hits from posting_list_used
+    std::vector<Hit> hits;
+    libtextclassifier3::Status status =
+        GetHitsInternal(posting_list_used, /*limit=*/3, /*pop=*/false, &hits);
+    if (status.ok() && hits.size() <= 2) {
+      return GetMinPostingListSize();
+    }
+  }
+
+  // - In NOT_FULL status, BytesUsed contains no special hits. For a posting
+  //   list in the NOT_FULL state with n hits, we would have n-1 compressed hits
+  //   and 1 uncompressed hit.
+  // - The minimum sized posting list that would be guaranteed to fit these hits
+  //   would be FULL, but calculating the size required for the FULL posting
+  //   list would require deserializing the last two added hits, so instead we
+  //   will calculate the size of an ALMOST_FULL posting list to fit.
+  // - An ALMOST_FULL posting list would have kInvalidHit in special_hit(0), the
+  //   full uncompressed Hit in special_hit(1), and the n-1 compressed hits in
+  //   the compressed region.
+  // - Currently BytesUsed contains one uncompressed Hit and n-1 compressed
+  //   hits. However, it's possible that the uncompressed Hit is not a full hit,
+  //   but rather only the Hit::Value (this is the case if
+  //   !hit.has_term_frequency()).
+  // - Therefore, fitting these hits into a posting list would require
+  //   BytesUsed + one extra full hit + the byte difference between a full hit
+  //   and Hit::Value, i.e.:
+  //   BytesUsed + sizeof(Hit) + (sizeof(Hit) - sizeof(Hit::Value)).
+  return GetBytesUsed(posting_list_used) + 2 * sizeof(Hit) - sizeof(Hit::Value);
 }
 
 void PostingListHitSerializer::Clear(PostingListUsed* posting_list_used) const {
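
Plugging the assumed post-change widths (sizeof(Hit) == 6, sizeof(Hit::Value) == 4, 5-byte maximum varint delta) into the two expressions above makes the arithmetic concrete; a compile-time sketch:

#include <cstdint>

constexpr uint32_t kHitBytes = 6;       // assumed: 4-byte value + flags + tf
constexpr uint32_t kHitValueBytes = 4;  // assumed sizeof(Hit::Value)
constexpr uint32_t kMaxDeltaBytes = 5;  // max VarInt size of a 32-bit delta

// The "<= 2 hits" fast-path threshold: one full uncompressed hit, one maximal
// delta, plus the flags/term-frequency tail of the compressed hit.
static_assert(kHitBytes + kMaxDeltaBytes + (kHitBytes - kHitValueBytes) ==
                  2 * kHitBytes + kMaxDeltaBytes - kHitValueBytes,
              "both spellings of the 13-byte threshold agree");

// The NOT_FULL result: BytesUsed plus a fixed 8 bytes of headroom.
constexpr uint32_t MinSizeToFitNotFull(uint32_t bytes_used) {
  return bytes_used + 2 * kHitBytes - kHitValueBytes;
}
static_assert(MinSizeToFitNotFull(10) == 18, "10 bytes used -> needs 18");
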
@@ -160,27 +198,37 @@
   // in the padded area and put new hit at the special position 1.
   // Calling ValueOrDie is safe here because 1 < kNumSpecialData.
   Hit cur = GetSpecialHit(posting_list_used, /*index=*/1).ValueOrDie();
-  if (cur.value() <= hit.value()) {
+  if (cur < hit || cur == hit) {
     return absl_ports::InvalidArgumentError(
         "Hit being prepended must be strictly less than the most recent Hit");
   }
-  uint64_t delta = cur.value() - hit.value();
   uint8_t delta_buf[VarInt::kMaxEncodedLen64];
-  size_t delta_len = VarInt::Encode(delta, delta_buf);
+  size_t delta_len =
+      EncodeNextHitValue(/*next_hit_value=*/hit.value(),
+                         /*curr_hit_value=*/cur.value(), delta_buf);
+  uint32_t cur_flags_bytes = GetFlagsByteSize(cur);
   uint32_t cur_term_frequency_bytes = GetTermFrequencyByteSize(cur);
 
   uint32_t pad_end = GetPadEnd(posting_list_used,
                                /*offset=*/kSpecialHitsSize);
 
-  if (pad_end >= kSpecialHitsSize + delta_len + cur_term_frequency_bytes) {
-    // Pad area has enough space for delta and term_frequency of existing hit
-    // (cur). Write delta at pad_end - delta_len - cur_term_frequency_bytes.
+  if (pad_end >= kSpecialHitsSize + delta_len + cur_flags_bytes +
+                     cur_term_frequency_bytes) {
+    // Pad area has enough space for delta, flags and term_frequency of existing
+    // hit (cur). Write delta at pad_end - delta_len - cur_flags_bytes -
+    // cur_term_frequency_bytes.
     uint8_t* delta_offset = posting_list_used->posting_list_buffer() + pad_end -
-                            delta_len - cur_term_frequency_bytes;
+                            delta_len - cur_flags_bytes -
+                            cur_term_frequency_bytes;
     memcpy(delta_offset, delta_buf, delta_len);
-    // Now copy term_frequency.
+
+    // Now copy flags.
+    Hit::Flags flags = cur.flags();
+    uint8_t* flags_offset = delta_offset + delta_len;
+    memcpy(flags_offset, &flags, cur_flags_bytes);
+    // Copy term_frequency.
     Hit::TermFrequency term_frequency = cur.term_frequency();
-    uint8_t* term_frequency_offset = delta_offset + delta_len;
+    uint8_t* term_frequency_offset = flags_offset + cur_flags_bytes;
     memcpy(term_frequency_offset, &term_frequency, cur_term_frequency_bytes);
 
     // Now first hit is the new hit, at special position 1. Safe to ignore the
@@ -231,23 +279,36 @@
     return absl_ports::FailedPreconditionError(
         "Posting list is in an invalid state.");
   }
+
+  // Retrieve the last added (cur) hit's value and flags and compare to the hit
+  // we're adding.
   Hit::Value cur_value;
-  memcpy(&cur_value, posting_list_used->posting_list_buffer() + offset,
-         sizeof(Hit::Value));
-  if (cur_value <= hit.value()) {
-    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
-        "Hit %d being prepended must be strictly less than the most recent "
-        "Hit %d",
-        hit.value(), cur_value));
+  uint8_t* cur_value_offset = posting_list_used->posting_list_buffer() + offset;
+  memcpy(&cur_value, cur_value_offset, sizeof(Hit::Value));
+  Hit::Flags cur_flags = Hit::kNoEnabledFlags;
+  if (GetFlagsByteSize(Hit(cur_value)) > 0) {
+    uint8_t* cur_flags_offset = cur_value_offset + sizeof(Hit::Value);
+    memcpy(&cur_flags, cur_flags_offset, sizeof(Hit::Flags));
   }
-  uint64_t delta = cur_value - hit.value();
+  // Term-frequency is not used for hit comparison so it's ok to pass in the
+  // default term-frequency here.
+  Hit cur(cur_value, cur_flags, Hit::kDefaultTermFrequency);
+  if (cur < hit || cur == hit) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Hit (value=%d, flags=%d) being prepended must be strictly less than "
+        "the most recent Hit (value=%d, flags=%d)",
+        hit.value(), hit.flags(), cur_value, cur_flags));
+  }
   uint8_t delta_buf[VarInt::kMaxEncodedLen64];
-  size_t delta_len = VarInt::Encode(delta, delta_buf);
+  size_t delta_len =
+      EncodeNextHitValue(/*next_hit_value=*/hit.value(),
+                         /*curr_hit_value=*/cur.value(), delta_buf);
+  uint32_t hit_flags_bytes = GetFlagsByteSize(hit);
   uint32_t hit_term_frequency_bytes = GetTermFrequencyByteSize(hit);
 
   // offset now points to one past the end of the first hit.
   offset += sizeof(Hit::Value);
-  if (kSpecialHitsSize + sizeof(Hit::Value) + delta_len +
+  if (kSpecialHitsSize + sizeof(Hit::Value) + delta_len + hit_flags_bytes +
           hit_term_frequency_bytes <=
       offset) {
     // Enough space for delta in compressed area.
@@ -257,9 +318,9 @@
     memcpy(posting_list_used->posting_list_buffer() + offset, delta_buf,
            delta_len);
 
-    // Prepend new hit with (possibly) its term_frequency. We know that there is
-    // room for 'hit' because of the if statement above, so calling ValueOrDie
-    // is safe.
+    // Prepend new hit with (possibly) its flags and term_frequency. We know
+    // that there is room for 'hit' because of the if statement above, so
+    // calling ValueOrDie is safe.
     offset =
         PrependHitUncompressed(posting_list_used, hit, offset).ValueOrDie();
     // offset is guaranteed to be valid here. So it's safe to ignore the return
@@ -295,13 +356,13 @@
     // (i.e. varint delta encoding expanded required storage). We
     // move first hit to special position 1 and put new hit in
     // special position 0.
-    Hit cur(cur_value);
-    // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
+
+    // Offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
     // Therefore, offset must be less than kSpecialHitsSize + 5. Since posting
-    // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
+    // list size must be divisible by sizeof(Hit) (6), it is guaranteed that
     // offset < size_in_bytes, so it is safe to ignore the return value here.
-    ICING_RETURN_IF_ERROR(
-        ConsumeTermFrequencyIfPresent(posting_list_used, &cur, &offset));
+    ICING_RETURN_IF_ERROR(ConsumeFlagsAndTermFrequencyIfPresent(
+        posting_list_used, &cur, &offset));
     // Safe to ignore the return value of PadToEnd because offset must be less
     // than posting_list_used->size_in_bytes(). Otherwise, this function
     // already would have returned FAILED_PRECONDITION.
@@ -463,19 +524,19 @@
       offset += sizeof(Hit::Value);
     } else {
       // Now we have delta encoded subsequent hits. Decode and push.
-      uint64_t delta;
-      offset += VarInt::Decode(
-          posting_list_used->posting_list_buffer() + offset, &delta);
-      val += delta;
+      DecodedHitInfo decoded_hit_info = DecodeNextHitValue(
+          posting_list_used->posting_list_buffer() + offset, val);
+      offset += decoded_hit_info.encoded_size;
+      val = decoded_hit_info.hit_value;
     }
     Hit hit(val);
     libtextclassifier3::Status status =
-        ConsumeTermFrequencyIfPresent(posting_list_used, &hit, &offset);
+        ConsumeFlagsAndTermFrequencyIfPresent(posting_list_used, &hit, &offset);
     if (!status.ok()) {
       // This posting list has been corrupted somehow. The first hit of the
-      // posting list claims to have a term frequency, but there's no more room
-      // in the posting list for that term frequency to exist. Return an empty
-      // vector and zero to indicate no hits retrieved.
+      // posting list claims to have a term frequency or flag, but there's no
+      // more room in the posting list for that term frequency or flag to exist.
+      // Return an empty vector and zero to indicate no hits retrieved.
       if (out != nullptr) {
         out->clear();
       }
@@ -497,10 +558,10 @@
       // In the compressed area. Pop and reconstruct. offset/val is
       // the last traversed hit, which we must discard. So move one
       // more forward.
-      uint64_t delta;
-      offset += VarInt::Decode(
-          posting_list_used->posting_list_buffer() + offset, &delta);
-      val += delta;
+      DecodedHitInfo decoded_hit_info = DecodeNextHitValue(
+          posting_list_used->posting_list_buffer() + offset, val);
+      offset += decoded_hit_info.encoded_size;
+      val = decoded_hit_info.hit_value;
 
       // Now val is the first hit of the new posting list.
       if (kSpecialHitsSize + sizeof(Hit::Value) <= offset) {
@@ -509,17 +570,18 @@
         memcpy(mutable_posting_list_used->posting_list_buffer() + offset, &val,
                sizeof(Hit::Value));
       } else {
-        // val won't fit in compressed area. Also see if there is a
+        // val won't fit in compressed area. Also see if there is a flag or
         // term_frequency.
         Hit hit(val);
         libtextclassifier3::Status status =
-            ConsumeTermFrequencyIfPresent(posting_list_used, &hit, &offset);
+            ConsumeFlagsAndTermFrequencyIfPresent(posting_list_used, &hit,
+                                                  &offset);
         if (!status.ok()) {
           // This posting list has been corrupted somehow. The first hit of
-          // the posting list claims to have a term frequency, but there's no
-          // more room in the posting list for that term frequency to exist.
-          // Return an empty vector and zero to indicate no hits retrieved. Do
-          // not pop anything.
+          // the posting list claims to have a term frequency or flag, but
+          // there's no more room in the posting list for that term frequency or
+          // flag to exist. Return an empty vector and zero to indicate no hits
+          // retrieved. Do not pop anything.
           if (out != nullptr) {
             out->clear();
           }
@@ -569,7 +631,7 @@
     return absl_ports::InvalidArgumentError(
         "Special hits only exist at indices 0 and 1");
   }
-  Hit val;
+  Hit val(Hit::kInvalidValue);
   memcpy(&val, posting_list_used->posting_list_buffer() + index * sizeof(val),
          sizeof(val));
   return val;
@@ -653,11 +715,11 @@
     // not_full state. Safe to ignore the return value because 0 and 1 are both
     // < kNumSpecialData.
     SetSpecialHit(posting_list_used, /*index=*/0, Hit(offset));
-    SetSpecialHit(posting_list_used, /*index=*/1, Hit());
+    SetSpecialHit(posting_list_used, /*index=*/1, Hit(Hit::kInvalidValue));
   } else if (offset == sizeof(Hit)) {
    // almost_full state. Safe to ignore the return value because 1 is <
    // kNumSpecialData.
-    SetSpecialHit(posting_list_used, /*index=*/0, Hit());
+    SetSpecialHit(posting_list_used, /*index=*/0, Hit(Hit::kInvalidValue));
   }
   // Nothing to do for the FULL state - the offset isn't actually stored
   // anywhere and both special hits hold valid hits.
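
To summarize the state encoding these branches maintain (a restatement of the comments above, not new behavior), a sketch where the booleans stand in for the real checks on the decoded special hits:

enum class PostingListState { kNotFull, kAlmostFull, kFull };

// NOT_FULL: special hit 0 stores the start offset, special hit 1 is invalid.
// ALMOST_FULL: special hit 0 is invalid, special hit 1 holds the front hit.
// FULL: both special hits hold valid hits.
PostingListState Classify(bool special0_holds_offset, bool special0_valid_hit,
                          bool special1_valid_hit) {
  if (special0_holds_offset && !special1_valid_hit) {
    return PostingListState::kNotFull;
  }
  if (!special0_valid_hit && special1_valid_hit) {
    return PostingListState::kAlmostFull;
  }
  return PostingListState::kFull;
}
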
@@ -667,46 +729,72 @@
 libtextclassifier3::StatusOr<uint32_t>
 PostingListHitSerializer::PrependHitUncompressed(
     PostingListUsed* posting_list_used, const Hit& hit, uint32_t offset) const {
-  if (hit.has_term_frequency()) {
-    if (offset < kSpecialHitsSize + sizeof(Hit)) {
-      return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
-          "Not enough room to prepend Hit at offset %d.", offset));
-    }
-    offset -= sizeof(Hit);
-    memcpy(posting_list_used->posting_list_buffer() + offset, &hit,
-           sizeof(Hit));
-  } else {
-    if (offset < kSpecialHitsSize + sizeof(Hit::Value)) {
-      return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
-          "Not enough room to prepend Hit::Value at offset %d.", offset));
-    }
-    offset -= sizeof(Hit::Value);
-    Hit::Value val = hit.value();
-    memcpy(posting_list_used->posting_list_buffer() + offset, &val,
-           sizeof(Hit::Value));
+  uint32_t hit_bytes_to_prepend = sizeof(Hit::Value) + GetFlagsByteSize(hit) +
+                                  GetTermFrequencyByteSize(hit);
+
+  if (offset < kSpecialHitsSize + hit_bytes_to_prepend) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Not enough room to prepend Hit at offset %d.", offset));
   }
+
+  if (hit.has_term_frequency()) {
+    offset -= sizeof(Hit::TermFrequency);
+    Hit::TermFrequency term_frequency = hit.term_frequency();
+    memcpy(posting_list_used->posting_list_buffer() + offset, &term_frequency,
+           sizeof(Hit::TermFrequency));
+  }
+  if (hit.has_flags()) {
+    offset -= sizeof(Hit::Flags);
+    Hit::Flags flags = hit.flags();
+    memcpy(posting_list_used->posting_list_buffer() + offset, &flags,
+           sizeof(Hit::Flags));
+  }
+  offset -= sizeof(Hit::Value);
+  Hit::Value val = hit.value();
+  memcpy(posting_list_used->posting_list_buffer() + offset, &val,
+         sizeof(Hit::Value));
   return offset;
 }
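
The rewritten PrependHitUncompressed serializes back-to-front, so the bytes ending at the original offset read [value][flags][term-frequency], with each tail byte present only when the corresponding has_* predicate is true. A hedged standalone sketch of that write order (hypothetical buffer; bounds checking omitted here, which the real code performs up front):

#include <cstdint>
#include <cstring>

// Writes an uncompressed hit ending just before `offset` and returns the new
// (lower) offset: term frequency first (highest address), then flags, then
// the 4-byte value (lowest address).
uint32_t PrependUncompressedSketch(uint8_t* buf, uint32_t offset,
                                   uint32_t value, uint8_t flags,
                                   uint8_t term_frequency, bool has_flags,
                                   bool has_term_frequency) {
  if (has_term_frequency) {
    offset -= sizeof(term_frequency);
    std::memcpy(buf + offset, &term_frequency, sizeof(term_frequency));
  }
  if (has_flags) {
    offset -= sizeof(flags);
    std::memcpy(buf + offset, &flags, sizeof(flags));
  }
  offset -= sizeof(value);
  std::memcpy(buf + offset, &value, sizeof(value));
  return offset;
}
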
 
 libtextclassifier3::Status
-PostingListHitSerializer::ConsumeTermFrequencyIfPresent(
+PostingListHitSerializer::ConsumeFlagsAndTermFrequencyIfPresent(
     const PostingListUsed* posting_list_used, Hit* hit,
     uint32_t* offset) const {
-  if (!hit->has_term_frequency()) {
-    // No term frequency to consume. Everything is fine.
+  if (!hit->has_flags()) {
+    // No flags (and by extension, no term-frequency) to consume. Everything is
+    // fine.
     return libtextclassifier3::Status::OK;
   }
-  if (*offset + sizeof(Hit::TermFrequency) >
-      posting_list_used->size_in_bytes()) {
+
+  // Consume flags
+  Hit::Flags flags;
+  if (*offset + sizeof(Hit::Flags) > posting_list_used->size_in_bytes()) {
     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
-        "offset %d must not point past the end of the posting list of size %d.",
+        "offset %d must not point past the end of the posting list of size "
+        "%d.",
         *offset, posting_list_used->size_in_bytes()));
   }
-  Hit::TermFrequency term_frequency;
-  memcpy(&term_frequency, posting_list_used->posting_list_buffer() + *offset,
-         sizeof(Hit::TermFrequency));
-  *hit = Hit(hit->value(), term_frequency);
-  *offset += sizeof(Hit::TermFrequency);
+  memcpy(&flags, posting_list_used->posting_list_buffer() + *offset,
+         sizeof(Hit::Flags));
+  *hit = Hit(hit->value(), flags, Hit::kDefaultTermFrequency);
+  *offset += sizeof(Hit::Flags);
+
+  if (hit->has_term_frequency()) {
+    // Consume term frequency
+    if (*offset + sizeof(Hit::TermFrequency) >
+        posting_list_used->size_in_bytes()) {
+      return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+          "offset %d must not point past the end of the posting list of size "
+          "%d.",
+          *offset, posting_list_used->size_in_bytes()));
+    }
+    Hit::TermFrequency term_frequency;
+    memcpy(&term_frequency, posting_list_used->posting_list_buffer() + *offset,
+           sizeof(Hit::TermFrequency));
+    *hit = Hit(hit->value(), flags, term_frequency);
+    *offset += sizeof(Hit::TermFrequency);
+  }
+
   return libtextclassifier3::Status::OK;
 }
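
Read-side, the tail is consumed in the same order it was written: the flags byte first, then a term-frequency byte only if the flags say one exists. A self-contained sketch of that order (the term-frequency bit position is an assumption for illustration, not necessarily Icing's real flag layout):

#include <cstddef>
#include <cstdint>
#include <optional>

struct TailSketch {
  std::optional<uint8_t> flags;
  std::optional<uint8_t> term_frequency;
  std::size_t bytes_consumed = 0;
};

// Mirrors ConsumeFlagsAndTermFrequencyIfPresent: nothing to consume when the
// hit has no flags; otherwise read one flags byte, then one term-frequency
// byte when the (assumed) term-frequency bit is set. The real code returns
// INVALID_ARGUMENT instead of silently stopping at the buffer end.
TailSketch ConsumeTail(const uint8_t* buf, std::size_t remaining,
                       bool has_flags) {
  TailSketch tail;
  if (!has_flags || remaining < 1) return tail;
  uint8_t flags = buf[tail.bytes_consumed++];
  tail.flags = flags;
  constexpr uint8_t kAssumedTermFrequencyBit = 0x1;  // illustrative only
  if ((flags & kAssumedTermFrequencyBit) != 0 &&
      tail.bytes_consumed < remaining) {
    tail.term_frequency = buf[tail.bytes_consumed++];
  }
  return tail;
}
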
 
diff --git a/icing/index/main/posting-list-hit-serializer.h b/icing/index/main/posting-list-hit-serializer.h
index 2986d9c..08c792c 100644
--- a/icing/index/main/posting-list-hit-serializer.h
+++ b/icing/index/main/posting-list-hit-serializer.h
@@ -15,6 +15,7 @@
 #ifndef ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
 #define ICING_INDEX_MAIN_POSTING_LIST_HIT_SERIALIZER_H_
 
+#include <cstddef>
 #include <cstdint>
 #include <vector>
 
@@ -23,6 +24,7 @@
 #include "icing/file/posting_list/posting-list-common.h"
 #include "icing/file/posting_list/posting-list-used.h"
 #include "icing/index/hit/hit.h"
+#include "icing/legacy/index/icing-bit-util.h"
 #include "icing/util/status-macros.h"
 
 namespace icing {
@@ -34,6 +36,52 @@
  public:
   static constexpr uint32_t kSpecialHitsSize = kNumSpecialData * sizeof(Hit);
 
+  struct DecodedHitInfo {
+    // The decoded hit value.
+    Hit::Value hit_value;
+
+    // Size of the encoded hit in bytes.
+    size_t encoded_size;
+  };
+
+  // Given the current hit value, encodes the next hit value for serialization
+  // in the posting list.
+  //
+  // The encoded value is the varint-encoded delta between next_hit_value and
+  // curr_hit_value.
+  // - We add 1 to this delta so as to avoid getting a delta value of 0.
+  // - This allows us to add duplicate hits with the same value, which is a
+  //   valid case if we need to store hits with different flags that belong in
+  //   the same section-id/doc-id combo.
+  // - We cannot have an encoded hit delta with a value of 0 as 0 is currently
+  //   used for padding the unused region in the posting list.
+  //
+  // REQUIRES: next_hit_value <= curr_hit_value AND
+  //           curr_hit_value - next_hit_value <
+  //              std::numeric_limits<Hit::Value>::max()
+  //
+  // RETURNS: next_hit_value's encoded length in bytes and writes the encoded
+  //          value directly into encoded_buf_out.
+  static size_t EncodeNextHitValue(Hit::Value next_hit_value,
+                                   Hit::Value curr_hit_value,
+                                   uint8_t* encoded_buf_out) {
+    uint64_t delta = curr_hit_value - next_hit_value + 1;
+    return VarInt::Encode(delta, encoded_buf_out);
+  }
+
+  // Given the current hit value, decodes the next hit value from an encoded
+  // byte array buffer.
+  //
+  // RETURNS: DecodedHitInfo containing the decoded hit value and the value's
+  //          encoded size in bytes.
+  static DecodedHitInfo DecodeNextHitValue(const uint8_t* encoded_buf_in,
+                                           Hit::Value curr_hit_value) {
+    uint64_t delta;
+    size_t delta_size = VarInt::Decode(encoded_buf_in, &delta);
+    Hit::Value hit_value = curr_hit_value + delta - 1;
+    return {hit_value, delta_size};
+  }
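
A quick round trip of the delta+1 scheme defined above, including the duplicate-value case it exists for (self-contained sketch; a single byte stands in for VarInt, valid for deltas under 128):

#include <cassert>
#include <cstdint>

uint8_t EncodeNext(uint32_t next_value, uint32_t curr_value) {
  return static_cast<uint8_t>(curr_value - next_value + 1);  // never 0
}

uint32_t DecodeNext(uint8_t encoded, uint32_t curr_value) {
  return curr_value + encoded - 1;
}

int main() {
  // Distinct values: prepending 97 in front of 100 stores delta 4, and
  // decoding from 97 recovers 100.
  assert(EncodeNext(/*next_value=*/97, /*curr_value=*/100) == 4);
  assert(DecodeNext(/*encoded=*/4, /*curr_value=*/97) == 100);

  // Duplicate values: the encoded delta is 1, so it can never be mistaken
  // for the 0 bytes that pad the unused region.
  assert(EncodeNext(/*next_value=*/100, /*curr_value=*/100) == 1);
  assert(DecodeNext(/*encoded=*/1, /*curr_value=*/100) == 100);
  return 0;
}
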
+
   uint32_t GetDataTypeBytes() const override { return sizeof(Hit); }
 
   uint32_t GetMinPostingListSize() const override {
@@ -299,15 +347,19 @@
       PostingListUsed* posting_list_used, const Hit& hit,
       uint32_t offset) const;
 
-  // If hit has a term frequency, consumes the term frequency at offset, updates
-  // hit to include the term frequency and updates offset to reflect that the
-  // term frequency has been consumed.
+  // If hit has flags and/or a term frequency field, consumes the flags and/or
+  // term frequency at offset, updates hit to include them, and advances offset
+  // past the consumed fields.
   //
   // RETURNS:
   //   - OK, if successful
-  //   - INVALID_ARGUMENT if hit has a term frequency and offset +
-  //     sizeof(Hit::TermFrequency) >= posting_list_used->size_in_bytes()
-  libtextclassifier3::Status ConsumeTermFrequencyIfPresent(
+  //   - INVALID_ARGUMENT if hit has flags and/or a term frequency field and
+  //     offset + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency) >
+  //     posting_list_used->size_in_bytes(),
+  //     i.e. the posting list is not large enough to consume the hit's flags
+  //     and term frequency fields
+  libtextclassifier3::Status ConsumeFlagsAndTermFrequencyIfPresent(
       const PostingListUsed* posting_list_used, Hit* hit,
       uint32_t* offset) const;
 };
diff --git a/icing/index/main/posting-list-hit-serializer_test.cc b/icing/index/main/posting-list-hit-serializer_test.cc
index 7f0b945..ea135ef 100644
--- a/icing/index/main/posting-list-hit-serializer_test.cc
+++ b/icing/index/main/posting-list-hit-serializer_test.cc
@@ -14,22 +14,32 @@
 
 #include "icing/index/main/posting-list-hit-serializer.h"
 
+#include <algorithm>
+#include <cstddef>
 #include <cstdint>
 #include <deque>
-#include <memory>
+#include <iterator>
+#include <limits>
 #include <vector>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include "icing/file/posting_list/posting-list-used.h"
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/hit-test-utils.h"
 
 using testing::ElementsAre;
 using testing::ElementsAreArray;
 using testing::Eq;
+using testing::Gt;
 using testing::IsEmpty;
+using testing::IsFalse;
+using testing::IsTrue;
 using testing::Le;
 using testing::Lt;
 
@@ -58,103 +68,143 @@
       PostingListUsed::CreateFromUnitializedRegion(&serializer, kHitsSize));
 
   // Make used.
-  Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/56,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit0));
-  // Size = sizeof(uncompressed hit0)
-  int expected_size = sizeof(Hit);
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  // Size = sizeof(uncompressed hit0::Value)
+  //        + sizeof(hit0::Flags)
+  //        + sizeof(hit0::TermFrequency)
+  int expected_size =
+      sizeof(Hit::Value) + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
 
-  Hit hit1(/*section_id=*/0, 1, Hit::kDefaultTermFrequency);
+  Hit hit1(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  uint8_t delta_buf[VarInt::kMaxEncodedLen64];
+  size_t delta_len = PostingListHitSerializer::EncodeNextHitValue(
+      /*next_hit_value=*/hit1.value(),
+      /*curr_hit_value=*/hit0.value(), delta_buf);
   ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit1));
-  // Size = sizeof(uncompressed hit1)
-  //        + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
-  expected_size += 2 + sizeof(Hit::TermFrequency);
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  // Size = sizeof(uncompressed hit1::Value)
+  //        + sizeof(hit0-hit1)
+  //        + sizeof(hit0::Flags)
+  //        + sizeof(hit0::TermFrequency)
+  expected_size += delta_len;
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit1, hit0)));
 
-  Hit hit2(/*section_id=*/0, 2, /*term_frequency=*/56);
+  Hit hit2(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/56,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  delta_len = PostingListHitSerializer::EncodeNextHitValue(
+      /*next_hit_value=*/hit2.value(),
+      /*curr_hit_value=*/hit1.value(), delta_buf);
   ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit2));
-  // Size = sizeof(uncompressed hit2)
+  // Size = sizeof(uncompressed hit2::Value) + sizeof(hit2::Flags)
+  //        + sizeof(hit2::TermFrequency)
   //        + sizeof(hit1-hit2)
-  //        + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
-  expected_size += 2;
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  //        + sizeof(hit0-hit1)
+  //        + sizeof(hit0::flags)
+  //        + sizeof(hit0::term_frequency)
+  expected_size += delta_len + sizeof(Hit::Flags) + sizeof(Hit::TermFrequency);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
 
-  Hit hit3(/*section_id=*/0, 3, Hit::kDefaultTermFrequency);
+  Hit hit3(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  delta_len = PostingListHitSerializer::EncodeNextHitValue(
+      /*next_hit_value=*/hit3.value(),
+      /*curr_hit_value=*/hit2.value(), delta_buf);
   ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit3));
-  // Size = sizeof(uncompressed hit3)
-  //        + sizeof(hit2-hit3) + sizeof(hit2::term_frequency)
+  // Size = sizeof(uncompressed hit3::Value)
+  //        + sizeof(hit2-hit3) + sizeof(hit2::Flags)
+  //        + sizeof(hit2::TermFrequency)
   //        + sizeof(hit1-hit2)
-  //        + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
-  expected_size += 2 + sizeof(Hit::TermFrequency);
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  //        + sizeof(hit0-hit1)
+  //        + sizeof(hit0::flags)
+  //        + sizeof(hit0::term_frequency)
+  expected_size += delta_len;
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
 }
 
-TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
+TEST(PostingListHitSerializerTest,
+     PostingListUsedPrependHitAlmostFull_withFlags) {
   PostingListHitSerializer serializer;
 
-  int size = 2 * serializer.GetMinPostingListSize();
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
 
   // Fill up the compressed region.
   // Transitions:
   // Adding hit0: EMPTY -> NOT_FULL
   // Adding hit1: NOT_FULL -> NOT_FULL
   // Adding hit2: NOT_FULL -> NOT_FULL
-  Hit hit0(/*section_id=*/0, 0, Hit::kDefaultTermFrequency);
-  Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
-  Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3);
+  Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2, /*term_frequency=*/57,
+                       /*is_in_prefix_section=*/true,
+                       /*is_prefix_hit=*/true);
+  EXPECT_THAT(hit2.has_flags(), IsTrue());
+  EXPECT_THAT(hit2.has_term_frequency(), IsTrue());
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
-  // Size used will be 2+2+4=8 bytes
-  int expected_size = sizeof(Hit::Value) + 2 + 2;
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  // Size used will be 4 (hit2::Value) + 1 (hit2::Flags) + 1
+  // (hit2::TermFrequency) + 2 (hit1-hit2) + 3 (hit0-hit1) = 11 bytes
+  int expected_size = sizeof(Hit::Value) + sizeof(Hit::Flags) +
+                      sizeof(Hit::TermFrequency) + 2 + 3;
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
 
   // Add one more hit to transition NOT_FULL -> ALMOST_FULL
-  Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3);
+  Hit hit3 =
+      CreateHit(hit2, /*desired_byte_length=*/3, Hit::kDefaultTermFrequency,
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  EXPECT_THAT(hit3.has_flags(), IsFalse());
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
-  // Compressed region would be 2+2+3+4=11 bytes, but the compressed region is
-  // only 10 bytes. So instead, the posting list will transition to ALMOST_FULL.
-  // The in-use compressed region will actually shrink from 8 bytes to 7 bytes
+  // Storing them in the compressed region requires 4 (hit3::Value) + 3
+  // (hit2-hit3) + 1 (hit2::Flags) + 1 (hit2::TermFrequency) + 2 (hit1-hit2) + 3
+  // (hit0-hit1) = 14 bytes, but there are only 12 bytes in the compressed
+  // region. So instead, the posting list will transition to ALMOST_FULL.
+  // The in-use compressed region will actually shrink from 11 bytes to 10 bytes
   // because the uncompressed version of hit2 will be overwritten with the
   // compressed delta of hit2. hit3 will be written to one of the special hits.
   // Because we're in ALMOST_FULL, the expected size is the size of the pl minus
   // the one hit used to mark the posting list as ALMOST_FULL.
-  expected_size = size - sizeof(Hit);
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  expected_size = pl_size - sizeof(Hit);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
 
   // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
   Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4));
-  // There are currently 7 bytes in use in the compressed region. hit3 will have
-  // a 2-byte delta. That delta will fit in the compressed region (which will
-  // now have 9 bytes in use), hit4 will be placed in one of the special hits
-  // and the posting list will remain in ALMOST_FULL.
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
+  // There are currently 10 bytes in use in the compressed region. hit3 will
+  // have a 2-byte delta, which fits in the compressed region. Hit3 will be
+  // moved from the special hit to the compressed region (which will have 12
+  // bytes in use after adding hit3). Hit4 will be placed in one of the special
+  // hits and the posting list will remain in ALMOST_FULL.
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0)));
 
   // Add one more hit to transition ALMOST_FULL -> FULL
   Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5));
-  // There are currently 9 bytes in use in the compressed region. hit4 will have
-  // a 2-byte delta which will not fit in the compressed region. So hit4 will
-  // remain in one of the special hits and hit5 will occupy the other, making
-  // the posting list FULL.
-  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(size));
+  // There are currently 12 bytes in use in the compressed region. hit4 will
+  // have a 2-byte delta which will not fit in the compressed region. So hit4
+  // will remain in one of the special hits and hit5 will occupy the other,
+  // making the posting list FULL.
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size));
   EXPECT_THAT(serializer.GetHits(&pl_used),
               IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)));
 
@@ -164,9 +214,133 @@
               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
 }
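
The byte counts in the rewritten expectations above all follow from the same assumed widths; a compile-time restatement:

// Assumes sizeof(Hit) == 6 and sizeof(Hit::Value) == 4 after this change.
constexpr int kHitBytes = 6;
constexpr int kMinPostingList = 2 * kHitBytes;              // 12
constexpr int kPlSize = 2 * kMinPostingList;                // 24, as used above
constexpr int kCompressedRegion = kPlSize - 2 * kHitBytes;  // 12 bytes

static_assert(kCompressedRegion == 12, "12-byte compressed region");
// NOT_FULL usage after hit0..hit2 (hit2 carries flags + term frequency):
// 4 (value) + 1 (flags) + 1 (tf) + 2 (hit1-hit2) + 3 (hit0-hit1) = 11.
static_assert(4 + 1 + 1 + 2 + 3 == 11, "matches the quoted 11 bytes");
// hit3 would push that to 14 > 12 bytes, flipping the list to ALMOST_FULL,
// whose reported usage is the list size minus one special hit:
static_assert(kPlSize - kHitBytes == 18, "ALMOST_FULL reports 18 bytes");
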
 
+TEST(PostingListHitSerializerTest, PostingListUsedPrependHitAlmostFull) {
+  PostingListHitSerializer serializer;
+
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+
+  // Fill up the compressed region.
+  // Transitions:
+  // Adding hit0: EMPTY -> NOT_FULL
+  // Adding hit1: NOT_FULL -> NOT_FULL
+  // Adding hit2: NOT_FULL -> NOT_FULL
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3);
+  Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/3);
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
+  // Size used will be 4 (hit2::Value) + 3 (hit1-hit2) + 3 (hit0-hit1)
+  // = 10 bytes
+  int expected_size = sizeof(Hit::Value) + 3 + 3;
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
+
+  // Add one more hit to transition NOT_FULL -> ALMOST_FULL
+  Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/3);
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
+  // Storing them in the compressed region requires 4 (hit3::Value) + 3
+  // (hit2-hit3) + 3 (hit1-hit2) + 3 (hit0-hit1) = 13 bytes, but there are only
+  // 12 bytes in the compressed region. So instead, the posting list will
+  // transition to ALMOST_FULL.
+  // The in-use compressed region will actually shrink from 10 bytes to 9 bytes
+  // because the uncompressed version of hit2 will be overwritten with the
+  // compressed delta of hit2. hit3 will be written to one of the special hits.
+  // Because we're in ALMOST_FULL, the expected size is the size of the pl minus
+  // the one hit used to mark the posting list as ALMOST_FULL.
+  expected_size = pl_size - sizeof(Hit);
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
+
+  // Add one more hit to transition ALMOST_FULL -> ALMOST_FULL
+  Hit hit4 = CreateHit(hit3, /*desired_byte_length=*/2);
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit4));
+  // There are currently 9 bytes in use in the compressed region. Hit3 will
+  // have a 2-byte delta, which fits in the compressed region. Hit3 will be
+  // moved from the special hit to the compressed region (which will have 11
+  // bytes in use after adding hit3). Hit4 will be placed in one of the special
+  // hits and the posting list will remain in ALMOST_FULL.
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(expected_size));
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAre(hit4, hit3, hit2, hit1, hit0)));
+
+  // Add one more hit to transition ALMOST_FULL -> FULL
+  Hit hit5 = CreateHit(hit4, /*desired_byte_length=*/2);
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit5));
+  // There are currently 11 bytes in use in the compressed region. Hit4 will
+  // have a 2-byte delta which will not fit in the compressed region. So hit4
+  // will remain in one of the special hits and hit5 will occupy the other,
+  // making the posting list FULL.
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size));
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAre(hit5, hit4, hit3, hit2, hit1, hit0)));
+
+  // The posting list is FULL. Adding another hit should fail.
+  Hit hit6 = CreateHit(hit5, /*desired_byte_length=*/1);
+  EXPECT_THAT(serializer.PrependHit(&pl_used, hit6),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(PostingListHitSerializerTest, PrependHitsWithSameValue) {
+  PostingListHitSerializer serializer;
+
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+
+  // Fill up the compressed region.
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/3);
+  Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2, /*term_frequency=*/57,
+                       /*is_in_prefix_section=*/true,
+                       /*is_prefix_hit=*/true);
+  // Create hit3 with the same value as hit2 but different flags (hit3_flags
+  // is set to have all currently-defined flags enabled).
+  Hit::Flags hit3_flags = 0;
+  for (int i = 0; i < Hit::kNumFlagsInFlagsField; ++i) {
+    hit3_flags |= (1 << i);
+  }
+  Hit hit3(hit2.value(), /*flags=*/hit3_flags,
+           /*term_frequency=*/hit2.term_frequency());
+
+  // hit3 is larger than hit2 (its flags value is larger), and so needs to be
+  // prepended first.
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit3));
+  ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit2));
+  // Posting list is now ALMOST_FULL
+  // ----------------------
+  // 23-21     delta(Hit #0)
+  // 20-19     delta(Hit #1)
+  // 18        term-frequency(Hit #2)
+  // 17        flags(Hit #2)
+  // 16        delta(Hit #2): value delta 0, encoded as 1
+  // 15-12     <unused padding = 0>
+  // 11-6      Hit #3
+  // 5-0       kSpecialHit
+  // ----------------------
+  int bytes_used = pl_size - sizeof(Hit);
+
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAre(hit2, hit3, hit1, hit0)));
+}
+
 TEST(PostingListHitSerializerTest, PostingListUsedMinSize) {
   PostingListHitSerializer serializer;
 
+  // Min size = 12
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used,
       PostingListUsed::CreateFromUnitializedRegion(
@@ -176,7 +350,7 @@
   EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
 
   // Add a hit, PL should shift to ALMOST_FULL state
-  Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/0,
+  Hit hit0(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/0,
            /*is_in_prefix_section=*/false,
            /*is_prefix_hit=*/true);
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit0));
@@ -185,10 +359,10 @@
   EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Le(expected_size));
   EXPECT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(ElementsAre(hit0)));
 
-  // Add the smallest hit possible - no term_frequency and a delta of 1. PL
-  // should shift to FULL state.
-  Hit hit1(/*section_id=*/0, 0, /*term_frequency=*/0,
-           /*is_in_prefix_section=*/true,
+  // Add the smallest hit possible: a non-prefix hit with no term_frequency
+  // and a delta of 0b10. PL should shift to FULL state.
+  Hit hit1(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/0,
+           /*is_in_prefix_section=*/false,
            /*is_prefix_hit=*/false);
   ICING_EXPECT_OK(serializer.PrependHit(&pl_used, hit1));
   // Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
@@ -198,7 +372,7 @@
               IsOkAndHolds(ElementsAre(hit1, hit0)));
 
   // Try to add the smallest hit possible. Should fail
-  Hit hit2(/*section_id=*/0, 0, /*term_frequency=*/0,
+  Hit hit2(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/0,
            /*is_in_prefix_section=*/false,
            /*is_prefix_hit=*/false);
   EXPECT_THAT(serializer.PrependHit(&pl_used, hit2),
@@ -212,14 +386,17 @@
      PostingListPrependHitArrayMinSizePostingList) {
   PostingListHitSerializer serializer;
 
-  // Min Size = 10
-  int size = serializer.GetMinPostingListSize();
+  // Min Size = 12
+  int pl_size = serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
 
   std::vector<HitElt> hits_in;
-  hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
+  hits_in.emplace_back(Hit(/*section_id=*/1, /*document_id=*/0,
+                           Hit::kDefaultTermFrequency,
+                           /*is_in_prefix_section=*/false,
+                           /*is_prefix_hit=*/false));
   hits_in.emplace_back(
       CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
   hits_in.emplace_back(
@@ -235,7 +412,7 @@
   ICING_ASSERT_OK_AND_ASSIGN(
       uint32_t num_can_prepend,
       (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-          &pl_used, &hits_in[0], hits_in.size(), false)));
+          &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
   EXPECT_THAT(num_can_prepend, Eq(2));
 
   int can_fit_hits = num_can_prepend;
@@ -243,10 +420,11 @@
   // problem, transitioning the PL from EMPTY -> ALMOST_FULL -> FULL
   const HitElt *hits_in_ptr = hits_in.data() + (hits_in.size() - 2);
   ICING_ASSERT_OK_AND_ASSIGN(
-      num_can_prepend, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-                           &pl_used, hits_in_ptr, can_fit_hits, false)));
+      num_can_prepend,
+      (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+          &pl_used, hits_in_ptr, can_fit_hits, /*keep_prepended=*/false)));
   EXPECT_THAT(num_can_prepend, Eq(can_fit_hits));
-  EXPECT_THAT(size, Eq(serializer.GetBytesUsed(&pl_used)));
+  EXPECT_THAT(pl_size, Eq(serializer.GetBytesUsed(&pl_used)));
   std::deque<Hit> hits_pushed;
   std::transform(hits_in.rbegin(),
                  hits_in.rend() - hits_in.size() + can_fit_hits,
@@ -258,14 +436,17 @@
 TEST(PostingListHitSerializerTest, PostingListPrependHitArrayPostingList) {
   PostingListHitSerializer serializer;
 
-  // Size = 30
-  int size = 3 * serializer.GetMinPostingListSize();
+  // Size = 36
+  int pl_size = 3 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
 
   std::vector<HitElt> hits_in;
-  hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
+  hits_in.emplace_back(Hit(/*section_id=*/1, /*document_id=*/0,
+                           Hit::kDefaultTermFrequency,
+                           /*is_in_prefix_section=*/false,
+                           /*is_prefix_hit=*/false));
   hits_in.emplace_back(
       CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
   hits_in.emplace_back(
@@ -278,14 +459,14 @@
   // The last hit is uncompressed and the four before it should only take one
   // byte. Total use = 8 bytes.
   // ----------------------
-  // 29     delta(Hit #1)
-  // 28     delta(Hit #2)
-  // 27     delta(Hit #3)
-  // 26     delta(Hit #4)
-  // 25-22  Hit #5
-  // 21-10  <unused>
-  // 9-5    kSpecialHit
-  // 4-0    Offset=22
+  // 35     delta(Hit #0)
+  // 34     delta(Hit #1)
+  // 33     delta(Hit #2)
+  // 32     delta(Hit #3)
+  // 31-28  Hit #4
+  // 27-12  <unused>
+  // 11-6   kSpecialHit
+  // 5-0    Offset=28
   // ----------------------
   int byte_size = sizeof(Hit::Value) + hits_in.size() - 1;
 
@@ -294,7 +475,7 @@
   ICING_ASSERT_OK_AND_ASSIGN(
       uint32_t num_could_fit,
       (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-          &pl_used, &hits_in[0], hits_in.size(), false)));
+          &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
   EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
   EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
   std::deque<Hit> hits_pushed;
@@ -316,31 +497,35 @@
       CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
   hits_in.emplace_back(
       CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
+  hits_in.emplace_back(
+      CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
   std::reverse(hits_in.begin(), hits_in.end());
-  // Size increased by the deltas of these hits (1+2+1+2+3+2) = 11 bytes
+  // Size increased by the deltas of these hits (1+2+1+2+3+2+3) = 14 bytes
   // ----------------------
-  // 29     delta(Hit #1)
-  // 28     delta(Hit #2)
-  // 27     delta(Hit #3)
-  // 26     delta(Hit #4)
-  // 25     delta(Hit #5)
-  // 24-23  delta(Hit #6)
-  // 22     delta(Hit #7)
-  // 21-20  delta(Hit #8)
-  // 19-17  delta(Hit #9)
-  // 16-15  delta(Hit #10)
-  // 14-11  Hit #11
-  // 10  <unused>
-  // 9-5    kSpecialHit
-  // 4-0    Offset=11
+  // 35     delta(Hit #0)
+  // 34     delta(Hit #1)
+  // 33     delta(Hit #2)
+  // 32     delta(Hit #3)
+  // 31     delta(Hit #4)
+  // 30-29  delta(Hit #5)
+  // 28     delta(Hit #6)
+  // 27-26  delta(Hit #7)
+  // 25-23  delta(Hit #8)
+  // 22-21  delta(Hit #9)
+  // 20-18  delta(Hit #10)
+  // 17-14  Hit #11
+  // 13-12  <unused>
+  // 11-6   kSpecialHit
+  // 5-0    Offset=14
   // ----------------------
-  byte_size += 11;
+  byte_size += 14;
 
-  // Add these 6 hits. The PL is currently in the NOT_FULL state and should
+  // Add these 7 hits. The PL is currently in the NOT_FULL state and should
   // remain in the NOT_FULL state.
   ICING_ASSERT_OK_AND_ASSIGN(
-      num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-                         &pl_used, &hits_in[0], hits_in.size(), false)));
+      num_could_fit,
+      (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+          &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
   EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
   EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
   // All hits from hits_in were added.
@@ -353,29 +538,32 @@
   hits_in.clear();
   hits_in.emplace_back(first_hit);
   // ----------------------
-  // 29     delta(Hit #1)
-  // 28     delta(Hit #2)
-  // 27     delta(Hit #3)
-  // 26     delta(Hit #4)
-  // 25     delta(Hit #5)
-  // 24-23  delta(Hit #6)
-  // 22     delta(Hit #7)
-  // 21-20  delta(Hit #8)
-  // 19-17  delta(Hit #9)
-  // 16-15  delta(Hit #10)
-  // 14-12  delta(Hit #11)
-  // 11-10  <unused>
-  // 9-5    Hit #12
-  // 4-0    kSpecialHit
+  // 35     delta(Hit #0)
+  // 34     delta(Hit #1)
+  // 33     delta(Hit #2)
+  // 32     delta(Hit #3)
+  // 31     delta(Hit #4)
+  // 30-29  delta(Hit #5)
+  // 28     delta(Hit #6)
+  // 27-26  delta(Hit #7)
+  // 25-23  delta(Hit #8)
+  // 22-21  delta(Hit #9)
+  // 20-18  delta(Hit #10)
+  // 17-15  delta(Hit #11)
+  // 14-12  <unused>
+  // 11-6   Hit #12
+  // 5-0    kSpecialHit
   // ----------------------
-  byte_size = 25;
+  byte_size = 30;  // 36 - 6
 
   // Add this 1 hit. The PL is currently in the NOT_FULL state and should
   // transition to the ALMOST_FULL state - even though there is still some
-  // unused space.
+  // unused space. This is because the unused space (3 bytes) is less than
+  // the size of an uncompressed Hit.
   ICING_ASSERT_OK_AND_ASSIGN(
-      num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-                         &pl_used, &hits_in[0], hits_in.size(), false)));
+      num_could_fit,
+      (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+          &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
   EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
   EXPECT_THAT(byte_size, Eq(serializer.GetBytesUsed(&pl_used)));
   // All hits from hits_in were added.
@@ -388,37 +576,40 @@
   hits_in.clear();
   hits_in.emplace_back(first_hit);
   hits_in.emplace_back(
-      CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/2));
+      CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/3));
   std::reverse(hits_in.begin(), hits_in.end());
   // ----------------------
-  // 29     delta(Hit #1)
-  // 28     delta(Hit #2)
-  // 27     delta(Hit #3)
-  // 26     delta(Hit #4)
-  // 25     delta(Hit #5)
-  // 24-23  delta(Hit #6)
-  // 22     delta(Hit #7)
-  // 21-20  delta(Hit #8)
-  // 19-17  delta(Hit #9)
-  // 16-15  delta(Hit #10)
-  // 14-12  delta(Hit #11)
-  // 11     delta(Hit #12)
-  // 10     <unused>
-  // 9-5    Hit #13
-  // 4-0    Hit #14
+  // 35     delta(Hit #0)
+  // 34     delta(Hit #1)
+  // 33     delta(Hit #2)
+  // 32     delta(Hit #3)
+  // 31     delta(Hit #4)
+  // 30-29  delta(Hit #5)
+  // 28     delta(Hit #6)
+  // 27-26  delta(Hit #7)
+  // 25-23  delta(Hit #8)
+  // 22-21  delta(Hit #9)
+  // 20-18  delta(Hit #10)
+  // 17-15  delta(Hit #11)
+  // 14     delta(Hit #12)
+  // 13-12  <unused>
+  // 11-6   Hit #13
+  // 5-0    Hit #14
   // ----------------------
 
-  // Add these 2 hits. The PL is currently in the ALMOST_FULL state. Adding the
-  // first hit should keep the PL in ALMOST_FULL because the delta between Hit
-  // #12 and Hit #13 (1 byte) can fit in the unused area (2 bytes). Adding the
-  // second hit should tranisition to the FULL state because the delta between
-  // Hit #13 and Hit #14 (2 bytes) is larger than the remaining unused area
-  // (1 byte).
+  // Add these 2 hits.
+  // - The PL is currently in the ALMOST_FULL state. Adding the first hit
+  //   should keep the PL in ALMOST_FULL because the delta between Hit #12 and
+  //   Hit #13 (1 byte) can fit in the unused area (3 bytes).
+  // - Adding the second hit should transition to the FULL state because the
+  //   delta between Hit #13 and Hit #14 (3 bytes) is larger than the remaining
+  //   unused area (2 bytes).
   ICING_ASSERT_OK_AND_ASSIGN(
-      num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-                         &pl_used, &hits_in[0], hits_in.size(), false)));
+      num_could_fit,
+      (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+          &pl_used, &hits_in[0], hits_in.size(), /*keep_prepended=*/false)));
   EXPECT_THAT(num_could_fit, Eq(hits_in.size()));
-  EXPECT_THAT(size, Eq(serializer.GetBytesUsed(&pl_used)));
+  EXPECT_THAT(pl_size, Eq(serializer.GetBytesUsed(&pl_used)));
   // All hits from hits_in were added.
   std::transform(hits_in.rbegin(), hits_in.rend(),
                  std::front_inserter(hits_pushed), HitElt::get_hit);
@@ -431,9 +622,9 @@
 
   static constexpr int kNumHits = 128;
   static constexpr int kDeltaSize = 1;
-  static constexpr int kTermFrequencySize = 1;
   static constexpr size_t kHitsSize =
-      ((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5;
+      ((kNumHits - 2) * kDeltaSize + (2 * sizeof(Hit))) / sizeof(Hit) *
+      sizeof(Hit);
 
   // Create an array with one too many hits
   std::vector<Hit> hits_in_too_many =
@@ -442,18 +633,21 @@
   for (const Hit &hit : hits_in_too_many) {
     hit_elts_in_too_many.emplace_back(hit);
   }
+  // Reverse so that hits are inserted in descending order
+  std::reverse(hit_elts_in_too_many.begin(), hit_elts_in_too_many.end());
+
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used,
       PostingListUsed::CreateFromUnitializedRegion(
           &serializer, serializer.GetMinPostingListSize()));
-
   // PrependHitArray should fail because hit_elts_in_too_many is far too large
   // for the minimum size pl.
   ICING_ASSERT_OK_AND_ASSIGN(
       uint32_t num_could_fit,
       (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
           &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(),
-          false)));
+          /*keep_prepended=*/false)));
+  ASSERT_THAT(num_could_fit, Eq(2));
   ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
   ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
   ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
@@ -464,10 +658,11 @@
   // PrependHitArray should fail because hit_elts_in_too_many is one hit too
   // large for this pl.
   ICING_ASSERT_OK_AND_ASSIGN(
-      num_could_fit, (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
-                         &pl_used, &hit_elts_in_too_many[0],
-                         hit_elts_in_too_many.size(), false)));
-  ASSERT_THAT(num_could_fit, Lt(hit_elts_in_too_many.size()));
+      num_could_fit,
+      (serializer.PrependHitArray<HitElt, HitElt::get_hit>(
+          &pl_used, &hit_elts_in_too_many[0], hit_elts_in_too_many.size(),
+          /*keep_prepended=*/false)));
+  ASSERT_THAT(num_could_fit, Eq(hit_elts_in_too_many.size() - 1));
   ASSERT_THAT(serializer.GetBytesUsed(&pl_used), Eq(0));
   ASSERT_THAT(serializer.GetHits(&pl_used), IsOkAndHolds(IsEmpty()));
 }
@@ -476,16 +671,37 @@
      PostingListStatusJumpFromNotFullToFullAndBack) {
   PostingListHitSerializer serializer;
 
+  // Size = 18
   const uint32_t pl_size = 3 * sizeof(Hit);
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl,
       PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
-  ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue - 1, 0)));
+
+  Hit max_valued_hit(kMaxSectionId, kMinDocumentId, Hit::kMaxTermFrequency,
+                     /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+  ICING_ASSERT_OK(serializer.PrependHit(&pl, max_valued_hit));
   uint32_t bytes_used = serializer.GetBytesUsed(&pl);
+  ASSERT_THAT(bytes_used, Eq(sizeof(Hit::Value) + sizeof(Hit::Flags) +
+                             sizeof(Hit::TermFrequency)));
   // Status not full.
   ASSERT_THAT(bytes_used,
               Le(pl_size - PostingListHitSerializer::kSpecialHitsSize));
-  ICING_ASSERT_OK(serializer.PrependHit(&pl, Hit(Hit::kInvalidValue >> 2, 0)));
+
+  Hit min_valued_hit(kMinSectionId, kMaxDocumentId, Hit::kMaxTermFrequency,
+                     /*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
+  uint8_t delta_buf[VarInt::kMaxEncodedLen64];
+  size_t delta_len = PostingListHitSerializer::EncodeNextHitValue(
+      /*next_hit_value=*/min_valued_hit.value(),
+      /*curr_hit_value=*/max_valued_hit.value(), delta_buf);
+  // The compressed region available is pl_size - kSpecialHitsSize = 6 bytes.
+  // We also need to fit max_valued_hit's flags and term-frequency fields,
+  // which each take 1 byte.
+  // So we'll jump directly to FULL if the varint-encoded delta of the 2 hits
+  // exceeds 6 - sizeof(Hit::Flags) - sizeof(Hit::TermFrequency) = 4 bytes.
+  ASSERT_THAT(delta_len, Gt(4));
+  ICING_ASSERT_OK(serializer.PrependHit(&pl, min_valued_hit));
   // Status should jump to full directly.
   ASSERT_THAT(serializer.GetBytesUsed(&pl), Eq(pl_size));
   ICING_ASSERT_OK(serializer.PopFrontHits(&pl, 1));
@@ -501,11 +717,12 @@
       PostingListUsed pl,
       PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
 
+  static const Hit::Value kMaxHitValue = std::numeric_limits<Hit::Value>::max();
   static const Hit::Value kOverflow[4] = {
-      Hit::kInvalidValue >> 2,
-      (Hit::kInvalidValue >> 2) * 2,
-      (Hit::kInvalidValue >> 2) * 3,
-      Hit::kInvalidValue - 1,
+      kMaxHitValue >> 2,
+      (kMaxHitValue >> 2) * 2,
+      (kMaxHitValue >> 2) * 3,
+      kMaxHitValue - 1,
   };
 
   // Fit at least 4 ordinary values.
@@ -516,22 +733,245 @@
   // Cannot fit 4 overflow values.
   ICING_ASSERT_OK_AND_ASSIGN(
       pl, PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
-  ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[3])));
-  ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[2])));
+  Hit::Flags has_term_frequency_flags = 0b1;
+  ICING_EXPECT_OK(serializer.PrependHit(
+      &pl, Hit(/*value=*/kOverflow[3], has_term_frequency_flags,
+               /*term_frequency=*/8)));
+  ICING_EXPECT_OK(serializer.PrependHit(
+      &pl, Hit(/*value=*/kOverflow[2], has_term_frequency_flags,
+               /*term_frequency=*/8)));
 
   // Can fit only one more.
-  ICING_EXPECT_OK(serializer.PrependHit(&pl, Hit(kOverflow[1])));
-  EXPECT_THAT(serializer.PrependHit(&pl, Hit(kOverflow[0])),
+  ICING_EXPECT_OK(serializer.PrependHit(
+      &pl, Hit(/*value=*/kOverflow[1], has_term_frequency_flags,
+               /*term_frequency=*/8)));
+  EXPECT_THAT(serializer.PrependHit(
+                  &pl, Hit(/*value=*/kOverflow[0], has_term_frequency_flags,
+                           /*term_frequency=*/8)),
               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
 }
 
+TEST(PostingListHitSerializerTest, GetMinPostingListToFitForNotFullPL) {
+  PostingListHitSerializer serializer;
+
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+  // Create and add some hits to make pl_used NOT_FULL
+  std::vector<Hit> hits_in =
+      CreateHits(/*num_hits=*/7, /*desired_byte_length=*/1);
+  for (const Hit &hit : hits_in) {
+    ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
+  }
+  // ----------------------
+  // 23     delta(Hit #0)
+  // 22     delta(Hit #1)
+  // 21     delta(Hit #2)
+  // 20     delta(Hit #3)
+  // 19     delta(Hit #4)
+  // 18     delta(Hit #5)
+  // 17-14  Hit #6
+  // 13-12  <unused>
+  // 11-6   kSpecialHit
+  // 5-0    Offset=14
+  // ----------------------
+  int bytes_used = 10;
+
+  // Check that all hits have been inserted
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
+  std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+  // Get the min size to fit for the hits in pl_used. Moving the hits in
+  // pl_used into a posting list of this min size should make it ALMOST_FULL,
+  // with size = 18, as laid out below:
+  // ----------------------
+  // 17     delta(Hit #0)
+  // 16     delta(Hit #1)
+  // 15     delta(Hit #2)
+  // 14     delta(Hit #3)
+  // 13     delta(Hit #4)
+  // 12     delta(Hit #5)
+  // 11-6   Hit #6
+  // 5-0    kSpecialHit
+  // ----------------------
+  int expected_min_size = 18;
+  uint32_t min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used);
+  EXPECT_THAT(min_size_to_fit, Eq(expected_min_size));
+
+  // Also check that this min-size-to-fit posting list actually does fit all
+  // the hits, and can fit only one more hit in the ALMOST_FULL state.
+  ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed min_size_to_fit_pl,
+                             PostingListUsed::CreateFromUnitializedRegion(
+                                 &serializer, min_size_to_fit));
+  for (const Hit &hit : hits_in) {
+    ICING_ASSERT_OK(serializer.PrependHit(&min_size_to_fit_pl, hit));
+  }
+
+  // Adding another hit to the min-size-to-fit posting list should succeed
+  Hit hit = CreateHit(hits_in.back(), /*desired_byte_length=*/1);
+  ICING_ASSERT_OK(serializer.PrependHit(&min_size_to_fit_pl, hit));
+  // Adding any other hits should fail with RESOURCE_EXHAUSTED error.
+  EXPECT_THAT(serializer.PrependHit(&min_size_to_fit_pl,
+                                    CreateHit(hit, /*desired_byte_length=*/1)),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+  // Check that all hits have been inserted and the min-fit posting list is now
+  // FULL.
+  EXPECT_THAT(serializer.GetBytesUsed(&min_size_to_fit_pl),
+              Eq(min_size_to_fit));
+  hits_pushed.emplace_front(hit);
+  EXPECT_THAT(serializer.GetHits(&min_size_to_fit_pl),
+              IsOkAndHolds(ElementsAreArray(hits_pushed)));
+}
+
+TEST(PostingListHitSerializerTest, GetMinPostingListToFitForTwoHits) {
+  PostingListHitSerializer serializer;
+
+  // Size = 36
+  int pl_size = 3 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+
+  // Create and add 2 hits
+  Hit first_hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/5,
+                /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+  std::vector<Hit> hits_in =
+      CreateHits(first_hit, /*num_hits=*/2, /*desired_byte_length=*/4);
+  for (const Hit &hit : hits_in) {
+    ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
+  }
+  // ----------------------
+  // 35     term-frequency(Hit #0)
+  // 34     flags(Hit #0)
+  // 33-30  delta(Hit #0)
+  // 29     term-frequency(Hit #1)
+  // 28     flags(Hit #1)
+  // 27-24  Hit #1
+  // 23-12  <unused>
+  // 11-6   kSpecialHit
+  // 5-0    Offset=24
+  // ----------------------
+  int bytes_used = 12;
+
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
+  std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+  // GetMinPostingListSizeToFit should return min posting list size.
+  EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
+              Eq(serializer.GetMinPostingListSize()));
+}
+
+TEST(PostingListHitSerializerTest, GetMinPostingListToFitForThreeSmallHits) {
+  PostingListHitSerializer serializer;
+
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+  // Create and add 3 small hits that fit in the size range where we should be
+  // checking whether the PL has only 2 hits.
+  std::vector<Hit> hits_in =
+      CreateHits(/*num_hits=*/3, /*desired_byte_length=*/1);
+  for (const Hit &hit : hits_in) {
+    ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
+  }
+  // ----------------------
+  // 23     delta(Hit #0)
+  // 22     delta(Hit #1)
+  // 21-18  Hit #2
+  // 17-12  <unused>
+  // 11-6   kSpecialHit
+  // 5-0    Offset=18
+  // ----------------------
+  int bytes_used = 6;
+
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
+  std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+  // Get the min size to fit for the hits in pl_used. Moving the hits in
+  // pl_used into a posting list of this min size should make it ALMOST_FULL,
+  // with size = 14, as laid out below. Note that this is larger than the min
+  // posting list size.
+  // ----------------------
+  // 13     delta(Hit #0)
+  // 12     delta(Hit #1)
+  // 11-6   Hit #2
+  // 5-0    kSpecialHit
+  // ----------------------
+  int expected_min_size = 14;
+
+  EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
+              Gt(serializer.GetMinPostingListSize()));
+  EXPECT_THAT(serializer.GetMinPostingListSizeToFit(&pl_used),
+              Eq(expected_min_size));
+}
+
+TEST(PostingListHitSerializerTest,
+     GetMinPostingListToFitForAlmostFullAndFullPLReturnsSameSize) {
+  PostingListHitSerializer serializer;
+
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      PostingListUsed pl_used,
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
+  // Create and add some hits to make pl_used ALMOST_FULL
+  std::vector<Hit> hits_in =
+      CreateHits(/*num_hits=*/7, /*desired_byte_length=*/2);
+  for (const Hit &hit : hits_in) {
+    ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
+  }
+  // ----------------------
+  // 23-22     delta(Hit #0)
+  // 21-20     delta(Hit #1)
+  // 19-18     delta(Hit #2)
+  // 17-16     delta(Hit #3)
+  // 15-14     delta(Hit #4)
+  // 13-12     delta(Hit #5)
+  // 11-6      Hit #6
+  // 5-0       kSpecialHit
+  // ----------------------
+  int bytes_used = 18;
+
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
+  std::deque<Hit> hits_pushed(hits_in.rbegin(), hits_in.rend());
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+  // GetMinPostingListSizeToFit should return the same size as pl_used.
+  uint32_t min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used);
+  EXPECT_THAT(min_size_to_fit, Eq(pl_size));
+
+  // Add another hit to make the posting list FULL
+  Hit hit = CreateHit(hits_in.back(), /*desired_byte_length=*/1);
+  ICING_ASSERT_OK(serializer.PrependHit(&pl_used, hit));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(pl_size));
+  hits_pushed.emplace_front(hit);
+  EXPECT_THAT(serializer.GetHits(&pl_used),
+              IsOkAndHolds(ElementsAreArray(hits_pushed)));
+
+  // GetMinPostingListSizeToFit should still be the same size as pl_used.
+  min_size_to_fit = serializer.GetMinPostingListSizeToFit(&pl_used);
+  EXPECT_THAT(min_size_to_fit, Eq(pl_size));
+}
+
 TEST(PostingListHitSerializerTest, MoveFrom) {
   PostingListHitSerializer serializer;
 
-  int size = 3 * serializer.GetMinPostingListSize();
+  int pl_size = 3 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used1,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits1 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
   for (const Hit &hit : hits1) {
@@ -540,7 +980,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used2,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits2 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
   for (const Hit &hit : hits2) {
@@ -556,10 +996,10 @@
 TEST(PostingListHitSerializerTest, MoveFromNullArgumentReturnsInvalidArgument) {
   PostingListHitSerializer serializer;
 
-  int size = 3 * serializer.GetMinPostingListSize();
+  int pl_size = 3 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used1,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits = CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
   for (const Hit &hit : hits) {
     ICING_ASSERT_OK(serializer.PrependHit(&pl_used1, hit));
@@ -575,10 +1015,10 @@
      MoveFromInvalidPostingListReturnsInvalidArgument) {
   PostingListHitSerializer serializer;
 
-  int size = 3 * serializer.GetMinPostingListSize();
+  int pl_size = 3 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used1,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits1 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
   for (const Hit &hit : hits1) {
@@ -587,7 +1027,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used2,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits2 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
   for (const Hit &hit : hits2) {
@@ -595,7 +1035,7 @@
   }
 
   // Write invalid hits to the beginning of pl_used1 to make it invalid.
-  Hit invalid_hit;
+  Hit invalid_hit(Hit::kInvalidValue);
   Hit *first_hit = reinterpret_cast<Hit *>(pl_used1.posting_list_buffer());
   *first_hit = invalid_hit;
   ++first_hit;
@@ -610,10 +1050,10 @@
      MoveToInvalidPostingListReturnsFailedPrecondition) {
   PostingListHitSerializer serializer;
 
-  int size = 3 * serializer.GetMinPostingListSize();
+  int pl_size = 3 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used1,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits1 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
   for (const Hit &hit : hits1) {
@@ -622,7 +1062,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used2,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits2 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/2);
   for (const Hit &hit : hits2) {
@@ -630,7 +1070,7 @@
   }
 
   // Write invalid hits to the beginning of pl_used2 to make it invalid.
-  Hit invalid_hit;
+  Hit invalid_hit(Hit::kInvalidValue);
   Hit *first_hit = reinterpret_cast<Hit *>(pl_used2.posting_list_buffer());
   *first_hit = invalid_hit;
   ++first_hit;
@@ -644,10 +1084,10 @@
 TEST(PostingListHitSerializerTest, MoveToPostingListTooSmall) {
   PostingListHitSerializer serializer;
 
-  int size = 3 * serializer.GetMinPostingListSize();
+  int pl_size = 3 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used1,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
   std::vector<Hit> hits1 =
       CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
   for (const Hit &hit : hits1) {
@@ -672,30 +1112,36 @@
               IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
 }
 
-TEST(PostingListHitSerializerTest, PopHitsWithScores) {
+TEST(PostingListHitSerializerTest, PopHitsWithTermFrequenciesAndFlags) {
   PostingListHitSerializer serializer;
 
-  int size = 2 * serializer.GetMinPostingListSize();
+  // Size = 24
+  int pl_size = 2 * serializer.GetMinPostingListSize();
   ICING_ASSERT_OK_AND_ASSIGN(
       PostingListUsed pl_used,
-      PostingListUsed::CreateFromUnitializedRegion(&serializer, size));
+      PostingListUsed::CreateFromUnitializedRegion(&serializer, pl_size));
 
-  // This posting list is 20-bytes. Create four hits that will have deltas of
-  // two bytes each and all of whom will have a non-default score. This posting
-  // list will be almost_full.
+  // This posting list is 24 bytes. Create four hits that will have deltas of
+  // two bytes each, all of which will have a non-default term-frequency. This
+  // posting list will be almost_full.
   //
   // ----------------------
-  // 19     score(Hit #0)
-  // 18-17  delta(Hit #0)
-  // 16     score(Hit #1)
-  // 15-14  delta(Hit #1)
-  // 13     score(Hit #2)
-  // 12-11  delta(Hit #2)
-  // 10     <unused>
-  // 9-5    Hit #3
-  // 4-0    kInvalidHitVal
+  // 23     term-frequency(Hit #0)
+  // 22     flags(Hit #0)
+  // 21-20  delta(Hit #0)
+  // 19     term-frequency(Hit #1)
+  // 18     flags(Hit #1)
+  // 17-16  delta(Hit #1)
+  // 15     term-frequency(Hit #2)
+  // 14     flags(Hit #2)
+  // 13-12  delta(Hit #2)
+  // 11-6   Hit #3
+  // 5-0    kInvalidHit
   // ----------------------
-  Hit hit0(/*section_id=*/0, /*document_id=*/0, /*score=*/5);
+  int bytes_used = 18;
+
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/5,
+           /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
   Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
   Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
   Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2);
@@ -707,22 +1153,26 @@
   ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> hits_out,
                              serializer.GetHits(&pl_used));
   EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
 
   // Now, pop the last hit. The posting list should contain the first three
   // hits.
   //
   // ----------------------
-  // 19     score(Hit #0)
-  // 18-17  delta(Hit #0)
-  // 16     score(Hit #1)
-  // 15-14  delta(Hit #1)
-  // 13-10  <unused>
-  // 9-5    Hit #2
-  // 4-0    kInvalidHitVal
+  // 23     term-frequency(Hit #0)
+  // 22     flags(Hit #0)
+  // 21-20  delta(Hit #0)
+  // 19     term-frequency(Hit #1)
+  // 18     flags(Hit #1)
+  // 17-16  delta(Hit #1)
+  // 15-12  <unused>
+  // 11-6   Hit #2
+  // 5-0    kInvalidHit
   // ----------------------
   ICING_ASSERT_OK(serializer.PopFrontHits(&pl_used, 1));
   ICING_ASSERT_OK_AND_ASSIGN(hits_out, serializer.GetHits(&pl_used));
   EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0));
+  EXPECT_THAT(serializer.GetBytesUsed(&pl_used), Eq(bytes_used));
 }
 
 }  // namespace
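
The byte accounting throughout these tests follows from two quantities: sizeof(Hit), which grows to 6 bytes under the extension (a 4-byte value plus the new 1-byte flags and 1-byte term-frequency fields, hence the 6-byte special hits and 12-byte minimum posting list in the layout diagrams), and the varint width of each delta between adjacent hit values. A minimal sketch of that width computation, assuming a plain unsigned LEB128 varint rather than the exact encoding behind PostingListHitSerializer::EncodeNextHitValue:

#include <cstdint>
#include <iostream>

// Bytes needed to varint-encode a value: 7 payload bits per encoded byte.
int VarIntWidth(uint32_t value) {
  int width = 1;
  while (value >= 0x80) {
    value >>= 7;
    ++width;
  }
  return width;
}

int main() {
  std::cout << VarIntWidth(0x45) << "\n";        // 1: fits in 7 bits
  std::cout << VarIntWidth(0x1234) << "\n";      // 2: needs 13 bits
  std::cout << VarIntWidth(0x0FFFFFFF) << "\n";  // 4: needs 28 bits
  return 0;
}

This is why a chain of hits with 1-byte deltas compresses each 4-byte hit value down to a single byte, and why a hit with its flags byte set costs two extra bytes (flags plus term frequency) on top of its delta.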
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 74e51cf..bc0917b 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -172,8 +172,10 @@
       search_spec.enabled_features().end());
   for (const Feature feature : results.features_in_use) {
     if (enabled_features.find(feature) == enabled_features.end()) {
-      return absl_ports::InvalidArgumentError(
-          absl_ports::StrCat("Attempted use of unenabled feature ", feature));
+      return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+          "Attempted use of unenabled feature ", feature,
+          ". Please make sure that you have explicitly set all advanced query "
+          "features used in this query as enabled in the SearchSpec."));
     }
   }
 
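For context on the query-processor change: this error is returned when a query exercises an advanced-query feature that the caller never declared in the SearchSpec. A hedged caller-side sketch of what the expanded message asks for, assuming the standard generated proto API; the feature string is an assumption, not taken from this diff (see icing/query/query-features.h for the exact constants):

#include "icing/proto/search.pb.h"

icing::lib::SearchSpecProto BuildAdvancedQuerySpec() {
  icing::lib::SearchSpecProto search_spec;
  search_spec.set_query("price < 10");
  search_spec.set_search_type(
      icing::lib::SearchSpecProto::SearchType::EXPERIMENTAL_ICING_ADVANCED_QUERY);
  // Declare every advanced-query feature the query relies on; an undeclared
  // feature now fails with the instructions added above.
  search_spec.add_enabled_features("NUMERIC_SEARCH");
  return search_spec;
}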
diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc
index 7ad8a64..2fd3ac8 100644
--- a/icing/testing/hit-test-utils.cc
+++ b/icing/testing/hit-test-utils.cc
@@ -14,29 +14,44 @@
 
 #include "icing/testing/hit-test-utils.h"
 
+#include <cstdint>
+#include <vector>
+
+#include "icing/index/hit/hit.h"
+#include "icing/index/main/posting-list-hit-serializer.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
 namespace icing {
 namespace lib {
 
 // Returns a hit that has a delta of desired_byte_length from last_hit.
-Hit CreateHit(Hit last_hit, int desired_byte_length) {
-  Hit hit = (last_hit.section_id() == kMinSectionId)
-                ? Hit(kMaxSectionId, last_hit.document_id() + 1,
-                      last_hit.term_frequency())
-                : Hit(last_hit.section_id() - 1, last_hit.document_id(),
-                      last_hit.term_frequency());
+Hit CreateHit(const Hit& last_hit, int desired_byte_length) {
+  return CreateHit(last_hit, desired_byte_length, last_hit.term_frequency(),
+                   /*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
+}
+
+// Returns a hit that has a delta of desired_byte_length from last_hit, with
+// the desired term_frequency and flags.
+Hit CreateHit(const Hit& last_hit, int desired_byte_length,
+              Hit::TermFrequency term_frequency, bool is_in_prefix_section,
+              bool is_prefix_hit) {
+  Hit hit = last_hit;
   uint8_t buf[5];
-  while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
-         desired_byte_length) {
+  do {
     hit = (hit.section_id() == kMinSectionId)
-              ? Hit(kMaxSectionId, hit.document_id() + 1, hit.term_frequency())
-              : Hit(hit.section_id() - 1, hit.document_id(),
-                    hit.term_frequency());
-  }
+              ? Hit(kMaxSectionId, hit.document_id() + 1, term_frequency,
+                    is_in_prefix_section, is_prefix_hit)
+              : Hit(hit.section_id() - 1, hit.document_id(), term_frequency,
+                    is_in_prefix_section, is_prefix_hit);
+  } while (PostingListHitSerializer::EncodeNextHitValue(
+               /*next_hit_value=*/hit.value(),
+               /*curr_hit_value=*/last_hit.value(), buf) < desired_byte_length);
   return hit;
 }
 
 // Returns a vector of num_hits Hits with the first hit starting at start_docid
-// and with 1-byte deltas.
+// and with deltas of the desired byte length.
 std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
                             int desired_byte_length) {
   std::vector<Hit> hits;
@@ -44,13 +59,30 @@
     return hits;
   }
   hits.push_back(Hit(/*section_id=*/1, /*document_id=*/start_docid,
-                     Hit::kDefaultTermFrequency));
+                     Hit::kDefaultTermFrequency, /*is_in_prefix_section=*/false,
+                     /*is_prefix_hit=*/false));
   while (hits.size() < num_hits) {
     hits.push_back(CreateHit(hits.back(), desired_byte_length));
   }
   return hits;
 }
 
+// Returns a vector of num_hits Hits, with the first hit having a delta of
+// desired_byte_length from last_hit and subsequent deltas of the same length.
+std::vector<Hit> CreateHits(const Hit& last_hit, int num_hits,
+                            int desired_byte_length) {
+  std::vector<Hit> hits;
+  if (num_hits < 1) {
+    return hits;
+  }
+  hits.reserve(num_hits);
+  for (int i = 0; i < num_hits; ++i) {
+    hits.push_back(
+        CreateHit(hits.empty() ? last_hit : hits.back(), desired_byte_length));
+  }
+  return hits;
+}
+
 std::vector<Hit> CreateHits(int num_hits, int desired_byte_length) {
   return CreateHits(/*start_docid=*/0, num_hits, desired_byte_length);
 }
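
A usage sketch for the reworked helpers, grounded in the declarations above (the overload taking last_hit is new in this change):

#include <vector>

#include "icing/index/hit/hit.h"
#include "icing/testing/hit-test-utils.h"

namespace icing {
namespace lib {

void ExampleUsage() {
  // Four hits starting at document 0, each with a 2-byte encoded delta.
  std::vector<Hit> hits =
      CreateHits(/*num_hits=*/4, /*desired_byte_length=*/2);

  // New overload: extend the chain from an existing hit with the same delta
  // width. Handy for walking a posting list from NOT_FULL to ALMOST_FULL
  // with predictable byte counts, as the serializer tests above do.
  std::vector<Hit> more =
      CreateHits(hits.back(), /*num_hits=*/3, /*desired_byte_length=*/2);
}

}  // namespace lib
}  // namespace icing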
diff --git a/icing/testing/hit-test-utils.h b/icing/testing/hit-test-utils.h
index e236ec0..2953c5c 100644
--- a/icing/testing/hit-test-utils.h
+++ b/icing/testing/hit-test-utils.h
@@ -18,21 +18,30 @@
 #include <vector>
 
 #include "icing/index/hit/hit.h"
-#include "icing/legacy/index/icing-bit-util.h"
-#include "icing/schema/section.h"
 #include "icing/store/document-id.h"
 
 namespace icing {
 namespace lib {
 
 // Returns a hit that has a delta of desired_byte_length from last_hit.
-Hit CreateHit(Hit last_hit, int desired_byte_length);
+Hit CreateHit(const Hit& last_hit, int desired_byte_length);
+
+// Returns a hit that has a delta of desired_byte_length from last_hit, with
+// the desired term_frequency and flags.
+Hit CreateHit(const Hit& last_hit, int desired_byte_length,
+              Hit::TermFrequency term_frequency, bool is_in_prefix_section,
+              bool is_prefix_hit);
 
 // Returns a vector of num_hits Hits with the first hit starting at start_docid
 // and with desired_byte_length deltas.
 std::vector<Hit> CreateHits(DocumentId start_docid, int num_hits,
                             int desired_byte_length);
 
+// Returns a vector of num_hits Hits, with the first hit having a delta of
+// desired_byte_length from last_hit and subsequent deltas of the same length.
+std::vector<Hit> CreateHits(const Hit& last_hit, int num_hits,
+                            int desired_byte_length);
+
 // Returns a vector of num_hits Hits with the first hit starting at 0 and each
 // with desired_byte_length deltas.
 std::vector<Hit> CreateHits(int num_hits, int desired_byte_length);
diff --git a/java/src/com/google/android/icing/IcingSearchEngineInterface.java b/java/src/com/google/android/icing/IcingSearchEngineInterface.java
index 0bc58f1..67f60ed 100644
--- a/java/src/com/google/android/icing/IcingSearchEngineInterface.java
+++ b/java/src/com/google/android/icing/IcingSearchEngineInterface.java
@@ -32,7 +32,7 @@
 import com.google.android.icing.proto.UsageReport;
 import java.io.Closeable;
 
-/** A common user-facing interface to expose the funcationalities provided by Icing Library. */
+/** A common user-facing interface to expose the functionalities provided by Icing Library. */
 public interface IcingSearchEngineInterface extends Closeable {
   /**
    * Initializes the current IcingSearchEngine implementation.
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index c716dba..78e1588 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -34,7 +34,7 @@
 // TODO(cassiewang) Define a sample proto file that can be used by tests and for
 // documentation.
 //
-// Next tag: 7
+// Next tag: 8
 message SchemaTypeConfigProto {
   // REQUIRED: Named type that uniquely identifies the structured, logical
   // schema being defined.
@@ -43,6 +43,13 @@
   // in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
   optional string schema_type = 1;
 
+  // OPTIONAL: A natural language description of the SchemaTypeConfigProto.
+  //
+  // This string is not used by Icing in any way. It simply exists to allow
+  // users to store semantic information about the SchemaTypeConfigProto for
+  // future retrieval.
+  optional string description = 7;
+
   // List of all properties that are supported by Documents of this type.
   // An Document should never have properties that are not listed here.
   //
@@ -208,7 +215,7 @@
 // Describes the schema of a single property of Documents that belong to a
 // specific SchemaTypeConfigProto. These can be considered as a rich, structured
 // type for each property of Documents accepted by IcingSearchEngine.
-// Next tag: 9
+// Next tag: 10
 message PropertyConfigProto {
   // REQUIRED: Name that uniquely identifies a property within an Document of
   // a specific SchemaTypeConfigProto.
@@ -219,6 +226,13 @@
   // Eg: 'address' for http://schema.org/Place.
   optional string property_name = 1;
 
+  // OPTIONAL: A natural language description of the property.
+  //
+  // This string is not used by Icing in any way. It simply exists to allow
+  // users to store semantic information about the PropertyConfigProto for
+  // future retrieval.
+  optional string description = 9;
+
   // REQUIRED: Physical data-types of the contents of the property.
   message DataType {
     enum Code {
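
A short sketch of the new description fields in use, assuming the standard generated proto setters and that the repeated property field is named properties as in the current schema.proto:

#include "icing/proto/schema.pb.h"

icing::lib::SchemaTypeConfigProto MakeEmailType() {
  icing::lib::SchemaTypeConfigProto type_config;
  type_config.set_schema_type("Email");
  // Stored verbatim and never interpreted by Icing.
  type_config.set_description("A message sent or received via email.");

  icing::lib::PropertyConfigProto* subject = type_config.add_properties();
  subject->set_property_name("subject");
  subject->set_description("The subject line of the email.");
  return type_config;
}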
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index a36a007..c22a1e8 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=596096845)
+set(synced_AOSP_CL_number=613977851)