Snap for 7574892 from d462d0aedaec65748ad2a8cb5b3f5cb352ab2c20 to sc-d1-release

Change-Id: I639f4d07decb2aacf21743aa5b23d527641fe301
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 2443cb2..0989935 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -56,6 +56,7 @@
 #ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
 #define ICING_FILE_FILE_BACKED_VECTOR_H_
 
+#include <inttypes.h>
 #include <stdint.h>
 #include <sys/mman.h>
 
@@ -423,13 +424,6 @@
         absl_ports::StrCat("Invalid header kMagic for ", file_path));
   }
 
-  // Mmap the content of the vector, excluding the header so its easier to
-  // access elements from the mmapped region
-  auto mmapped_file =
-      std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
-  ICING_RETURN_IF_ERROR(
-      mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
-
   // Check header
   if (header->header_checksum != header->CalculateHeaderChecksum()) {
     return absl_ports::FailedPreconditionError(
@@ -442,6 +436,20 @@
         header->element_size));
   }
 
+  int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header);
+  if (min_file_size > file_size) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Inconsistent file size, expected %" PRId64 ", actual %" PRId64,
+        min_file_size, file_size));
+  }
+
+  // Mmap the content of the vector, excluding the header so its easier to
+  // access elements from the mmapped region
+  auto mmapped_file =
+      std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+  ICING_RETURN_IF_ERROR(
+      mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
+
   // Check vector contents
   Crc32 vector_checksum;
   std::string_view vector_contents(
@@ -591,9 +599,24 @@
   least_file_size_needed = math_util::RoundUpTo(
       least_file_size_needed,
       int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)});
-  if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) {
-    return absl_ports::InternalError(
-        absl_ports::StrCat("Couldn't grow file ", file_path_));
+
+  // We use PWrite here rather than Grow because Grow doesn't actually allocate
+  // an underlying disk block. This can lead to problems with mmap because mmap
+  // has no effective way to signal that it was impossible to allocate the disk
+  // block and ends up crashing instead. PWrite will force the allocation of
+  // these blocks, which will ensure that any failure to grow will surface here.
+  int64_t page_size = getpagesize();
+  auto buf = std::make_unique<uint8_t[]>(page_size);
+  int64_t size_to_write = page_size - (current_file_size % page_size);
+  ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str()));
+  while (current_file_size < least_file_size_needed) {
+    if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(),
+                             size_to_write)) {
+      return absl_ports::InternalError(
+          absl_ports::StrCat("Couldn't grow file ", file_path_));
+    }
+    current_file_size += size_to_write;
+    size_to_write = page_size - (current_file_size % page_size);
   }
 
   ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index bc2fef6..b05ce2d 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -32,6 +32,7 @@
 #include "icing/util/logging.h"
 
 using ::testing::Eq;
+using ::testing::IsTrue;
 using ::testing::Pointee;
 
 namespace icing {
@@ -278,7 +279,6 @@
           filesystem_, file_path_,
           MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
   EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
-
   EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'),
               StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
   EXPECT_THAT(vector->Set(-1, 'a'),
@@ -318,25 +318,32 @@
           filesystem_, file_path_,
           MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
 
-  // Our initial file size should just be the size of the header
-  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
-              Eq(sizeof(FileBackedVector<char>::Header)));
+  // Our initial file size should just be the size of the header. Disk usage
+  // will indicate that one block has been allocated, which contains the header.
+  int header_size = sizeof(FileBackedVector<char>::Header);
+  int page_size = getpagesize();
+  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size));
+  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size));
 
-  // Once we add something though, we'll grow to kGrowElements big
+  // Once we add something though, we'll grow to be kGrowElements big. From this
+  // point on, file size and disk usage should be the same because Growing will
+  // explicitly allocate the number of blocks needed to accomodate the file.
   Insert(vector.get(), 0, "a");
-  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
-              Eq(kGrowElements * sizeof(int)));
+  int file_size = kGrowElements * sizeof(int);
+  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
 
   // Should still be the same size, don't need to grow underlying file
   Insert(vector.get(), 1, "b");
-  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
-              Eq(kGrowElements * sizeof(int)));
+  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
 
   // Now we grow by a kGrowElements chunk, so the underlying file is 2
   // kGrowElements big
+  file_size *= 2;
   Insert(vector.get(), 2, std::string(kGrowElements, 'c'));
-  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
-              Eq(kGrowElements * 2 * sizeof(int)));
+  EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+  EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
 
   // Destroy/persist the contents.
   vector.reset();
@@ -463,6 +470,174 @@
   }
 }
 
+TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) {
+  {
+    // 1. Create a vector with a few elements.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<FileBackedVector<char>> vector,
+        FileBackedVector<char>::Create(
+            filesystem_, file_path_,
+            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+    Insert(vector.get(), 0, "A");
+    Insert(vector.get(), 1, "Z");
+    ASSERT_THAT(vector->PersistToDisk(), IsOk());
+  }
+
+  // 2. Shrink the file to be smaller than the header.
+  filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1);
+
+  {
+    // 3. Attempt to create the file and confirm that it fails.
+    EXPECT_THAT(FileBackedVector<char>::Create(
+                    filesystem_, file_path_,
+                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+                StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+  }
+}
+
+TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) {
+  {
+    // 1. Create a vector with a few elements.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<FileBackedVector<char>> vector,
+        FileBackedVector<char>::Create(
+            filesystem_, file_path_,
+            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+    Insert(vector.get(), 0, "A");
+    Insert(vector.get(), 1, "Z");
+    ASSERT_THAT(vector->PersistToDisk(), IsOk());
+  }
+
+  {
+    // 2. Attempt to create the file with a different element size and confirm
+    // that it fails.
+    EXPECT_THAT(FileBackedVector<int>::Create(
+                    filesystem_, file_path_,
+                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+                StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+  }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) {
+  {
+    // 1. Create a vector with a few elements.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<FileBackedVector<char>> vector,
+        FileBackedVector<char>::Create(
+            filesystem_, file_path_,
+            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+    Insert(vector.get(), 0, "A");
+    Insert(vector.get(), 1, "Z");
+    ASSERT_THAT(vector->PersistToDisk(), IsOk());
+  }
+
+  // 2. Modify the header, but don't update the checksum. This would be similar
+  // to corruption of the header.
+  FileBackedVector<char>::Header header;
+  ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+              IsTrue());
+  header.num_elements = 1;
+  ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+              IsTrue());
+
+  {
+    // 3. Attempt to create the file with a header that doesn't match its
+    // checksum and confirm that it fails.
+    EXPECT_THAT(FileBackedVector<char>::Create(
+                    filesystem_, file_path_,
+                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+                StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+  }
+}
+
+TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) {
+  {
+    // 1. Create a vector with a few elements.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<FileBackedVector<char>> vector,
+        FileBackedVector<char>::Create(
+            filesystem_, file_path_,
+            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+    Insert(vector.get(), 0, "A");
+    Insert(vector.get(), 1, "Z");
+    ASSERT_THAT(vector->PersistToDisk(), IsOk());
+  }
+
+  // 2. Modify the header so that the number of elements exceeds the actual size
+  // of the underlying file.
+  FileBackedVector<char>::Header header;
+  ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+              IsTrue());
+  int64_t file_size = filesystem_.GetFileSize(fd_);
+  int64_t allocated_elements_size = file_size - sizeof(header);
+  header.num_elements = (allocated_elements_size / sizeof(char)) + 1;
+  header.header_checksum = header.CalculateHeaderChecksum();
+  ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+              IsTrue());
+
+  {
+    // 3. Attempt to create the file with num_elements that is larger than the
+    // underlying file and confirm that it fails.
+    EXPECT_THAT(FileBackedVector<char>::Create(
+                    filesystem_, file_path_,
+                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+                StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+  }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptElementsFails) {
+  {
+    // 1. Create a vector with a few elements.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<FileBackedVector<char>> vector,
+        FileBackedVector<char>::Create(
+            filesystem_, file_path_,
+            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+    Insert(vector.get(), 0, "A");
+    Insert(vector.get(), 1, "Z");
+    ASSERT_THAT(vector->PersistToDisk(), IsOk());
+  }
+
+  // 2. Overwrite the values of the first two elements.
+  std::string corrupted_content = "BY";
+  ASSERT_THAT(
+      filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
+                         corrupted_content.c_str(), corrupted_content.length()),
+      IsTrue());
+
+  {
+    // 3. Attempt to create the file with elements that don't match their
+    // checksum and confirm that it fails.
+    EXPECT_THAT(FileBackedVector<char>::Create(
+                    filesystem_, file_path_,
+                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+                StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+  }
+}
+
+TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
+  {
+    // 1. Create a vector with a few elements.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<FileBackedVector<char>> vector,
+        FileBackedVector<char>::Create(
+            filesystem_, file_path_,
+            MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+    Insert(vector.get(), 0, "A");
+    Insert(vector.get(), 1, "Z");
+    ASSERT_THAT(vector->PersistToDisk(), IsOk());
+  }
+
+  {
+    // 2. Attempt to create the file with a completely valid header and elements
+    // region. This should succeed.
+    EXPECT_THAT(FileBackedVector<char>::Create(
+                    filesystem_, file_path_,
+                    MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+                IsOk());
+  }
+}
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 81edce1..226a96b 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -1577,6 +1577,7 @@
   int size = document_id_mapper_->num_elements();
   int num_deleted = 0;
   int num_expired = 0;
+  UsageStore::UsageScores default_usage;
   for (DocumentId document_id = 0; document_id < size; document_id++) {
     auto document_or = Get(document_id, /*clear_internal_fields=*/false);
     if (absl_ports::IsNotFound(document_or.status())) {
@@ -1625,10 +1626,14 @@
     // Copy over usage scores.
     ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
                            usage_store_->GetUsageScores(document_id));
-
-    DocumentId new_document_id = new_document_id_or.ValueOrDie();
-    ICING_RETURN_IF_ERROR(
-        new_doc_store->SetUsageScores(new_document_id, usage_scores));
+    if (!(usage_scores == default_usage)) {
+      // If the usage scores for this document are the default (no usage), then
+      // don't bother setting it. No need to possibly allocate storage if
+      // there's nothing interesting to store.
+      DocumentId new_document_id = new_document_id_or.ValueOrDie();
+      ICING_RETURN_IF_ERROR(
+          new_doc_store->SetUsageScores(new_document_id, usage_scores));
+    }
   }
   if (stats != nullptr) {
     stats->set_num_original_documents(size);