Merge "New SplitPatchWriter to split a bsdiff patch into multiple ones."
diff --git a/Android.bp b/Android.bp
index 711efde..9907383 100644
--- a/Android.bp
+++ b/Android.bp
@@ -59,6 +59,7 @@
         "bz2_compressor.cc",
         "diff_encoder.cc",
         "patch_writer.cc",
+        "split_patch_writer.cc",
     ],
     static_libs: [
         "libdivsufsort64",
@@ -104,6 +105,7 @@
         "extents_file_unittest.cc",
         "extents_unittest.cc",
         "patch_writer_unittest.cc",
+        "split_patch_writer_unittest.cc",
         "test_utils.cc",
         "testrunner.cc",
     ],
diff --git a/Makefile b/Makefile
index b9c3b66..0a881a7 100644
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,8 @@
     bsdiff.cc \
     bz2_compressor.cc \
     diff_encoder.cc \
-    patch_writer.cc
+    patch_writer.cc \
+    split_patch_writer.cc
 
 # "bspatch" program.
 bspatch_src_files := \
@@ -56,6 +57,7 @@
     extents_file_unittest.cc \
     extents_unittest.cc \
     patch_writer_unittest.cc \
+    split_patch_writer_unittest.cc \
     test_utils.cc \
     testrunner.cc
 
diff --git a/bsdiff.gyp b/bsdiff.gyp
index dcb7bde..87f0c2b 100644
--- a/bsdiff.gyp
+++ b/bsdiff.gyp
@@ -56,6 +56,7 @@
         'bz2_compressor.cc',
         'diff_encoder.cc',
         'patch_writer.cc',
+        'split_patch_writer.cc',
       ],
     },
     # bsdiff executable
@@ -131,6 +132,7 @@
             'extents_file_unittest.cc',
             'extents_unittest.cc',
             'patch_writer_unittest.cc',
+            'split_patch_writer_unittest.cc',
             'test_utils.cc',
           ],
         },
diff --git a/include/bsdiff/patch_writer_interface.h b/include/bsdiff/patch_writer_interface.h
index 6ae4656..04f1675 100644
--- a/include/bsdiff/patch_writer_interface.h
+++ b/include/bsdiff/patch_writer_interface.h
@@ -27,6 +27,11 @@
 
   // The value to add to the source pointer after patching from the diff stream.
   int64_t offset_increment;
+
+  bool operator==(const ControlEntry& o) const {
+    return diff_size == o.diff_size && extra_size == o.extra_size &&
+           offset_increment == o.offset_increment;
+  }
 };
 
 class PatchWriterInterface {
diff --git a/split_patch_writer.cc b/split_patch_writer.cc
new file mode 100644
index 0000000..4123440
--- /dev/null
+++ b/split_patch_writer.cc
@@ -0,0 +1,171 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "bsdiff/split_patch_writer.h"
+
+#include <algorithm>
+
+#include "bsdiff/logging.h"
+
+using std::endl;
+
+namespace bsdiff {
+
+bool SplitPatchWriter::Init() {
+  // Fail gracefully if re-initialized.
+  if (current_patch_ || patches_.empty())
+    return false;
+
+  return patches_[0]->Init();
+}
+
+bool SplitPatchWriter::WriteDiffStream(const uint8_t* data, size_t size) {
+  return WriteToStream(&PatchWriterInterface::WriteDiffStream, &diff_sizes_,
+                       data, size);
+}
+
+bool SplitPatchWriter::WriteExtraStream(const uint8_t* data, size_t size) {
+  return WriteToStream(&PatchWriterInterface::WriteExtraStream, &extra_sizes_,
+                       data, size);
+}
+
+bool SplitPatchWriter::AddControlEntry(const ControlEntry& entry) {
+  ControlEntry remaining(entry);
+  while (written_output_ + remaining.diff_size + remaining.extra_size >=
+         (current_patch_ + 1) * new_chunk_size_) {
+    // We need to write some of the current ControlEntry to the current patch
+    // and move on to the next patch if there are more bytes to write.
+    uint64_t remaining_bytes =
+        (current_patch_ + 1) * new_chunk_size_ - written_output_;
+    // The offset_increment is always 0 in this case since we don't plan to read
+    // for the old file in the current_patch anymore.
+    ControlEntry current_patch_entry(0, 0, 0);
+
+    current_patch_entry.diff_size =
+        std::min(remaining.diff_size, remaining_bytes);
+    remaining_bytes -= current_patch_entry.diff_size;
+    remaining.diff_size -= current_patch_entry.diff_size;
+
+    // This will be positive only if we used all the diff_size bytes.
+    current_patch_entry.extra_size =
+        std::min(remaining.extra_size, remaining_bytes);
+    remaining_bytes -= current_patch_entry.extra_size;
+    remaining.extra_size -= current_patch_entry.extra_size;
+
+    AddControlEntryToCurrentPatch(current_patch_entry);
+
+    if (remaining.diff_size + remaining.extra_size > 0) {
+      current_patch_++;
+      if (current_patch_ >= patches_.size()) {
+        LOG(ERROR) << "Writing past the last patch" << endl;
+        return false;
+      }
+      if (!patches_[current_patch_]->Init()) {
+        LOG(ERROR) << "Failed to initialize patch " << current_patch_ << endl;
+        return false;
+      }
+      if (!remaining.diff_size) {
+        // When no diff need to be sent to the output, we can just push the
+        // existing old_pos_ as part of the current triplet, since the extra
+        // stream doesn't use the old_pos_;
+        remaining.offset_increment += old_pos_;
+        old_pos_ = 0;
+      }
+      // Need to add a dummy control entry at the beginning of the patch to
+      // offset the old_pos in the new patch, which would start at 0.
+      if (old_pos_ != 0) {
+        if (!patches_[current_patch_]->AddControlEntry(
+                ControlEntry(0, 0, old_pos_)))
+          return false;
+      }
+    } else {
+      // There was no need to write more bytes past the current patch, so just
+      // update the old_pos_ we are tracking for the next patch, if any.
+      old_pos_ += remaining.offset_increment;
+      return true;
+    }
+  }
+
+  // Trivial entries will be ignored.
+  return AddControlEntryToCurrentPatch(remaining);
+}
+
+bool SplitPatchWriter::Close() {
+  uint64_t missing_bytes = 0;
+  for (auto size : diff_sizes_)
+    missing_bytes += size;
+  for (auto size : extra_sizes_)
+    missing_bytes += size;
+  if (missing_bytes > 0) {
+    LOG(ERROR) << "Close() called but there are " << missing_bytes
+               << " bytes missing from Write*Stream() calls" << endl;
+    return false;
+  }
+
+  // |current_patch_| holds the last patch that was Init()'ed. If there are more
+  // patches in the list those have not been initialized/closed, which is a
+  // programming error.
+  if (current_patch_ + 1 != patches_.size()) {
+    LOG(ERROR) << "Close() called but no bytes habe been written to the last "
+                  "patch"
+               << endl;
+    return false;
+  }
+
+  // Close all the remaining streams.
+  for (; closed_patches_ < patches_.size(); closed_patches_++) {
+    if (!patches_[closed_patches_]->Close())
+      return false;
+  }
+  return true;
+}
+
+bool SplitPatchWriter::AddControlEntryToCurrentPatch(
+    const ControlEntry& entry) {
+  // Ignore trivial control entries that don't modify the state.
+  if (!entry.diff_size && !entry.extra_size && !entry.offset_increment)
+    return true;
+
+  if (current_patch_ >= patches_.size()) {
+    LOG(ERROR) << "Writing past the last patch" << endl;
+    return false;
+  }
+  old_pos_ += entry.diff_size + entry.offset_increment;
+  written_output_ += entry.diff_size + entry.extra_size;
+  // Register the diff/extra sizes as required bytes for the current patch.
+  diff_sizes_[current_patch_] += entry.diff_size;
+  extra_sizes_[current_patch_] += entry.extra_size;
+  return patches_[current_patch_]->AddControlEntry(entry);
+}
+
+bool SplitPatchWriter::WriteToStream(WriteStreamMethod method,
+                                     std::vector<size_t>* sizes_vector,
+                                     const uint8_t* data,
+                                     size_t size) {
+  size_t written = 0;
+  for (size_t i = closed_patches_; i <= current_patch_ && written < size; i++) {
+    if ((*sizes_vector)[i]) {
+      size_t flush_size = std::min(size - written, (*sizes_vector)[i]);
+      if (!(patches_[i]->*method)(data + written, flush_size))
+        return false;
+      written += flush_size;
+      (*sizes_vector)[i] -= flush_size;
+    }
+
+    if (i < current_patch_ && !diff_sizes_[i] && !extra_sizes_[i]) {
+      // All bytes expected for the patch i are already sent.
+      if (!patches_[i]->Close())
+        return false;
+      closed_patches_++;
+    }
+  }
+  if (written < size) {
+    LOG(ERROR) << "Calling Write*Stream() before the corresponding "
+                  "AddControlEntry() is not supported.";
+    return false;
+  }
+  return true;
+}
+
+}  // namespace bsdiff
diff --git a/split_patch_writer.h b/split_patch_writer.h
new file mode 100644
index 0000000..81c913e
--- /dev/null
+++ b/split_patch_writer.h
@@ -0,0 +1,82 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef _BSDIFF_SPLIT_PATCH_WRITER_H_
+#define _BSDIFF_SPLIT_PATCH_WRITER_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "bsdiff/patch_writer_interface.h"
+
+namespace bsdiff {
+
+// A PatchWriterInterface class that splits the output patch into multiple
+// patches of a fixed new-file size. The size of each patch data will depend on
+// the contents of the new file data, and won't necessarily be uniform.
+class SplitPatchWriter : public PatchWriterInterface {
+ public:
+  // Create a PatchWriter using that will split in several patches where each
+  // one will write |new_chunk_size| bytes of new file data. Each patch will
+  // use the old file as a whole input file.
+  SplitPatchWriter(uint64_t new_chunk_size,
+                   const std::vector<PatchWriterInterface*>& patches)
+      : new_chunk_size_(new_chunk_size), patches_(patches) {
+    diff_sizes_.resize(patches.size());
+    extra_sizes_.resize(patches.size());
+  }
+
+  // PatchWriterInterface overrides.
+  // Note: Calling WriteDiffStream/WriteExtraStream before calling the
+  // corresponding AddControlEntry() is not supported and will fail. The reason
+  // for this is because which underlying patch takes the bytes depends on the
+  // control entries.
+  bool Init() override;
+  bool WriteDiffStream(const uint8_t* data, size_t size) override;
+  bool WriteExtraStream(const uint8_t* data, size_t size) override;
+  bool AddControlEntry(const ControlEntry& entry) override;
+  bool Close() override;
+
+ private:
+  // Add a ControlEntry to the |current_patch_| without splitting it. Updates
+  // the internal structures of used data.
+  bool AddControlEntryToCurrentPatch(const ControlEntry& entry);
+
+  using WriteStreamMethod = bool (PatchWriterInterface::*)(const uint8_t* data,
+                                                           size_t size);
+
+  // Write to the diff or extra stream as determined by |method|.
+  bool WriteToStream(WriteStreamMethod method,
+                     std::vector<size_t>* sizes_vector,
+                     const uint8_t* data,
+                     size_t size);
+
+  // The size of each chunk of the new file written to.
+  uint64_t new_chunk_size_;
+  std::vector<PatchWriterInterface*> patches_;
+
+  // The size of the diff and extra streams that should go in each patch and has
+  // been written so far.
+  std::vector<size_t> diff_sizes_;
+  std::vector<size_t> extra_sizes_;
+
+  // The current patch number in the |patches_| array we are writing to.
+  size_t current_patch_{0};
+
+  // The number of patches we already called Close() on. The patches are always
+  // closed in order.
+  size_t closed_patches_{0};
+
+  // Bytes of the new files already written. Needed to store the new length in
+  // the header of the file.
+  uint64_t written_output_{0};
+
+  // The current pointer into the old stream.
+  uint64_t old_pos_{0};
+};
+
+}  // namespace bsdiff
+
+#endif  // _BSDIFF_SPLIT_PATCH_WRITER_H_
diff --git a/split_patch_writer_unittest.cc b/split_patch_writer_unittest.cc
new file mode 100644
index 0000000..1170db9
--- /dev/null
+++ b/split_patch_writer_unittest.cc
@@ -0,0 +1,156 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "bsdiff/split_patch_writer.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "bsdiff/fake_patch_writer.h"
+#include "bsdiff/test_utils.h"
+
+namespace bsdiff {
+
+class SplitPatchWriterTest : public testing::Test {
+ protected:
+  void SetUpForSize(size_t num_chunks, uint64_t new_chunk_size) {
+    fake_patches_.resize(num_chunks);
+    std::vector<PatchWriterInterface*> patches;
+    for (auto& fake_patch : fake_patches_)
+      patches.push_back(&fake_patch);
+
+    patch_writer_.reset(new SplitPatchWriter(new_chunk_size, patches));
+    EXPECT_TRUE(patch_writer_->Init());
+  }
+
+  std::vector<FakePatchWriter> fake_patches_;
+  std::unique_ptr<SplitPatchWriter> patch_writer_;
+};
+
+// A single empty patch is allowed.
+TEST_F(SplitPatchWriterTest, NonSplitEmptyPatchTest) {
+  SetUpForSize(1, 100);
+  EXPECT_TRUE(patch_writer_->Close());
+
+  EXPECT_TRUE(fake_patches_[0].entries().empty());
+}
+
+// Leaving patches at the end that are empty is considered an error.
+TEST_F(SplitPatchWriterTest, NotAllPatchesWrittenErrorTest) {
+  SetUpForSize(2, 100);
+  // We write less than the amount needed for two patches, which should fail.
+  EXPECT_FALSE(patch_writer_->Close());
+}
+
+TEST_F(SplitPatchWriterTest, MissingDiffBytesErrorTest) {
+  SetUpForSize(2, 10);
+
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 5, 0)));
+  std::vector<uint8_t> zeros(20, 0);
+  // We write 12 diff bytes instead of the expected 15. This should fail on
+  // Close().
+  EXPECT_TRUE(patch_writer_->WriteDiffStream(zeros.data(), 12));
+  EXPECT_TRUE(patch_writer_->WriteExtraStream(zeros.data(), 5));
+  EXPECT_FALSE(patch_writer_->Close());
+}
+
+TEST_F(SplitPatchWriterTest, MissingExtraBytesErrorTest) {
+  SetUpForSize(2, 10);
+
+  std::vector<uint8_t> zeros(20, 0);
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(5, 15, -5)));
+  EXPECT_TRUE(patch_writer_->WriteDiffStream(zeros.data(), 5));
+  // We write a little less than the expected 15. This operation should succeed,
+  // since we could write the rest later.
+  EXPECT_TRUE(patch_writer_->WriteExtraStream(zeros.data(), 10));
+  EXPECT_FALSE(patch_writer_->Close());
+}
+
+// Test all sort of corner cases when splitting the ControlEntry across multiple
+// patches
+TEST_F(SplitPatchWriterTest, SplitControlAcrossSeveralPatchesTest) {
+  SetUpForSize(4, 10);
+  // The middle control entry would be split in tree different patches.
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(5, 1, -5)));
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(4, 0, -4)));
+  // old_pos at this point is 0. This is the end of the first patch.
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(6, 0, -1)));
+  // old_pos at this point is 5.
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(1, 18, 2)));
+  // old_pos at this point is 8.
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(1, 0, 1)));
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(4, 0, -5)));
+
+  std::vector<uint8_t> zeros(40, 0);
+  EXPECT_TRUE(
+      patch_writer_->WriteDiffStream(zeros.data(), 5 + 4 + 6 + 1 + 1 + 4));
+  EXPECT_TRUE(patch_writer_->WriteExtraStream(zeros.data(), 1 + 18));
+  EXPECT_TRUE(patch_writer_->Close());
+
+  EXPECT_EQ((std::vector<ControlEntry>{
+                ControlEntry(5, 1, -5),
+                ControlEntry(4, 0, 0),  // (4, 0, -4) but the -4 is not needed.
+            }),
+            fake_patches_[0].entries());
+  EXPECT_EQ((std::vector<ControlEntry>{
+                // No need for dummy entry because the old_pos is already at 0.
+                ControlEntry(6, 0, -1),
+                ControlEntry(1, 3, 0),  // the first part of (1, 18, 2)
+            }),
+            fake_patches_[1].entries());
+  EXPECT_EQ((std::vector<ControlEntry>{
+                // No need for dummy entry because the first entry is all in
+                // the extra stream and this is the last entry.
+                ControlEntry(0, 10, 0),  // the middle part of (1, 18, 2)
+            }),
+            fake_patches_[2].entries());
+  EXPECT_EQ((std::vector<ControlEntry>{
+                // No need for dummy entry because the first entry is all in
+                // the extra stream, so use that.
+                ControlEntry(0, 5, 8),  // the last part of (1, 18, 2), plus the
+                                        // old_pos 5. 8 = 1 + 2 + 5.
+                ControlEntry(1, 0, 1),  // (1, 0, 1) copied
+                ControlEntry(4, 0, 0),  // (4, 0, -5) ignoring the offset.
+            }),
+            fake_patches_[3].entries());
+}
+
+TEST_F(SplitPatchWriterTest, WriteStreamsAfterControlAcrossPatchesTest) {
+  std::vector<uint8_t> numbers(40);
+  for (size_t i = 0; i < numbers.size(); ++i)
+    numbers[i] = 'A' + i;
+
+  SetUpForSize(4, 10);
+  // The sequence is 15 diff, 10 extra, 15 diff.
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 10, 0)));
+  EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 0, 0)));
+  // Numbers [0, 30) for the diff stream, and [30, 40) for the extra stream.
+  EXPECT_TRUE(patch_writer_->WriteDiffStream(numbers.data(), 30));
+  EXPECT_TRUE(patch_writer_->WriteExtraStream(numbers.data() + 30, 10));
+  EXPECT_TRUE(patch_writer_->Close());
+
+  EXPECT_EQ(std::vector<uint8_t>(numbers.begin(), numbers.begin() + 10),
+            fake_patches_[0].diff_stream());
+  EXPECT_TRUE(fake_patches_[0].extra_stream().empty());
+
+  // 5 diff, then 5 extra.
+  EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 10, numbers.begin() + 15),
+            fake_patches_[1].diff_stream());
+  EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 30, numbers.begin() + 35),
+            fake_patches_[1].extra_stream());
+
+  // 5 extra, then 5 diff.
+  EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 15, numbers.begin() + 20),
+            fake_patches_[2].diff_stream());
+  EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 35, numbers.begin() + 40),
+            fake_patches_[2].extra_stream());
+
+  EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 20, numbers.begin() + 30),
+            fake_patches_[3].diff_stream());
+  EXPECT_TRUE(fake_patches_[3].extra_stream().empty());
+}
+
+}  // namespace bsdiff