Merge "New SplitPatchWriter to split a bsdiff patch into multiple ones."
diff --git a/Android.bp b/Android.bp
index 711efde..9907383 100644
--- a/Android.bp
+++ b/Android.bp
@@ -59,6 +59,7 @@
"bz2_compressor.cc",
"diff_encoder.cc",
"patch_writer.cc",
+ "split_patch_writer.cc",
],
static_libs: [
"libdivsufsort64",
@@ -104,6 +105,7 @@
"extents_file_unittest.cc",
"extents_unittest.cc",
"patch_writer_unittest.cc",
+ "split_patch_writer_unittest.cc",
"test_utils.cc",
"testrunner.cc",
],
diff --git a/Makefile b/Makefile
index b9c3b66..0a881a7 100644
--- a/Makefile
+++ b/Makefile
@@ -36,7 +36,8 @@
bsdiff.cc \
bz2_compressor.cc \
diff_encoder.cc \
- patch_writer.cc
+ patch_writer.cc \
+ split_patch_writer.cc
# "bspatch" program.
bspatch_src_files := \
@@ -56,6 +57,7 @@
extents_file_unittest.cc \
extents_unittest.cc \
patch_writer_unittest.cc \
+ split_patch_writer_unittest.cc \
test_utils.cc \
testrunner.cc
diff --git a/bsdiff.gyp b/bsdiff.gyp
index dcb7bde..87f0c2b 100644
--- a/bsdiff.gyp
+++ b/bsdiff.gyp
@@ -56,6 +56,7 @@
'bz2_compressor.cc',
'diff_encoder.cc',
'patch_writer.cc',
+ 'split_patch_writer.cc',
],
},
# bsdiff executable
@@ -131,6 +132,7 @@
'extents_file_unittest.cc',
'extents_unittest.cc',
'patch_writer_unittest.cc',
+ 'split_patch_writer_unittest.cc',
'test_utils.cc',
],
},
diff --git a/include/bsdiff/patch_writer_interface.h b/include/bsdiff/patch_writer_interface.h
index 6ae4656..04f1675 100644
--- a/include/bsdiff/patch_writer_interface.h
+++ b/include/bsdiff/patch_writer_interface.h
@@ -27,6 +27,11 @@
// The value to add to the source pointer after patching from the diff stream.
int64_t offset_increment;
+
+ bool operator==(const ControlEntry& o) const {
+ return diff_size == o.diff_size && extra_size == o.extra_size &&
+ offset_increment == o.offset_increment;
+ }
};
class PatchWriterInterface {
diff --git a/split_patch_writer.cc b/split_patch_writer.cc
new file mode 100644
index 0000000..4123440
--- /dev/null
+++ b/split_patch_writer.cc
@@ -0,0 +1,171 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "bsdiff/split_patch_writer.h"
+
+#include <algorithm>
+
+#include "bsdiff/logging.h"
+
+using std::endl;
+
+namespace bsdiff {
+
+bool SplitPatchWriter::Init() {
+ // Fail gracefully if re-initialized.
+ if (current_patch_ || patches_.empty())
+ return false;
+
+ return patches_[0]->Init();
+}
+
+bool SplitPatchWriter::WriteDiffStream(const uint8_t* data, size_t size) {
+ return WriteToStream(&PatchWriterInterface::WriteDiffStream, &diff_sizes_,
+ data, size);
+}
+
+bool SplitPatchWriter::WriteExtraStream(const uint8_t* data, size_t size) {
+ return WriteToStream(&PatchWriterInterface::WriteExtraStream, &extra_sizes_,
+ data, size);
+}
+
+bool SplitPatchWriter::AddControlEntry(const ControlEntry& entry) {
+ ControlEntry remaining(entry);
+ while (written_output_ + remaining.diff_size + remaining.extra_size >=
+ (current_patch_ + 1) * new_chunk_size_) {
+ // We need to write some of the current ControlEntry to the current patch
+ // and move on to the next patch if there are more bytes to write.
+ uint64_t remaining_bytes =
+ (current_patch_ + 1) * new_chunk_size_ - written_output_;
+ // The offset_increment is always 0 in this case since we don't plan to read
+ // for the old file in the current_patch anymore.
+ ControlEntry current_patch_entry(0, 0, 0);
+
+ current_patch_entry.diff_size =
+ std::min(remaining.diff_size, remaining_bytes);
+ remaining_bytes -= current_patch_entry.diff_size;
+ remaining.diff_size -= current_patch_entry.diff_size;
+
+ // This will be positive only if we used all the diff_size bytes.
+ current_patch_entry.extra_size =
+ std::min(remaining.extra_size, remaining_bytes);
+ remaining_bytes -= current_patch_entry.extra_size;
+ remaining.extra_size -= current_patch_entry.extra_size;
+
+ AddControlEntryToCurrentPatch(current_patch_entry);
+
+ if (remaining.diff_size + remaining.extra_size > 0) {
+ current_patch_++;
+ if (current_patch_ >= patches_.size()) {
+ LOG(ERROR) << "Writing past the last patch" << endl;
+ return false;
+ }
+ if (!patches_[current_patch_]->Init()) {
+ LOG(ERROR) << "Failed to initialize patch " << current_patch_ << endl;
+ return false;
+ }
+ if (!remaining.diff_size) {
+ // When no diff need to be sent to the output, we can just push the
+ // existing old_pos_ as part of the current triplet, since the extra
+ // stream doesn't use the old_pos_;
+ remaining.offset_increment += old_pos_;
+ old_pos_ = 0;
+ }
+ // Need to add a dummy control entry at the beginning of the patch to
+ // offset the old_pos in the new patch, which would start at 0.
+ if (old_pos_ != 0) {
+ if (!patches_[current_patch_]->AddControlEntry(
+ ControlEntry(0, 0, old_pos_)))
+ return false;
+ }
+ } else {
+ // There was no need to write more bytes past the current patch, so just
+ // update the old_pos_ we are tracking for the next patch, if any.
+ old_pos_ += remaining.offset_increment;
+ return true;
+ }
+ }
+
+ // Trivial entries will be ignored.
+ return AddControlEntryToCurrentPatch(remaining);
+}
+
+bool SplitPatchWriter::Close() {
+ uint64_t missing_bytes = 0;
+ for (auto size : diff_sizes_)
+ missing_bytes += size;
+ for (auto size : extra_sizes_)
+ missing_bytes += size;
+ if (missing_bytes > 0) {
+ LOG(ERROR) << "Close() called but there are " << missing_bytes
+ << " bytes missing from Write*Stream() calls" << endl;
+ return false;
+ }
+
+ // |current_patch_| holds the last patch that was Init()'ed. If there are more
+ // patches in the list those have not been initialized/closed, which is a
+ // programming error.
+ if (current_patch_ + 1 != patches_.size()) {
+ LOG(ERROR) << "Close() called but no bytes habe been written to the last "
+ "patch"
+ << endl;
+ return false;
+ }
+
+ // Close all the remaining streams.
+ for (; closed_patches_ < patches_.size(); closed_patches_++) {
+ if (!patches_[closed_patches_]->Close())
+ return false;
+ }
+ return true;
+}
+
+bool SplitPatchWriter::AddControlEntryToCurrentPatch(
+ const ControlEntry& entry) {
+ // Ignore trivial control entries that don't modify the state.
+ if (!entry.diff_size && !entry.extra_size && !entry.offset_increment)
+ return true;
+
+ if (current_patch_ >= patches_.size()) {
+ LOG(ERROR) << "Writing past the last patch" << endl;
+ return false;
+ }
+ old_pos_ += entry.diff_size + entry.offset_increment;
+ written_output_ += entry.diff_size + entry.extra_size;
+ // Register the diff/extra sizes as required bytes for the current patch.
+ diff_sizes_[current_patch_] += entry.diff_size;
+ extra_sizes_[current_patch_] += entry.extra_size;
+ return patches_[current_patch_]->AddControlEntry(entry);
+}
+
+bool SplitPatchWriter::WriteToStream(WriteStreamMethod method,
+ std::vector<size_t>* sizes_vector,
+ const uint8_t* data,
+ size_t size) {
+ size_t written = 0;
+ for (size_t i = closed_patches_; i <= current_patch_ && written < size; i++) {
+ if ((*sizes_vector)[i]) {
+ size_t flush_size = std::min(size - written, (*sizes_vector)[i]);
+ if (!(patches_[i]->*method)(data + written, flush_size))
+ return false;
+ written += flush_size;
+ (*sizes_vector)[i] -= flush_size;
+ }
+
+ if (i < current_patch_ && !diff_sizes_[i] && !extra_sizes_[i]) {
+ // All bytes expected for the patch i are already sent.
+ if (!patches_[i]->Close())
+ return false;
+ closed_patches_++;
+ }
+ }
+ if (written < size) {
+ LOG(ERROR) << "Calling Write*Stream() before the corresponding "
+ "AddControlEntry() is not supported.";
+ return false;
+ }
+ return true;
+}
+
+} // namespace bsdiff
diff --git a/split_patch_writer.h b/split_patch_writer.h
new file mode 100644
index 0000000..81c913e
--- /dev/null
+++ b/split_patch_writer.h
@@ -0,0 +1,82 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef _BSDIFF_SPLIT_PATCH_WRITER_H_
+#define _BSDIFF_SPLIT_PATCH_WRITER_H_
+
+#include <stdint.h>
+
+#include <vector>
+
+#include "bsdiff/patch_writer_interface.h"
+
+namespace bsdiff {
+
+// A PatchWriterInterface class that splits the output patch into multiple
+// patches of a fixed new-file size. The size of each patch data will depend on
+// the contents of the new file data, and won't necessarily be uniform.
+class SplitPatchWriter : public PatchWriterInterface {
+ public:
+ // Create a PatchWriter using that will split in several patches where each
+ // one will write |new_chunk_size| bytes of new file data. Each patch will
+ // use the old file as a whole input file.
+ SplitPatchWriter(uint64_t new_chunk_size,
+ const std::vector<PatchWriterInterface*>& patches)
+ : new_chunk_size_(new_chunk_size), patches_(patches) {
+ diff_sizes_.resize(patches.size());
+ extra_sizes_.resize(patches.size());
+ }
+
+ // PatchWriterInterface overrides.
+ // Note: Calling WriteDiffStream/WriteExtraStream before calling the
+ // corresponding AddControlEntry() is not supported and will fail. The reason
+ // for this is because which underlying patch takes the bytes depends on the
+ // control entries.
+ bool Init() override;
+ bool WriteDiffStream(const uint8_t* data, size_t size) override;
+ bool WriteExtraStream(const uint8_t* data, size_t size) override;
+ bool AddControlEntry(const ControlEntry& entry) override;
+ bool Close() override;
+
+ private:
+ // Add a ControlEntry to the |current_patch_| without splitting it. Updates
+ // the internal structures of used data.
+ bool AddControlEntryToCurrentPatch(const ControlEntry& entry);
+
+ using WriteStreamMethod = bool (PatchWriterInterface::*)(const uint8_t* data,
+ size_t size);
+
+ // Write to the diff or extra stream as determined by |method|.
+ bool WriteToStream(WriteStreamMethod method,
+ std::vector<size_t>* sizes_vector,
+ const uint8_t* data,
+ size_t size);
+
+ // The size of each chunk of the new file written to.
+ uint64_t new_chunk_size_;
+ std::vector<PatchWriterInterface*> patches_;
+
+ // The size of the diff and extra streams that should go in each patch and has
+ // been written so far.
+ std::vector<size_t> diff_sizes_;
+ std::vector<size_t> extra_sizes_;
+
+ // The current patch number in the |patches_| array we are writing to.
+ size_t current_patch_{0};
+
+ // The number of patches we already called Close() on. The patches are always
+ // closed in order.
+ size_t closed_patches_{0};
+
+ // Bytes of the new files already written. Needed to store the new length in
+ // the header of the file.
+ uint64_t written_output_{0};
+
+ // The current pointer into the old stream.
+ uint64_t old_pos_{0};
+};
+
+} // namespace bsdiff
+
+#endif // _BSDIFF_SPLIT_PATCH_WRITER_H_
diff --git a/split_patch_writer_unittest.cc b/split_patch_writer_unittest.cc
new file mode 100644
index 0000000..1170db9
--- /dev/null
+++ b/split_patch_writer_unittest.cc
@@ -0,0 +1,156 @@
+// Copyright 2017 The Chromium OS Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "bsdiff/split_patch_writer.h"
+
+#include <memory>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include "bsdiff/fake_patch_writer.h"
+#include "bsdiff/test_utils.h"
+
+namespace bsdiff {
+
+class SplitPatchWriterTest : public testing::Test {
+ protected:
+ void SetUpForSize(size_t num_chunks, uint64_t new_chunk_size) {
+ fake_patches_.resize(num_chunks);
+ std::vector<PatchWriterInterface*> patches;
+ for (auto& fake_patch : fake_patches_)
+ patches.push_back(&fake_patch);
+
+ patch_writer_.reset(new SplitPatchWriter(new_chunk_size, patches));
+ EXPECT_TRUE(patch_writer_->Init());
+ }
+
+ std::vector<FakePatchWriter> fake_patches_;
+ std::unique_ptr<SplitPatchWriter> patch_writer_;
+};
+
+// A single empty patch is allowed.
+TEST_F(SplitPatchWriterTest, NonSplitEmptyPatchTest) {
+ SetUpForSize(1, 100);
+ EXPECT_TRUE(patch_writer_->Close());
+
+ EXPECT_TRUE(fake_patches_[0].entries().empty());
+}
+
+// Leaving patches at the end that are empty is considered an error.
+TEST_F(SplitPatchWriterTest, NotAllPatchesWrittenErrorTest) {
+ SetUpForSize(2, 100);
+ // We write less than the amount needed for two patches, which should fail.
+ EXPECT_FALSE(patch_writer_->Close());
+}
+
+TEST_F(SplitPatchWriterTest, MissingDiffBytesErrorTest) {
+ SetUpForSize(2, 10);
+
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 5, 0)));
+ std::vector<uint8_t> zeros(20, 0);
+ // We write 12 diff bytes instead of the expected 15. This should fail on
+ // Close().
+ EXPECT_TRUE(patch_writer_->WriteDiffStream(zeros.data(), 12));
+ EXPECT_TRUE(patch_writer_->WriteExtraStream(zeros.data(), 5));
+ EXPECT_FALSE(patch_writer_->Close());
+}
+
+TEST_F(SplitPatchWriterTest, MissingExtraBytesErrorTest) {
+ SetUpForSize(2, 10);
+
+ std::vector<uint8_t> zeros(20, 0);
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(5, 15, -5)));
+ EXPECT_TRUE(patch_writer_->WriteDiffStream(zeros.data(), 5));
+ // We write a little less than the expected 15. This operation should succeed,
+ // since we could write the rest later.
+ EXPECT_TRUE(patch_writer_->WriteExtraStream(zeros.data(), 10));
+ EXPECT_FALSE(patch_writer_->Close());
+}
+
+// Test all sort of corner cases when splitting the ControlEntry across multiple
+// patches
+TEST_F(SplitPatchWriterTest, SplitControlAcrossSeveralPatchesTest) {
+ SetUpForSize(4, 10);
+ // The middle control entry would be split in tree different patches.
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(5, 1, -5)));
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(4, 0, -4)));
+ // old_pos at this point is 0. This is the end of the first patch.
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(6, 0, -1)));
+ // old_pos at this point is 5.
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(1, 18, 2)));
+ // old_pos at this point is 8.
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(1, 0, 1)));
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(4, 0, -5)));
+
+ std::vector<uint8_t> zeros(40, 0);
+ EXPECT_TRUE(
+ patch_writer_->WriteDiffStream(zeros.data(), 5 + 4 + 6 + 1 + 1 + 4));
+ EXPECT_TRUE(patch_writer_->WriteExtraStream(zeros.data(), 1 + 18));
+ EXPECT_TRUE(patch_writer_->Close());
+
+ EXPECT_EQ((std::vector<ControlEntry>{
+ ControlEntry(5, 1, -5),
+ ControlEntry(4, 0, 0), // (4, 0, -4) but the -4 is not needed.
+ }),
+ fake_patches_[0].entries());
+ EXPECT_EQ((std::vector<ControlEntry>{
+ // No need for dummy entry because the old_pos is already at 0.
+ ControlEntry(6, 0, -1),
+ ControlEntry(1, 3, 0), // the first part of (1, 18, 2)
+ }),
+ fake_patches_[1].entries());
+ EXPECT_EQ((std::vector<ControlEntry>{
+ // No need for dummy entry because the first entry is all in
+ // the extra stream and this is the last entry.
+ ControlEntry(0, 10, 0), // the middle part of (1, 18, 2)
+ }),
+ fake_patches_[2].entries());
+ EXPECT_EQ((std::vector<ControlEntry>{
+ // No need for dummy entry because the first entry is all in
+ // the extra stream, so use that.
+ ControlEntry(0, 5, 8), // the last part of (1, 18, 2), plus the
+ // old_pos 5. 8 = 1 + 2 + 5.
+ ControlEntry(1, 0, 1), // (1, 0, 1) copied
+ ControlEntry(4, 0, 0), // (4, 0, -5) ignoring the offset.
+ }),
+ fake_patches_[3].entries());
+}
+
+TEST_F(SplitPatchWriterTest, WriteStreamsAfterControlAcrossPatchesTest) {
+ std::vector<uint8_t> numbers(40);
+ for (size_t i = 0; i < numbers.size(); ++i)
+ numbers[i] = 'A' + i;
+
+ SetUpForSize(4, 10);
+ // The sequence is 15 diff, 10 extra, 15 diff.
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 10, 0)));
+ EXPECT_TRUE(patch_writer_->AddControlEntry(ControlEntry(15, 0, 0)));
+ // Numbers [0, 30) for the diff stream, and [30, 40) for the extra stream.
+ EXPECT_TRUE(patch_writer_->WriteDiffStream(numbers.data(), 30));
+ EXPECT_TRUE(patch_writer_->WriteExtraStream(numbers.data() + 30, 10));
+ EXPECT_TRUE(patch_writer_->Close());
+
+ EXPECT_EQ(std::vector<uint8_t>(numbers.begin(), numbers.begin() + 10),
+ fake_patches_[0].diff_stream());
+ EXPECT_TRUE(fake_patches_[0].extra_stream().empty());
+
+ // 5 diff, then 5 extra.
+ EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 10, numbers.begin() + 15),
+ fake_patches_[1].diff_stream());
+ EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 30, numbers.begin() + 35),
+ fake_patches_[1].extra_stream());
+
+ // 5 extra, then 5 diff.
+ EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 15, numbers.begin() + 20),
+ fake_patches_[2].diff_stream());
+ EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 35, numbers.begin() + 40),
+ fake_patches_[2].extra_stream());
+
+ EXPECT_EQ(std::vector<uint8_t>(numbers.begin() + 20, numbers.begin() + 30),
+ fake_patches_[3].diff_stream());
+ EXPECT_TRUE(fake_patches_[3].extra_stream().empty());
+}
+
+} // namespace bsdiff