Allow bsdiff to accept multiple compressors
Modify the patch writer so that, when multiple compressors are given, it tries
each of them and keeps the smallest patch. Passing multiple compressors is
supported for the bsdf2 format only.
Example usage:
bsdiff --format=bsdf2 --type=bz2:brotli <src_file> <tgt_file> <patch_file>
Test: diff & patch an apk file
Change-Id: Icd4322f21975b82c5ee09f8feb9321f52c9813a4
diff --git a/bsdiff_arguments.cc b/bsdiff_arguments.cc
index ae23fe7..20e6149 100644
--- a/bsdiff_arguments.cc
+++ b/bsdiff_arguments.cc
@@ -41,21 +41,34 @@
namespace bsdiff {
+std::vector<CompressorType> BsdiffArguments::compressor_types() const {
+ return std::vector<CompressorType>(compressor_types_.begin(),
+ compressor_types_.end());
+}
+
bool BsdiffArguments::IsValid() const {
- if (compressor_type_ == CompressorType::kBrotli &&
+ if (compressor_types_.empty()) {
+ return false;
+ }
+
+ if (IsCompressorSupported(CompressorType::kBrotli) &&
(brotli_quality_ < BROTLI_MIN_QUALITY ||
brotli_quality_ > BROTLI_MAX_QUALITY)) {
return false;
}
if (format_ == BsdiffFormat::kLegacy) {
- return compressor_type_ == CompressorType::kBZ2;
+ return compressor_types_.size() == 1 &&
+ IsCompressorSupported(CompressorType::kBZ2);
} else if (format_ == BsdiffFormat::kBsdf2) {
- return (compressor_type_ == CompressorType::kBZ2 ||
- compressor_type_ == CompressorType::kBrotli);
- } else if (format_ == BsdiffFormat::kEndsley) {
- // All compression options are valid for this format.
+ if (IsCompressorSupported(CompressorType::kNoCompression)) {
+ std::cerr << "no compression is not supported in Bsdf2 format\n";
+ return false;
+ }
return true;
+ } else if (format_ == BsdiffFormat::kEndsley) {
+ // Only one compressor is supported for this format.
+ return compressor_types_.size() == 1;
}
return false;
}
@@ -78,7 +91,7 @@
return false;
}
} else if (name == "type") {
- if (!ParseCompressorType(optarg, &compressor_type_)) {
+ if (!ParseCompressorTypes(optarg, &compressor_types_)) {
return false;
}
} else if (name == "brotli_quality") {
@@ -94,9 +107,9 @@
// If quality is uninitialized for brotli, set it to default value.
if (format_ != BsdiffFormat::kLegacy &&
- compressor_type_ == CompressorType::kBrotli && brotli_quality_ == -1) {
+ IsCompressorSupported(CompressorType::kBrotli) && brotli_quality_ == -1) {
brotli_quality_ = kBrotliDefaultQuality;
- } else if (compressor_type_ != CompressorType::kBrotli &&
+ } else if (!IsCompressorSupported(CompressorType::kBrotli) &&
brotli_quality_ != -1) {
std::cerr << "Warning: Brotli quality is only used in the brotli"
" compressor.\n";
@@ -105,23 +118,37 @@
return true;
}
-bool BsdiffArguments::ParseCompressorType(const string& str,
- CompressorType* type) {
- string type_string = str;
- std::transform(type_string.begin(), type_string.end(), type_string.begin(),
- ::tolower);
- if (type_string == kNoCompressionString) {
- *type = CompressorType::kNoCompression;
- return true;
- } else if (type_string == kBZ2String) {
- *type = CompressorType::kBZ2;
- return true;
- } else if (type_string == kBrotliString) {
- *type = CompressorType::kBrotli;
- return true;
+bool BsdiffArguments::ParseCompressorTypes(const string& str,
+ std::set<CompressorType>* types) {
+ types->clear();
+ // The expected types string is separated by ":", e.g. bz2:brotli
+ std::vector<std::string> type_list;
+ size_t base = 0;
+ size_t found;
+ while (true) {
+ found = str.find(":", base);
+ type_list.emplace_back(str, base, found - base);
+
+ if (found == str.npos)
+ break;
+ base = found + 1;
}
- std::cerr << "Failed to parse compressor type in " << str << endl;
- return false;
+
+ for (auto& type : type_list) {
+ std::transform(type.begin(), type.end(), type.begin(), ::tolower);
+ if (type == kNoCompressionString) {
+ types->emplace(CompressorType::kNoCompression);
+ } else if (type == kBZ2String) {
+ types->emplace(CompressorType::kBZ2);
+ } else if (type == kBrotliString) {
+ types->emplace(CompressorType::kBrotli);
+ } else {
+ std::cerr << "Failed to parse compressor type in " << str << endl;
+ return false;
+ }
+ }
+
+ return true;
}
bool BsdiffArguments::ParseMinLength(const string& str, size_t* len) {
@@ -182,4 +209,8 @@
return true;
}
+bool BsdiffArguments::IsCompressorSupported(CompressorType type) const {
+ return compressor_types_.find(type) != compressor_types_.end();
+}
+
} // namespace bsdiff
diff --git a/bsdiff_arguments.h b/bsdiff_arguments.h
index 1907e73..becd8b8 100644
--- a/bsdiff_arguments.h
+++ b/bsdiff_arguments.h
@@ -7,7 +7,9 @@
#include <stdint.h>
+#include <set>
#include <string>
+#include <vector>
#include "bsdiff/constants.h"
#include "bsdiff/patch_writer_interface.h"
@@ -18,14 +20,15 @@
// brotli_quality.
class BsdiffArguments {
public:
- BsdiffArguments()
- : format_(BsdiffFormat::kLegacy),
- compressor_type_(CompressorType::kBZ2),
- brotli_quality_(-1) {}
+ BsdiffArguments() : format_(BsdiffFormat::kLegacy), brotli_quality_(-1) {
+ compressor_types_.emplace(CompressorType::kBZ2);
+ }
- BsdiffArguments(BsdiffFormat format, CompressorType type, int brotli_quality)
+ BsdiffArguments(BsdiffFormat format,
+ std::set<CompressorType> types,
+ int brotli_quality)
: format_(format),
- compressor_type_(type),
+ compressor_types_(std::move(types)),
brotli_quality_(brotli_quality) {}
// Check if the compressor type is compatible with the bsdiff format.
@@ -36,7 +39,7 @@
int min_length() const { return min_length_; }
- CompressorType compressor_type() const { return compressor_type_; }
+ std::vector<CompressorType> compressor_types() const;
int brotli_quality() const { return brotli_quality_; }
@@ -45,7 +48,8 @@
bool ParseCommandLine(int argc, char** argv);
// Parse the compression type from string.
- static bool ParseCompressorType(const std::string& str, CompressorType* type);
+ static bool ParseCompressorTypes(const std::string& str,
+ std::set<CompressorType>* types);
// Parse the minimum length parameter from string.
static bool ParseMinLength(const std::string& str, size_t* len);
@@ -61,11 +65,13 @@
int max);
private:
+ bool IsCompressorSupported(CompressorType type) const;
+
// Current format supported are the legacy "BSDIFF40" or "BSDF2".
BsdiffFormat format_;
- // The algorithm to compress the patch, i.e. BZ2 or Brotli.
- CompressorType compressor_type_;
+ // The algorithms to compress the patch, e.g. bz2, brotli.
+ std::set<CompressorType> compressor_types_;
// The quality of brotli compressor.
int brotli_quality_;
diff --git a/bsdiff_arguments_unittest.cc b/bsdiff_arguments_unittest.cc
index 51e1038..412fac5 100644
--- a/bsdiff_arguments_unittest.cc
+++ b/bsdiff_arguments_unittest.cc
@@ -4,21 +4,41 @@
#include "bsdiff/bsdiff_arguments.h"
-#include <vector>
-
#include <gtest/gtest.h>
namespace bsdiff {
TEST(BsdiffArgumentsTest, ParseCompressorTypeTest) {
- CompressorType type;
- EXPECT_TRUE(BsdiffArguments::ParseCompressorType("Brotli", &type));
- EXPECT_EQ(CompressorType::kBrotli, type);
+ std::set<CompressorType> types;
+ EXPECT_TRUE(BsdiffArguments::ParseCompressorTypes("Brotli", &types));
+ EXPECT_EQ(1U, types.size());
+ EXPECT_NE(types.end(), types.find(CompressorType::kBrotli));
- EXPECT_TRUE(BsdiffArguments::ParseCompressorType("bz2", &type));
- EXPECT_EQ(CompressorType::kBZ2, type);
+ types.clear();
- EXPECT_FALSE(BsdiffArguments::ParseCompressorType("invalid", &type));
+ EXPECT_TRUE(BsdiffArguments::ParseCompressorTypes("bz2", &types));
+ EXPECT_EQ(1U, types.size());
+ EXPECT_NE(types.end(), types.find(CompressorType::kBZ2));
+
+ types.clear();
+
+ EXPECT_FALSE(BsdiffArguments::ParseCompressorTypes("invalid", &types));
+}
+
+TEST(BsdiffArgumentsTest, ParseMultipleCompressorTypeTest) {
+ std::set<CompressorType> types;
+ EXPECT_TRUE(BsdiffArguments::ParseCompressorTypes("bz2:brotli:nocompression",
+ &types));
+ EXPECT_EQ(3U, types.size());
+ EXPECT_NE(types.end(), types.find(CompressorType::kBrotli));
+ EXPECT_NE(types.end(), types.find(CompressorType::kBZ2));
+ EXPECT_NE(types.end(), types.find(CompressorType::kNoCompression));
+
+ types.clear();
+
+ // No space in the type string.
+ EXPECT_FALSE(
+ BsdiffArguments::ParseCompressorTypes("bz2 : nocompression", &types));
}
TEST(BsdiffArgumentsTest, ParseBsdiffFormatTest) {
@@ -66,28 +86,34 @@
// brotli is not supported for BsdiffFormat::kLegacy.
EXPECT_FALSE(
- BsdiffArguments(BsdiffFormat::kLegacy, CompressorType::kBrotli, -1)
+ BsdiffArguments(BsdiffFormat::kLegacy, {CompressorType::kBrotli}, -1)
.IsValid());
- EXPECT_TRUE(BsdiffArguments(BsdiffFormat::kBsdf2, CompressorType::kBrotli, 9)
- .IsValid());
+ EXPECT_TRUE(
+ BsdiffArguments(BsdiffFormat::kBsdf2, {CompressorType::kBrotli}, 9)
+ .IsValid());
// Compression quality out of range for brotli.
EXPECT_FALSE(
- BsdiffArguments(BsdiffFormat::kBsdf2, CompressorType::kBrotli, 20)
+ BsdiffArguments(BsdiffFormat::kBsdf2, {CompressorType::kBrotli}, 20)
.IsValid());
}
TEST(BsdiffArgumentsTest, ParseArgumentsSmokeTest) {
- std::vector<const char*> args = {"bsdiff", "--format=bsdf2", "--type=brotli",
- "--brotli_quality=9", "--minlen=12"};
+ std::vector<const char*> args = {"bsdiff", "--format=bsdf2",
+ "--type=brotli:bz2", "--brotli_quality=9",
+ "--minlen=12"};
BsdiffArguments arguments;
EXPECT_TRUE(
arguments.ParseCommandLine(args.size(), const_cast<char**>(args.data())));
EXPECT_EQ(BsdiffFormat::kBsdf2, arguments.format());
- EXPECT_EQ(CompressorType::kBrotli, arguments.compressor_type());
+
+ std::vector<CompressorType> types = {CompressorType::kBZ2,
+ CompressorType::kBrotli};
+ EXPECT_EQ(types, arguments.compressor_types());
+
EXPECT_EQ(9, arguments.brotli_quality());
EXPECT_EQ(12, arguments.min_length());
}
diff --git a/bsdiff_main.cc b/bsdiff_main.cc
index c4753c7..a7aeeb5 100644
--- a/bsdiff_main.cc
+++ b/bsdiff_main.cc
@@ -80,11 +80,11 @@
patch_writer = bsdiff::CreateBsdiffPatchWriter(patch_filename);
} else if (arguments.format() == bsdiff::BsdiffFormat::kBsdf2) {
patch_writer = bsdiff::CreateBSDF2PatchWriter(patch_filename,
- arguments.compressor_type(),
+ arguments.compressor_types(),
arguments.brotli_quality());
} else if (arguments.format() == bsdiff::BsdiffFormat::kEndsley) {
patch_writer = bsdiff::CreateEndsleyPatchWriter(
- &raw_data, arguments.compressor_type(), arguments.brotli_quality());
+ &raw_data, arguments.compressor_types()[0], arguments.brotli_quality());
} else {
std::cerr << "unexpected bsdiff format." << std::endl;
return 1;
@@ -120,7 +120,8 @@
<< " --minlen LEN The minimum match length "
"required to consider a match in the algorithm.\n"
<< " --type <bz2|brotli|nocompression> The algorithm to compress "
- "the patch, bsdf2 format only.\n"
+ "the patch, bsdf2 format only. Multiple supported compressors "
+ "should be split by ':', e.g. bz2:brotli.\n"
<< " --brotli_quality Quality of the brotli "
"compressor.\n";
}
diff --git a/include/bsdiff/patch_writer_factory.h b/include/bsdiff/patch_writer_factory.h
index 82abafd..599b973 100644
--- a/include/bsdiff/patch_writer_factory.h
+++ b/include/bsdiff/patch_writer_factory.h
@@ -30,6 +30,14 @@
CompressorType type,
int brotli_quality);
+// Create a patch writer using the "BSDF2" patch format. It also tries all the
+// compressors in |types| to generate the smallest patch.
+BSDIFF_EXPORT
+std::unique_ptr<PatchWriterInterface> CreateBSDF2PatchWriter(
+ const std::string& patch_filename,
+ const std::vector<CompressorType>& types,
+ int brotli_quality);
+
// Create a patch writer compatible with Android Play Store bsdiff patches.
// The data will be written to the passed |patch| vector, which must be valid
// until Close() is called or this patch is destroyed. The data will be
diff --git a/patch_writer.cc b/patch_writer.cc
index b4e80d9..b1f45fa 100644
--- a/patch_writer.cc
+++ b/patch_writer.cc
@@ -30,30 +30,38 @@
BsdiffPatchWriter::BsdiffPatchWriter(const std::string& patch_filename)
: patch_filename_(patch_filename),
format_(BsdiffFormat::kLegacy),
- type_(CompressorType::kBZ2),
- brotli_quality_(-1) {}
+ brotli_quality_(-1) {
+ types_.emplace_back(CompressorType::kBZ2);
+}
BsdiffPatchWriter::BsdiffPatchWriter(const std::string& patch_filename,
- CompressorType type,
+ const std::vector<CompressorType>& types,
int brotli_quality)
: patch_filename_(patch_filename),
format_(BsdiffFormat::kBsdf2),
- type_(type),
+ types_(types),
brotli_quality_(brotli_quality) {}
bool BsdiffPatchWriter::InitializeCompressorList(
std::vector<std::unique_ptr<bsdiff::CompressorInterface>>*
compressor_list) {
- switch (type_) {
- case CompressorType::kBZ2:
- compressor_list->emplace_back(new BZ2Compressor());
- break;
- case CompressorType::kBrotli:
- compressor_list->emplace_back(new BrotliCompressor(brotli_quality_));
- break;
- case CompressorType::kNoCompression:
- LOG(ERROR) << "Unsupported compression type " << static_cast<int>(type_);
+ if (types_.empty()) {
+ LOG(ERROR) << "Patch writer expects at least one compressor.";
+ return false;
+ }
+
+ for (const auto& type : types_) {
+ switch (type) {
+ case CompressorType::kBZ2:
+ compressor_list->emplace_back(new BZ2Compressor());
+ break;
+ case CompressorType::kBrotli:
+ compressor_list->emplace_back(new BrotliCompressor(brotli_quality_));
+ break;
+ case CompressorType::kNoCompression:
+ LOG(ERROR) << "Unsupported compression type " << static_cast<int>(type);
+ }
}
for (const auto& compressor : *compressor_list) {
diff --git a/patch_writer.h b/patch_writer.h
index 91927d4..8ad4cde 100644
--- a/patch_writer.h
+++ b/patch_writer.h
@@ -26,7 +26,7 @@
// with algorithm |type|; and quality |brotli_quality| if it's brotli. This
// writer also writes the patch data to the file |patch_filename|.
BsdiffPatchWriter(const std::string& patch_filename,
- CompressorType type,
+ const std::vector<CompressorType>& types,
int brotli_quality);
// PatchWriterInterface overrides.
@@ -67,9 +67,8 @@
// The format of bsdiff we're using.
BsdiffFormat format_;
- // The compressor we're using, or kSmallest in which case we try all
- // compressors and pick the one with best compression ratio.
- CompressorType type_;
+ // The compressors we're using.
+ std::vector<CompressorType> types_;
// The compression quality of the brotli compressor.
int brotli_quality_;
diff --git a/patch_writer_factory.cc b/patch_writer_factory.cc
index b9a41f3..5f0dbc9 100644
--- a/patch_writer_factory.cc
+++ b/patch_writer_factory.cc
@@ -19,8 +19,16 @@
const std::string& patch_filename,
CompressorType type,
int brotli_quality) {
+ return CreateBSDF2PatchWriter(
+ patch_filename, std::vector<CompressorType>{type}, brotli_quality);
+}
+
+std::unique_ptr<PatchWriterInterface> CreateBSDF2PatchWriter(
+ const std::string& patch_filename,
+ const std::vector<CompressorType>& types,
+ int brotli_quality) {
return std::unique_ptr<PatchWriterInterface>(
- new BsdiffPatchWriter(patch_filename, type, brotli_quality));
+ new BsdiffPatchWriter(patch_filename, types, brotli_quality));
}
std::unique_ptr<PatchWriterInterface> CreateEndsleyPatchWriter(