Snap for 10453563 from d877dde634fc72406a3fa1648146ff1ec2898873 to mainline-mediaprovider-release
Change-Id: Id330693ef34b41b7f922ad47f0a8498457206d58
diff --git a/.clang-format b/.clang-format
deleted file mode 120000
index fd0645f..0000000
--- a/.clang-format
+++ /dev/null
@@ -1 +0,0 @@
-../.clang-format-2
\ No newline at end of file
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..a4e23f8
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,11 @@
+BasedOnStyle: Google
+Standard: Cpp11
+AllowShortFunctionsOnASingleLine: Inline
+ColumnLimit: 100
+CommentPragmas: NOLINT:.*
+DerivePointerAlignment: false
+IncludeBlocks: Preserve
+IndentWidth: 2
+PointerAlignment: Left
+TabWidth: 2
+UseTab: Never
diff --git a/Android.bp b/Android.bp
index 8226d39..79cd3c0 100644
--- a/Android.bp
+++ b/Android.bp
@@ -298,16 +298,6 @@
name: "ziparchive_tests_large",
srcs: ["test_ziparchive_large.py"],
main: "test_ziparchive_large.py",
- version: {
- py2: {
- enabled: true,
- embedded_launcher: false,
- },
- py3: {
- enabled: false,
- embedded_launcher: false,
- },
- },
test_suites: ["general-tests"],
test_options: {
unit_test: false,
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 3a1ce58..1ff8f6b 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING
@@ -4,7 +4,7 @@
"name": "ziparchive-tests"
}
],
- "hwasan-postsubmit": [
+ "hwasan-presubmit": [
{
"name": "ziparchive-tests"
}
diff --git a/incfs_support/include/incfs_support/util.h b/incfs_support/include/incfs_support/util.h
index aa4c950..0806e2d 100644
--- a/incfs_support/include/incfs_support/util.h
+++ b/incfs_support/include/incfs_support/util.h
@@ -16,6 +16,11 @@
#pragma once
+#ifdef __BIONIC__
+#include <linux/incrementalfs.h>
+#include <sys/vfs.h>
+#endif
+
namespace incfs::util {
// Some tools useful for writing hardened code
@@ -27,4 +32,16 @@
Container().swap(c);
}
-} // namespace incfs::util
\ No newline at end of file
+bool isIncfsFd([[maybe_unused]] int fd) {
+#ifdef __BIONIC__
+ struct statfs fs = {};
+ if (::fstatfs(fd, &fs) != 0) {
+ return false;
+ }
+ return fs.f_type == static_cast<decltype(fs.f_type)>(INCFS_MAGIC_NUMBER);
+#else
+ return false; // incfs is linux-specific, and only matters on Android.
+#endif
+}
+
+} // namespace incfs::util
diff --git a/include/ziparchive/zip_archive.h b/include/ziparchive/zip_archive.h
index 3843026..fcd4be1 100644
--- a/include/ziparchive/zip_archive.h
+++ b/include/ziparchive/zip_archive.h
@@ -28,7 +28,9 @@
#include <functional>
#include <string>
#include <string_view>
+#include <utility>
+#include "android-base/macros.h"
#include "android-base/off64_t.h"
/* Zip compression methods we support */
@@ -322,27 +324,52 @@
class Writer {
public:
virtual bool Append(uint8_t* buf, size_t buf_size) = 0;
- virtual ~Writer();
+
+  // Returns the internal buffer that can be written into directly.
+ using Buffer = std::pair<uint8_t*, size_t>;
+ virtual Buffer GetBuffer(size_t length);
protected:
Writer() = default;
+ ~Writer() = default;
private:
- Writer(const Writer&) = delete;
- void operator=(const Writer&) = delete;
+ DISALLOW_COPY_AND_ASSIGN(Writer);
};
-class Reader {
+class LowLevelReader {
+ public:
+ // Get |len| bytes of data starting at |offset|, either by copying them into the supplied |buf|,
+ // or returning an internal buffer directly.
+ // Returns a pointer to the data (which can be different from |buf|), or |nullptr| on error.
+ virtual const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;
+
+ // Returns |true| if the reader doesn't need an external buffer but instead returns its own one.
+ virtual bool IsZeroCopy() const = 0;
+
+ protected:
+ LowLevelReader() = default;
+ ~LowLevelReader() = default;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(LowLevelReader);
+};
+
+class Reader : public LowLevelReader {
public:
virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const = 0;
- virtual ~Reader();
+
+ // Ensure the existing classes implementing Reader don't need to bother with
+ // the new method.
+ const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override;
+ bool IsZeroCopy() const override;
protected:
Reader() = default;
+ ~Reader() = default;
private:
- Reader(const Reader&) = delete;
- void operator=(const Reader&) = delete;
+ DISALLOW_COPY_AND_ASSIGN(Reader);
};
//
diff --git a/include/ziparchive/zip_writer.h b/include/ziparchive/zip_writer.h
index d68683d..268e8b6 100644
--- a/include/ziparchive/zip_writer.h
+++ b/include/ziparchive/zip_writer.h
@@ -65,6 +65,12 @@
* mmapping the data at runtime.
*/
kAlign32 = 0x02,
+
+ /**
+ * Flag to use gzip's default level of compression (6). If not set, 9 will
+ * be used.
+ */
+ kDefaultCompression = 0x04,
};
/**
@@ -162,7 +168,7 @@
DISALLOW_COPY_AND_ASSIGN(ZipWriter);
int32_t HandleError(int32_t error_code);
- int32_t PrepareDeflate();
+ int32_t PrepareDeflate(int compression_level);
int32_t StoreBytes(FileEntry* file, const void* data, uint32_t len);
int32_t CompressBytes(FileEntry* file, const void* data, uint32_t len);
int32_t FlushCompressedBytes(FileEntry* file);
diff --git a/test_ziparchive_large.py b/test_ziparchive_large.py
index 46d02aa..5c6c66f 100644
--- a/test_ziparchive_large.py
+++ b/test_ziparchive_large.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# Copyright (C) 2020 The Android Open Source Project
#
@@ -27,121 +27,125 @@
class Zip64Test(unittest.TestCase):
@staticmethod
def _WriteFile(path, size_in_kib):
- contents = os.path.basename(path)[0] * 1024
- with open(path, 'w') as f:
- for it in range(0, size_in_kib):
+ contents = b'X' * 1024
+ with open(path, 'wb') as f:
+ for i in range(size_in_kib):
f.write(contents)
@staticmethod
def _AddEntriesToZip(output_zip, entries_dict=None):
+ contents = b'X' * 1024
for name, size in entries_dict.items():
- file_path = tempfile.NamedTemporaryFile()
- Zip64Test._WriteFile(file_path.name, size)
- output_zip.write(file_path.name, arcname = name)
+ # Need to pass a ZipInfo with a file_size
+ # to .open() so that it adds the Zip64 header
+ # on larger files
+ info = zipfile.ZipInfo(name)
+ info.file_size = size * 1024
+ with output_zip.open(info, mode='w') as f:
+ for i in range(size):
+ f.write(contents)
def _getEntryNames(self, zip_name):
cmd = ['ziptool', 'zipinfo', '-1', zip_name]
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
output, _ = proc.communicate()
- self.assertEquals(0, proc.returncode)
+ self.assertEqual(0, proc.returncode)
self.assertNotEqual(None, output)
- return output.split()
+ return output.decode('utf-8').split()
def _ExtractEntries(self, zip_name):
- temp_dir = tempfile.mkdtemp()
- cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
- proc.communicate()
- self.assertEquals(0, proc.returncode)
+ with tempfile.TemporaryDirectory() as temp_dir:
+ cmd = ['ziptool', 'unzip', '-d', temp_dir, zip_name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ proc.communicate()
+ self.assertEqual(0, proc.returncode)
def test_entriesSmallerThan2G(self):
- zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
- # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
- # than 4GiB in size.
- with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
- entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
- 'd.txt': 1025 * 1024, 'e.txt': 1024}
- self._AddEntriesToZip(output_zip, entry_dict)
+ with tempfile.NamedTemporaryFile(suffix='.zip') as zip_path:
+ # Add a few entries with each of them smaller than 2GiB. But the entire zip file is larger
+ # than 4GiB in size.
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ entry_dict = {'a.txt': 1025 * 1024, 'b.txt': 1025 * 1024, 'c.txt': 1025 * 1024,
+ 'd.txt': 1025 * 1024, 'e.txt': 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
- read_names = self._getEntryNames(zip_path.name)
- self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
- self._ExtractEntries(zip_path.name)
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEqual(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
def test_largeNumberOfEntries(self):
- zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
- entry_dict = {}
- # Add 100k entries (more than 65535|UINT16_MAX).
- for num in range(0, 100 * 1024):
- entry_dict[str(num)] = 50
+ with tempfile.NamedTemporaryFile(suffix='.zip') as zip_path:
+ entry_dict = {}
+ # Add 100k entries (more than 65535|UINT16_MAX).
+ for num in range(0, 100 * 1024):
+ entry_dict[str(num)] = 50
- with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
- self._AddEntriesToZip(output_zip, entry_dict)
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ self._AddEntriesToZip(output_zip, entry_dict)
- read_names = self._getEntryNames(zip_path.name)
- self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
- self._ExtractEntries(zip_path.name)
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEqual(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
def test_largeCompressedEntriesSmallerThan4G(self):
- zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
- with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
- allowZip64=True) as output_zip:
- # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
- # sizes in the extra field. Test if our ziptool should be able to parse it.
- entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
- self._AddEntriesToZip(output_zip, entry_dict)
+ with tempfile.NamedTemporaryFile(suffix='.zip') as zip_path:
+ with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
+ allowZip64=True) as output_zip:
+ # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
+ # sizes in the extra field. Test if our ziptool should be able to parse it.
+ entry_dict = {'e.txt': 4095 * 1024, 'f.txt': 4095 * 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
- read_names = self._getEntryNames(zip_path.name)
- self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
- self._ExtractEntries(zip_path.name)
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEqual(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
def test_forceDataDescriptor(self):
- file_path = tempfile.NamedTemporaryFile(suffix='.txt')
- self._WriteFile(file_path.name, 5000 * 1024)
+ with tempfile.NamedTemporaryFile(suffix='.txt') as file_path:
+ self._WriteFile(file_path.name, 5000 * 1024)
- zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
- with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
- pass
- # The fd option force writes a data descriptor
- cmd = ['zip', '-fd', zip_path.name, file_path.name]
- proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
- proc.communicate()
- read_names = self._getEntryNames(zip_path.name)
- self.assertEquals([file_path.name[1:]], read_names)
- self._ExtractEntries(zip_path.name)
+ with tempfile.NamedTemporaryFile(suffix='.zip') as zip_path:
+ with zipfile.ZipFile(zip_path, 'w', allowZip64=True) as output_zip:
+ pass
+ # The fd option force writes a data descriptor
+ cmd = ['zip', '-fd', zip_path.name, file_path.name]
+ proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ proc.communicate()
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEqual([file_path.name[1:]], read_names)
+ self._ExtractEntries(zip_path.name)
def test_largeUncompressedEntriesLargerThan4G(self):
- zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
- with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_STORED,
- allowZip64=True) as output_zip:
- # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
- # sizes in the extra field. Test if our ziptool should be able to parse it.
- entry_dict = {'g.txt': 5000 * 1024, 'h.txt': 6000 * 1024}
- self._AddEntriesToZip(output_zip, entry_dict)
+ with tempfile.NamedTemporaryFile(suffix='.zip') as zip_path:
+ with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_STORED,
+ allowZip64=True) as output_zip:
+ # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
+ # sizes in the extra field. Test if our ziptool should be able to parse it.
+ entry_dict = {'g.txt': 5000 * 1024, 'h.txt': 6000 * 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
- read_names = self._getEntryNames(zip_path.name)
- self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
- self._ExtractEntries(zip_path.name)
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEqual(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
def test_largeCompressedEntriesLargerThan4G(self):
- zip_path = tempfile.NamedTemporaryFile(suffix='.zip')
- with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
- allowZip64=True) as output_zip:
- # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
- # sizes in the extra field. Test if our ziptool should be able to parse it.
- entry_dict = {'i.txt': 4096 * 1024, 'j.txt': 7000 * 1024}
- self._AddEntriesToZip(output_zip, entry_dict)
+ with tempfile.NamedTemporaryFile(suffix='.zip') as zip_path:
+ with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED,
+ allowZip64=True) as output_zip:
+ # Add entries close to 4GiB in size. Somehow the python library will put the (un)compressed
+ # sizes in the extra field. Test if our ziptool should be able to parse it.
+ entry_dict = {'i.txt': 4096 * 1024, 'j.txt': 7000 * 1024}
+ self._AddEntriesToZip(output_zip, entry_dict)
- read_names = self._getEntryNames(zip_path.name)
- self.assertEquals(sorted(entry_dict.keys()), sorted(read_names))
- self._ExtractEntries(zip_path.name)
+ read_names = self._getEntryNames(zip_path.name)
+ self.assertEqual(sorted(entry_dict.keys()), sorted(read_names))
+ self._ExtractEntries(zip_path.name)
if __name__ == '__main__':
- testsuite = unittest.TestLoader().discover(
- os.path.dirname(os.path.realpath(__file__)))
- unittest.TextTestRunner(verbosity=2).run(testsuite)
+ unittest.main(verbosity=2)
diff --git a/zip_archive.cc b/zip_archive.cc
index 8d87f6f..7680053 100644
--- a/zip_archive.cc
+++ b/zip_archive.cc
@@ -31,6 +31,12 @@
#include <time.h>
#include <unistd.h>
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#endif
+
#include <memory>
#include <optional>
#include <span>
@@ -91,15 +97,62 @@
* of the string length into the hash table entry.
*/
+constexpr auto kPageSize = 4096;
+
+[[maybe_unused]] static constexpr uintptr_t pageAlignDown(uintptr_t ptr_int) {
+ return ptr_int & ~(kPageSize - 1);
+}
+
+[[maybe_unused]] static constexpr uintptr_t pageAlignUp(uintptr_t ptr_int) {
+ return pageAlignDown(ptr_int + kPageSize - 1);
+}
+
+[[maybe_unused]] static std::pair<void*, size_t> expandToPageBounds(void* ptr, size_t size) {
+ const auto ptr_int = reinterpret_cast<uintptr_t>(ptr);
+ const auto aligned_ptr_int = pageAlignDown(ptr_int);
+ const auto aligned_size = pageAlignUp(ptr_int + size) - aligned_ptr_int;
+ return {reinterpret_cast<void*>(aligned_ptr_int), aligned_size};
+}
+
+[[maybe_unused]] static void maybePrefetch([[maybe_unused]] const void* ptr,
+ [[maybe_unused]] size_t size) {
+#ifdef __linux__
+ // Let's only ask for a readahead explicitly if there's enough pages to read. A regular OS
+ // readahead implementation would take care of the smaller requests, and it would also involve
+ // only a single kernel transition, just an implicit one from the page fault.
+ //
+ // Note: there's no implementation for other OSes, as the prefetch logic is highly specific
+ // to the memory manager, and we don't have any well defined benchmarks on Windows/Mac;
+ // it also mostly matters only for the cold OS boot where no files are in the page cache yet,
+ // but we rarely would hit this situation outside of the device startup.
+ auto [aligned_ptr, aligned_size] = expandToPageBounds(const_cast<void*>(ptr), size);
+ if (aligned_size > 32 * kPageSize) {
+ if (::madvise(aligned_ptr, aligned_size, MADV_WILLNEED)) {
+ ALOGW("Zip: madvise(file, WILLNEED) failed: %s (%d)", strerror(errno), errno);
+ }
+ }
+#endif
+}
+
+[[maybe_unused]] static void maybePrepareSequentialReading([[maybe_unused]] const void* ptr,
+ [[maybe_unused]] size_t size) {
+#ifdef __linux__
+ auto [aligned_ptr, aligned_size] = expandToPageBounds(const_cast<void*>(ptr), size);
+ if (::madvise(reinterpret_cast<void*>(aligned_ptr), aligned_size, MADV_SEQUENTIAL)) {
+ ALOGW("Zip: madvise(file, SEQUENTIAL) failed: %s (%d)", strerror(errno), errno);
+ }
+#endif
+}
+
#if defined(__BIONIC__)
-uint64_t GetOwnerTag(const ZipArchive* archive) {
+static uint64_t GetOwnerTag(const ZipArchive* archive) {
return android_fdsan_create_owner_tag(ANDROID_FDSAN_OWNER_TYPE_ZIPARCHIVE,
reinterpret_cast<uint64_t>(archive));
}
#endif
ZipArchive::ZipArchive(MappedZipFile&& map, bool assume_ownership)
- : mapped_zip(map),
+ : mapped_zip(std::move(map)),
close_file(assume_ownership),
directory_offset(0),
central_directory(),
@@ -107,7 +160,7 @@
num_entries(0) {
#if defined(__BIONIC__)
if (assume_ownership) {
- CHECK(mapped_zip.HasFd());
+ CHECK(mapped_zip.GetFileDescriptor() >= 0 || !mapped_zip.GetBasePtr());
android_fdsan_exchange_owner_tag(mapped_zip.GetFileDescriptor(), 0, GetOwnerTag(this));
}
#endif
@@ -161,21 +214,23 @@
}
// We expect to find the zip64 eocd locator immediately before the zip eocd.
const int64_t locatorOffset = eocdOffset - sizeof(Zip64EocdLocator);
- Zip64EocdLocator zip64EocdLocator{};
- if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocator)),
- sizeof(Zip64EocdLocator), locatorOffset)) {
+ Zip64EocdLocator zip64EocdLocatorBuf;
+ const auto zip64EocdLocator = reinterpret_cast<const Zip64EocdLocator*>(
+ archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>((&zip64EocdLocatorBuf)),
+ sizeof(zip64EocdLocatorBuf), locatorOffset));
+ if (!zip64EocdLocator) {
ALOGW("Zip: %s: Read %zu from offset %" PRId64 " failed %s", debugFileName,
- sizeof(Zip64EocdLocator), locatorOffset, debugFileName);
+ sizeof(zip64EocdLocatorBuf), locatorOffset, debugFileName);
return kIoError;
}
- if (zip64EocdLocator.locator_signature != Zip64EocdLocator::kSignature) {
+ if (zip64EocdLocator->locator_signature != Zip64EocdLocator::kSignature) {
ALOGW("Zip: %s: Zip64 eocd locator signature not found at offset %" PRId64, debugFileName,
locatorOffset);
return kInvalidFile;
}
- const int64_t zip64EocdOffset = zip64EocdLocator.zip64_eocd_offset;
+ const int64_t zip64EocdOffset = zip64EocdLocator->zip64_eocd_offset;
if (locatorOffset <= sizeof(Zip64EocdRecord) ||
zip64EocdOffset > locatorOffset - sizeof(Zip64EocdRecord)) {
ALOGW("Zip: %s: Bad zip64 eocd offset %" PRId64 ", eocd locator offset %" PRId64, debugFileName,
@@ -183,31 +238,34 @@
return kInvalidOffset;
}
- Zip64EocdRecord zip64EocdRecord{};
- if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecord),
- sizeof(Zip64EocdRecord), zip64EocdOffset)) {
+ Zip64EocdRecord zip64EocdRecordBuf;
+ const auto zip64EocdRecord = reinterpret_cast<const Zip64EocdRecord*>(
+ archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&zip64EocdRecordBuf),
+ sizeof(zip64EocdRecordBuf), zip64EocdOffset));
+ if (!zip64EocdRecord) {
ALOGW("Zip: %s: read %zu from offset %" PRId64 " failed %s", debugFileName,
- sizeof(Zip64EocdLocator), zip64EocdOffset, debugFileName);
+ sizeof(zip64EocdRecordBuf), zip64EocdOffset, debugFileName);
return kIoError;
}
- if (zip64EocdRecord.record_signature != Zip64EocdRecord::kSignature) {
+ if (zip64EocdRecord->record_signature != Zip64EocdRecord::kSignature) {
ALOGW("Zip: %s: Zip64 eocd record signature not found at offset %" PRId64, debugFileName,
zip64EocdOffset);
return kInvalidFile;
}
- if (zip64EocdOffset <= zip64EocdRecord.cd_size ||
- zip64EocdRecord.cd_start_offset > zip64EocdOffset - zip64EocdRecord.cd_size) {
+ if (zip64EocdOffset <= zip64EocdRecord->cd_size ||
+ zip64EocdRecord->cd_start_offset > zip64EocdOffset - zip64EocdRecord->cd_size) {
ALOGW("Zip: %s: Bad offset for zip64 central directory. cd offset %" PRIu64 ", cd size %" PRIu64
", zip64 eocd offset %" PRIu64,
- debugFileName, zip64EocdRecord.cd_start_offset, zip64EocdRecord.cd_size, zip64EocdOffset);
+ debugFileName, zip64EocdRecord->cd_start_offset, zip64EocdRecord->cd_size,
+ zip64EocdOffset);
return kInvalidOffset;
}
- *cdInfo = {.num_records = zip64EocdRecord.num_records,
- .cd_size = zip64EocdRecord.cd_size,
- .cd_start_offset = zip64EocdRecord.cd_start_offset};
+ *cdInfo = {.num_records = zip64EocdRecord->num_records,
+ .cd_size = zip64EocdRecord->cd_size,
+ .cd_start_offset = zip64EocdRecord->cd_start_offset};
return kSuccess;
}
@@ -220,7 +278,8 @@
const auto read_amount = static_cast<uint32_t>(scan_buffer.size());
const off64_t search_start = file_length - read_amount;
- if (!archive->mapped_zip.ReadAtOffset(scan_buffer.data(), read_amount, search_start)) {
+ const auto data = archive->mapped_zip.ReadAtOffset(scan_buffer.data(), read_amount, search_start);
+ if (!data) {
ALOGE("Zip: read %" PRId64 " from offset %" PRId64 " failed", static_cast<int64_t>(read_amount),
static_cast<int64_t>(search_start));
return kIoError;
@@ -235,8 +294,8 @@
CHECK_LE(read_amount, std::numeric_limits<int32_t>::max());
int32_t i = read_amount - sizeof(EocdRecord);
for (; i >= 0; i--) {
- if (scan_buffer[i] == 0x50) {
- uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
+ if (data[i] == 0x50) {
+ const uint32_t* sig_addr = reinterpret_cast<const uint32_t*>(&data[i]);
if (android::base::get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
ALOGV("+++ Found EOCD at buf+%d", i);
break;
@@ -249,7 +308,7 @@
}
const off64_t eocd_offset = search_start + i;
- auto eocd = reinterpret_cast<const EocdRecord*>(scan_buffer.data() + i);
+ auto eocd = reinterpret_cast<const EocdRecord*>(data + i);
/*
* Verify that there's no trailing space at the end of the central directory
* and its comment.
@@ -293,7 +352,8 @@
* num_entries
*/
static ZipError MapCentralDirectory(const char* debug_file_name, ZipArchive* archive) {
- // Test file length. We use lseek64 to make sure the file is small enough to be a zip file.
+ // Test file length. We want to make sure the file is small enough to be a zip
+ // file.
off64_t file_length = archive->mapped_zip.GetFileLength();
if (file_length == -1) {
return kInvalidFile;
@@ -321,10 +381,7 @@
*
* We start by pulling in the last part of the file.
*/
- uint32_t read_amount = kMaxEOCDSearch;
- if (file_length < read_amount) {
- read_amount = static_cast<uint32_t>(file_length);
- }
+ const auto read_amount = uint32_t(std::min<off64_t>(file_length, kMaxEOCDSearch));
CentralDirectoryInfo cdInfo = {};
std::vector<uint8_t> scan_buffer(read_amount);
@@ -481,25 +538,15 @@
static ZipError ParseZipArchive(ZipArchive* archive) {
SCOPED_SIGBUS_HANDLER(return kIoError);
+ maybePrefetch(archive->central_directory.GetBasePtr(), archive->central_directory.GetMapLength());
const uint8_t* const cd_ptr = archive->central_directory.GetBasePtr();
const size_t cd_length = archive->central_directory.GetMapLength();
- const uint64_t num_entries = archive->num_entries;
-
- if (num_entries <= UINT16_MAX) {
- archive->cd_entry_map = CdEntryMapZip32::Create(static_cast<uint16_t>(num_entries));
- } else {
- archive->cd_entry_map = CdEntryMapZip64::Create();
- }
- if (archive->cd_entry_map == nullptr) {
- return kAllocationFailed;
- }
-
- /*
- * Walk through the central directory, adding entries to the hash
- * table and verifying values.
- */
const uint8_t* const cd_end = cd_ptr + cd_length;
+ const uint64_t num_entries = archive->num_entries;
const uint8_t* ptr = cd_ptr;
+ uint16_t max_file_name_length = 0;
+
+ /* Walk through the central directory and verify values */
for (uint64_t i = 0; i < num_entries; i++) {
if (ptr > cd_end - sizeof(CentralDirectoryRecord)) {
ALOGW("Zip: ran off the end (item #%" PRIu64 ", %zu bytes of central directory)", i,
@@ -528,6 +575,8 @@
return kInvalidEntryName;
}
+ max_file_name_length = std::max(max_file_name_length, file_name_length);
+
const uint8_t* extra_field = file_name + file_name_length;
if (extra_length >= cd_length || extra_field > cd_end - extra_length) {
ALOGW("Zip: extra field for entry %" PRIu64
@@ -561,15 +610,6 @@
return kInvalidEntryName;
}
- // Add the CDE filename to the hash table.
- std::string_view entry_name{reinterpret_cast<const char*>(file_name), file_name_length};
- if (auto add_result =
- archive->cd_entry_map->AddToMap(entry_name, archive->central_directory.GetBasePtr());
- add_result != 0) {
- ALOGW("Zip: Error adding entry to hash table %d", add_result);
- return add_result;
- }
-
ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu64, ptr - cd_ptr, cd_length, i);
@@ -577,15 +617,36 @@
}
}
- uint32_t lfh_start_bytes;
- if (!archive->mapped_zip.ReadAtOffset(reinterpret_cast<uint8_t*>(&lfh_start_bytes),
- sizeof(uint32_t), 0)) {
+ /* Create memory efficient entry map */
+ archive->cd_entry_map = CdEntryMapInterface::Create(num_entries, cd_length, max_file_name_length);
+ if (archive->cd_entry_map == nullptr) {
+ return kAllocationFailed;
+ }
+
+ /* Central directory verified, now add entries to the hash table */
+ ptr = cd_ptr;
+ for (uint64_t i = 0; i < num_entries; i++) {
+ auto cdr = reinterpret_cast<const CentralDirectoryRecord*>(ptr);
+ std::string_view entry_name{reinterpret_cast<const char*>(ptr + sizeof(*cdr)),
+ cdr->file_name_length};
+ auto add_result = archive->cd_entry_map->AddToMap(entry_name, cd_ptr);
+ if (add_result != 0) {
+ ALOGW("Zip: Error adding entry to hash table %d", add_result);
+ return add_result;
+ }
+ ptr += sizeof(*cdr) + cdr->file_name_length + cdr->extra_field_length + cdr->comment_length;
+ }
+
+ uint32_t lfh_start_bytes_buf;
+ auto lfh_start_bytes = reinterpret_cast<const uint32_t*>(archive->mapped_zip.ReadAtOffset(
+ reinterpret_cast<uint8_t*>(&lfh_start_bytes_buf), sizeof(lfh_start_bytes_buf), 0));
+ if (!lfh_start_bytes) {
ALOGW("Zip: Unable to read header for entry at offset == 0.");
return kInvalidFile;
}
- if (lfh_start_bytes != LocalFileHeader::kSignature) {
- ALOGW("Zip: Entry at offset zero has invalid LFH signature %" PRIx32, lfh_start_bytes);
+ if (*lfh_start_bytes != LocalFileHeader::kSignature) {
+ ALOGW("Zip: Entry at offset zero has invalid LFH signature %" PRIx32, *lfh_start_bytes);
#if defined(__ANDROID__)
android_errorWriteLog(0x534e4554, "64211847");
#endif
@@ -678,12 +739,13 @@
offset += entry->uncompressed_length;
}
- if (!mapped_zip.ReadAtOffset(ddBuf, sizeof(ddBuf), offset)) {
+ const auto ddPtr = mapped_zip.ReadAtOffset(ddBuf, sizeof(ddBuf), offset);
+ if (!ddPtr) {
return kIoError;
}
- const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
- uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddBuf + 4 : ddBuf;
+ const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddPtr));
+ const uint8_t* ddReadPtr = (ddSignature == DataDescriptor::kOptSignature) ? ddPtr + 4 : ddPtr;
DataDescriptor descriptor{};
descriptor.crc32 = ConsumeUnaligned<uint32_t>(&ddReadPtr);
// Don't use entry->zip64_format_size, because that is set to true even if
@@ -715,11 +777,9 @@
static int32_t FindEntry(const ZipArchive* archive, std::string_view entryName,
const uint64_t nameOffset, ZipEntry64* data) {
- std::vector<uint8_t> name_buf;
- std::vector<uint8_t> local_extra_field;
+ std::vector<uint8_t> buffer;
SCOPED_SIGBUS_HANDLER({
- incfs::util::clearAndFree(name_buf);
- incfs::util::clearAndFree(local_extra_field);
+ incfs::util::clearAndFree(buffer);
return kIoError;
});
@@ -787,13 +847,14 @@
}
uint8_t lfh_buf[sizeof(LocalFileHeader)];
- if (!archive->mapped_zip.ReadAtOffset(lfh_buf, sizeof(lfh_buf), local_header_offset)) {
+ const auto lfh = reinterpret_cast<const LocalFileHeader*>(
+ archive->mapped_zip.ReadAtOffset(lfh_buf, sizeof(lfh_buf), local_header_offset));
+ if (!lfh) {
ALOGW("Zip: failed reading lfh name from offset %" PRId64,
static_cast<int64_t>(local_header_offset));
return kIoError;
}
- auto lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
if (lfh->lfh_signature != LocalFileHeader::kSignature) {
ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
static_cast<int64_t>(local_header_offset));
@@ -820,12 +881,23 @@
return kInvalidOffset;
}
- name_buf.resize(name_length);
- if (!archive->mapped_zip.ReadAtOffset(name_buf.data(), name_buf.size(), name_offset)) {
+ // An optimization: get enough memory on the stack to be able to use it later without an extra
+ // allocation when reading the zip64 extended info. Reasonable names should be under half the
+ // MAX_PATH (256 chars), and Zip64 header size is 32 bytes; archives often have some other extras,
+  // e.g. alignment, so 128 bytes ought to be enough for (almost) anybody. If it's not we'll
+ // reallocate later anyway.
+ uint8_t static_buf[128];
+ auto name_buf = static_buf;
+ if (name_length > std::size(static_buf)) {
+ buffer.resize(name_length);
+ name_buf = buffer.data();
+ }
+ const auto read_name = archive->mapped_zip.ReadAtOffset(name_buf, name_length, name_offset);
+ if (!read_name) {
ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
return kIoError;
}
- if (memcmp(entryName.data(), name_buf.data(), name_buf.size()) != 0) {
+ if (memcmp(entryName.data(), read_name, name_length) != 0) {
ALOGW("Zip: lfh name did not match central directory");
return kInconsistentInformation;
}
@@ -847,17 +919,24 @@
return kInvalidOffset;
}
- local_extra_field.resize(lfh_extra_field_size);
- if (!archive->mapped_zip.ReadAtOffset(local_extra_field.data(), lfh_extra_field_size,
- lfh_extra_field_offset)) {
+ auto lfh_extra_field_buf = static_buf;
+ if (lfh_extra_field_size > std::size(static_buf)) {
+ // Make sure vector won't try to copy existing data if it needs to reallocate.
+ buffer.clear();
+ buffer.resize(lfh_extra_field_size);
+ lfh_extra_field_buf = buffer.data();
+ }
+ const auto local_extra_field = archive->mapped_zip.ReadAtOffset(
+ lfh_extra_field_buf, lfh_extra_field_size, lfh_extra_field_offset);
+ if (!local_extra_field) {
ALOGW("Zip: failed reading lfh extra field from offset %" PRId64, lfh_extra_field_offset);
return kIoError;
}
Zip64ExtendedInfo zip64_info{};
if (auto status = ParseZip64ExtendedInfoInExtraField(
- local_extra_field.data(), lfh_extra_field_size, lfh->uncompressed_size,
- lfh->compressed_size, std::nullopt, &zip64_info);
+ local_extra_field, lfh_extra_field_size, lfh->uncompressed_size, lfh->compressed_size,
+ std::nullopt, &zip64_info);
status != kSuccess) {
return status;
}
@@ -956,7 +1035,7 @@
IterationHandle(ZipArchive* archive, std::function<bool(std::string_view)> in_matcher)
: archive(archive), matcher(std::move(in_matcher)) {}
- bool Match(std::string_view entry_name) const { return matcher(entry_name); }
+ bool Match(std::string_view entry_name) const { return !matcher || matcher(entry_name); }
};
int32_t StartIteration(ZipArchiveHandle archive, void** cookie_ptr,
@@ -967,6 +1046,9 @@
ALOGW("Zip: prefix/suffix too long");
return kInvalidEntryName;
}
+ if (optional_prefix.empty() && optional_suffix.empty()) {
+ return StartIteration(archive, cookie_ptr, std::function<bool(std::string_view)>{});
+ }
auto matcher = [prefix = std::string(optional_prefix),
suffix = std::string(optional_suffix)](std::string_view name) mutable {
return android::base::StartsWith(name, prefix) && android::base::EndsWith(name, suffix);
@@ -1094,7 +1176,7 @@
// A Writer that writes data to a fixed size memory region.
// The size of the memory region must be equal to the total size of
// the data appended to it.
-class MemoryWriter : public zip_archive::Writer {
+class MemoryWriter final : public zip_archive::Writer {
public:
static std::optional<MemoryWriter> Create(uint8_t* buf, size_t size,
const ZipEntry64* entry) {
@@ -1109,6 +1191,10 @@
}
virtual bool Append(uint8_t* buf, size_t buf_size) override {
+ if (buf_size == 0 || (buf >= buf_ && buf < buf_ + size_)) {
+ return true;
+ }
+
if (size_ < buf_size || bytes_written_ > size_ - buf_size) {
ALOGW("Zip: Unexpected size %zu (declared) vs %zu (actual)", size_,
bytes_written_ + buf_size);
@@ -1120,7 +1206,18 @@
return true;
}
- MemoryWriter(uint8_t* buf, size_t size) : Writer(), buf_(buf), size_(size), bytes_written_(0) {}
+ Buffer GetBuffer(size_t length) override {
+ if (length > size_) {
+ // Special case for empty files: zlib wants at least some buffer but won't ever write there.
+ if (size_ == 0 && length <= sizeof(bytes_written_)) {
+ return {reinterpret_cast<uint8_t*>(&bytes_written_), length};
+ }
+ return {};
+ }
+ return {buf_, length};
+ }
+
+ MemoryWriter(uint8_t* buf, size_t size) : buf_(buf), size_(size), bytes_written_(0) {}
private:
uint8_t* const buf_{nullptr};
@@ -1130,7 +1227,7 @@
// A Writer that appends data to a file |fd| at its current position.
// The file will be truncated to the end of the written data.
-class FileWriter : public zip_archive::Writer {
+class FileWriter final : public zip_archive::Writer {
public:
// Creates a FileWriter for |fd| and prepare to write |entry| to it,
// guaranteeing that the file descriptor is valid and that there's enough
@@ -1222,16 +1319,25 @@
size_t total_bytes_written_;
};
-class EntryReader : public zip_archive::Reader {
+class EntryReader final : public zip_archive::Reader {
public:
EntryReader(const MappedZipFile& zip_file, const ZipEntry64* entry)
: Reader(), zip_file_(zip_file), entry_(entry) {}
- virtual bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const {
+ bool ReadAtOffset(uint8_t* buf, size_t len, off64_t offset) const override {
+ const auto res = zip_file_.ReadAtOffset(buf, len, entry_->offset + offset);
+ if (!res) return false;
+ if (res != buf) {
+ memcpy(buf, res, len);
+ }
+ return true;
+ }
+
+ const uint8_t* AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const override {
return zip_file_.ReadAtOffset(buf, len, entry_->offset + offset);
}
- virtual ~EntryReader() {}
+ bool IsZeroCopy() const override { return zip_file_.GetBasePtr() != nullptr; }
private:
const MappedZipFile& zip_file_;
@@ -1249,40 +1355,72 @@
namespace zip_archive {
// Moved out of line to avoid -Wweak-vtables.
-Reader::~Reader() {}
-Writer::~Writer() {}
+auto Writer::GetBuffer(size_t) -> Buffer {
+ return {};
+}
+
+const uint8_t* Reader::AccessAtOffset(uint8_t* buf, size_t len, off64_t offset) const {
+ return ReadAtOffset(buf, len, offset) ? buf : nullptr;
+}
+
+bool Reader::IsZeroCopy() const {
+ return false;
+}
} // namespace zip_archive
+static std::span<uint8_t> bufferToSpan(zip_archive::Writer::Buffer buf) {
+ return {buf.first, ssize_t(buf.second)};
+}
+
template <bool OnIncfs>
static int32_t inflateImpl(const zip_archive::Reader& reader,
const uint64_t compressed_length,
const uint64_t uncompressed_length,
zip_archive::Writer* writer, uint64_t* crc_out) {
- const size_t kBufSize = 32768;
- std::vector<uint8_t> read_buf(kBufSize);
- std::vector<uint8_t> write_buf(kBufSize);
- z_stream zstream;
- int zerr;
+ constexpr uint64_t kBufSize = 32768;
+
+ std::vector<uint8_t> read_buf;
+ uint64_t max_read_size;
+ if (reader.IsZeroCopy()) {
+ max_read_size = std::min<uint64_t>(std::numeric_limits<uint32_t>::max(), compressed_length);
+ } else {
+ max_read_size = std::min(compressed_length, kBufSize);
+ read_buf.resize(static_cast<size_t>(max_read_size));
+ }
+
+ std::vector<uint8_t> write_buf;
+ // For some files zlib needs more space than the uncompressed buffer size, e.g. when inflating
+ // an empty file.
+ const auto min_write_buffer_size = std::max(compressed_length, uncompressed_length);
+ auto write_span = bufferToSpan(writer->GetBuffer(size_t(min_write_buffer_size)));
+ bool direct_writer;
+ if (write_span.size() >= min_write_buffer_size) {
+ direct_writer = true;
+ } else {
+ direct_writer = false;
+ write_buf.resize(static_cast<size_t>(std::min(min_write_buffer_size, kBufSize)));
+ write_span = write_buf;
+ }
/*
* Initialize the zlib stream struct.
*/
- memset(&zstream, 0, sizeof(zstream));
+ z_stream zstream = {};
zstream.zalloc = Z_NULL;
zstream.zfree = Z_NULL;
zstream.opaque = Z_NULL;
zstream.next_in = NULL;
zstream.avail_in = 0;
- zstream.next_out = &write_buf[0];
- zstream.avail_out = kBufSize;
+ zstream.next_out = write_span.data();
+ zstream.avail_out = static_cast<uint32_t>(write_span.size());
zstream.data_type = Z_UNKNOWN;
/*
* Use the undocumented "negative window bits" feature to tell zlib
* that there's no zlib header waiting for it.
*/
- zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
+ int zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
if (zerr != Z_OK) {
if (zerr == Z_VERSION_ERROR) {
ALOGE("Installed zlib is not compatible with linked version (%s)", ZLIB_VERSION);
@@ -1298,6 +1436,7 @@
};
std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
+ static_assert(sizeof(zstream_guard) == sizeof(void*));
SCOPED_SIGBUS_HANDLER_CONDITIONAL(OnIncfs, {
zstream_guard.reset();
@@ -1313,18 +1452,17 @@
do {
/* read as much as we can */
if (zstream.avail_in == 0) {
- const uint32_t read_size =
- (remaining_bytes > kBufSize) ? kBufSize : static_cast<uint32_t>(remaining_bytes);
+ const auto read_size = static_cast<uint32_t>(std::min(remaining_bytes, max_read_size));
const off64_t offset = (compressed_length - remaining_bytes);
- // Make sure to read at offset to ensure concurrent access to the fd.
- if (!reader.ReadAtOffset(read_buf.data(), read_size, offset)) {
+ auto buf = reader.AccessAtOffset(read_buf.data(), read_size, offset);
+ if (!buf) {
ALOGW("Zip: inflate read failed, getSize = %u: %s", read_size, strerror(errno));
return kIoError;
}
remaining_bytes -= read_size;
- zstream.next_in = &read_buf[0];
+ zstream.next_in = buf;
zstream.avail_in = read_size;
}
@@ -1337,18 +1475,25 @@
}
/* write when we're full or when we're done */
- if (zstream.avail_out == 0 || (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
- const size_t write_size = zstream.next_out - &write_buf[0];
- if (!writer->Append(&write_buf[0], write_size)) {
+ if (zstream.avail_out == 0 ||
+ (zerr == Z_STREAM_END && zstream.avail_out != write_span.size())) {
+ const size_t write_size = zstream.next_out - write_span.data();
+ if (compute_crc) {
+ DCHECK_LE(write_size, write_span.size());
+ crc = crc32(crc, write_span.data(), static_cast<uint32_t>(write_size));
+ }
+ total_output += write_span.size() - zstream.avail_out;
+
+ if (direct_writer) {
+ write_span = write_span.subspan(write_size);
+ } else if (!writer->Append(write_span.data(), write_size)) {
return kIoError;
- } else if (compute_crc) {
- DCHECK_LE(write_size, kBufSize);
- crc = crc32(crc, &write_buf[0], static_cast<uint32_t>(write_size));
}
- total_output += kBufSize - zstream.avail_out;
- zstream.next_out = &write_buf[0];
- zstream.avail_out = kBufSize;
+ if (zstream.avail_out == 0) {
+ zstream.next_out = write_span.data();
+ zstream.avail_out = static_cast<uint32_t>(write_span.size());
+ }
}
} while (zerr == Z_OK);
@@ -1375,15 +1520,30 @@
static int32_t InflateEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry64* entry,
zip_archive::Writer* writer, uint64_t* crc_out) {
const EntryReader reader(mapped_zip, entry);
-
return inflateImpl<true>(reader, entry->compressed_length,
entry->uncompressed_length, writer, crc_out);
}
static int32_t CopyEntryToWriter(MappedZipFile& mapped_zip, const ZipEntry64* entry,
zip_archive::Writer* writer, uint64_t* crc_out) {
- static const uint32_t kBufSize = 32768;
- std::vector<uint8_t> buf(kBufSize);
+ constexpr uint64_t kBufSize = 32768;
+ std::vector<uint8_t> buf;
+ std::span<uint8_t> write_span{};
+ uint64_t max_read_size;
+ if (mapped_zip.GetBasePtr() == nullptr ||
+ mapped_zip.GetFileLength() < entry->uncompressed_length) {
+ // Check if we can read directly into the writer.
+ write_span = bufferToSpan(writer->GetBuffer(size_t(entry->uncompressed_length)));
+ if (write_span.size() >= entry->uncompressed_length) {
+ max_read_size = entry->uncompressed_length;
+ } else {
+ max_read_size = std::min(entry->uncompressed_length, kBufSize);
+ buf.resize((static_cast<size_t>(max_read_size)));
+ write_span = buf;
+ }
+ } else {
+ max_read_size = entry->uncompressed_length;
+ }
SCOPED_SIGBUS_HANDLER({
incfs::util::clearAndFree(buf);
@@ -1397,22 +1557,25 @@
uint64_t remaining = length - count;
off64_t offset = entry->offset + count;
- // Safe conversion because kBufSize is narrow enough for a 32 bit signed value.
- const uint32_t block_size =
- (remaining > kBufSize) ? kBufSize : static_cast<uint32_t>(remaining);
+ // Safe conversion because even kBufSize is narrow enough for a 32 bit signed value.
+ const auto block_size = static_cast<uint32_t>(std::min(remaining, max_read_size));
- // Make sure to read at offset to ensure concurrent access to the fd.
- if (!mapped_zip.ReadAtOffset(buf.data(), block_size, offset)) {
+ const auto read_buf = mapped_zip.ReadAtOffset(write_span.data(), block_size, offset);
+ if (!read_buf) {
ALOGW("CopyFileToFile: copy read failed, block_size = %u, offset = %" PRId64 ": %s",
block_size, static_cast<int64_t>(offset), strerror(errno));
return kIoError;
}
- if (!writer->Append(&buf[0], block_size)) {
+ if (!writer->Append(const_cast<uint8_t*>(read_buf), block_size)) {
return kIoError;
}
+ // Advance our span if it's a direct buffer (there's a span but local buffer's empty).
+ if (!write_span.empty() && buf.empty()) {
+ write_span = write_span.subspan(block_size);
+ }
if (crc_out) {
- crc = crc32(crc, &buf[0], block_size);
+ crc = crc32(crc, read_buf, block_size);
}
count += block_size;
}
@@ -1467,7 +1630,6 @@
if (!writer) {
return kIoError;
}
-
return extractToWriter(archive, entry, &writer.value());
}
@@ -1481,7 +1643,6 @@
if (!writer) {
return kIoError;
}
-
return extractToWriter(archive, entry, &writer.value());
}
@@ -1500,7 +1661,7 @@
// incfs_support/signal_handling.h
//
#if !ZIPARCHIVE_DISABLE_CALLBACK_API && !defined(_WIN32)
-class ProcessWriter : public zip_archive::Writer {
+class ProcessWriter final : public zip_archive::Writer {
public:
ProcessWriter(ProcessZipEntryFunction func, void* cookie)
: Writer(), proc_function_(func), cookie_(cookie) {}
@@ -1528,19 +1689,33 @@
#endif // !ZIPARCHIVE_DISABLE_CALLBACK_API && !defined(_WIN32)
-int MappedZipFile::GetFileDescriptor() const {
- if (!has_fd_) {
- ALOGW("Zip: MappedZipFile doesn't have a file descriptor.");
- return -1;
+MappedZipFile::MappedZipFile(int fd, off64_t length, off64_t offset)
+ : fd_(fd), fd_offset_(offset), data_length_(length) {
+ // TODO(b/287285733): restore mmap() when the cold cache regression is fixed.
+#if 0
+ // Only try to mmap all files in 64-bit+ processes as it's too easy to use up the whole
+ // virtual address space on 32-bits, causing out of memory errors later.
+ if constexpr (sizeof(void*) >= 8) {
+ // Note: GetFileLength() here fills |data_length_| if it was empty.
+ // TODO(b/261875471): remove the incfs exclusion when the driver deadlock is fixed.
+ if (fd >= 0 && !incfs::util::isIncfsFd(fd) && GetFileLength() > 0 &&
+ GetFileLength() < std::numeric_limits<size_t>::max()) {
+ mapped_file_ =
+ android::base::MappedFile::FromFd(fd, fd_offset_, size_t(data_length_), PROT_READ);
+ if (mapped_file_) {
+ maybePrepareSequentialReading(mapped_file_->data(), size_t(data_length_));
+ base_ptr_ = mapped_file_->data();
+ }
+ }
}
+#endif // 0
+}
+
+int MappedZipFile::GetFileDescriptor() const {
return fd_;
}
const void* MappedZipFile::GetBasePtr() const {
- if (has_fd_) {
- ALOGW("Zip: MappedZipFile doesn't have a base pointer.");
- return nullptr;
- }
return base_ptr_;
}
@@ -1549,67 +1724,83 @@
}
off64_t MappedZipFile::GetFileLength() const {
- if (has_fd_) {
- if (data_length_ != -1) {
- return data_length_;
- }
- data_length_ = lseek64(fd_, 0, SEEK_END);
- if (data_length_ == -1) {
- ALOGE("Zip: lseek on fd %d failed: %s", fd_, strerror(errno));
- }
- return data_length_;
- } else {
- if (base_ptr_ == nullptr) {
- ALOGE("Zip: invalid file map");
- return -1;
- }
+ if (data_length_ >= 0) {
return data_length_;
}
+ if (fd_ < 0) {
+ ALOGE("Zip: invalid file map");
+ } else {
+ struct stat st;
+ if (fstat(fd_, &st)) {
+ ALOGE("Zip: fstat(%d) failed: %s", fd_, strerror(errno));
+ } else {
+ if (S_ISBLK(st.st_mode)) {
+#if defined(__linux__)
+ // Block devices are special - they report 0 as st_size.
+ uint64_t size;
+ if (ioctl(fd_, BLKGETSIZE64, &size)) {
+ ALOGE("Zip: ioctl(%d, BLKGETSIZE64) failed: %s", fd_, strerror(errno));
+ } else {
+ data_length_ = size - fd_offset_;
+ }
+#endif
+ } else {
+ data_length_ = st.st_size - fd_offset_;
+ }
+ }
+ }
+ return data_length_;
}
// Attempts to read |len| bytes into |buf| at offset |off|.
-bool MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) const {
- if (has_fd_) {
- if (off < 0) {
- ALOGE("Zip: invalid offset %" PRId64, off);
- return false;
- }
-
- off64_t read_offset;
- if (__builtin_add_overflow(fd_offset_, off, &read_offset)) {
- ALOGE("Zip: invalid read offset %" PRId64 " overflows, fd offset %" PRId64, off, fd_offset_);
- return false;
- }
-
- if (data_length_ != -1) {
- off64_t read_end;
- if (len > std::numeric_limits<off64_t>::max() ||
- __builtin_add_overflow(off, static_cast<off64_t>(len), &read_end)) {
- ALOGE("Zip: invalid read length %" PRId64 " overflows, offset %" PRId64,
- static_cast<off64_t>(len), off);
- return false;
- }
-
- if (read_end > data_length_) {
- ALOGE("Zip: invalid read length %" PRId64 " exceeds data length %" PRId64 ", offset %"
- PRId64, static_cast<off64_t>(len), data_length_, off);
- return false;
- }
- }
-
- if (!android::base::ReadFullyAtOffset(fd_, buf, len, read_offset)) {
- ALOGE("Zip: failed to read at offset %" PRId64, off);
- return false;
- }
- } else {
+const uint8_t* MappedZipFile::ReadAtOffset(uint8_t* buf, size_t len, off64_t off) const {
+ if (base_ptr_) {
if (off < 0 || data_length_ < len || off > data_length_ - len) {
ALOGE("Zip: invalid offset: %" PRId64 ", read length: %zu, data length: %" PRId64, off, len,
data_length_);
- return false;
+ return nullptr;
}
- memcpy(buf, static_cast<const uint8_t*>(base_ptr_) + off, len);
+ maybePrefetch(static_cast<const uint8_t*>(base_ptr_) + off, len);
+ return static_cast<const uint8_t*>(base_ptr_) + off;
}
- return true;
+ if (fd_ < 0) {
+ ALOGE("Zip: invalid zip file");
+ return nullptr;
+ }
+
+ if (off < 0) {
+ ALOGE("Zip: invalid offset %" PRId64, off);
+ return nullptr;
+ }
+
+ off64_t read_offset;
+ if (__builtin_add_overflow(fd_offset_, off, &read_offset)) {
+ ALOGE("Zip: invalid read offset %" PRId64 " overflows, fd offset %" PRId64, off, fd_offset_);
+ return nullptr;
+ }
+
+ if (data_length_ != -1) {
+ off64_t read_end;
+ if (len > std::numeric_limits<off64_t>::max() ||
+ __builtin_add_overflow(off, static_cast<off64_t>(len), &read_end)) {
+ ALOGE("Zip: invalid read length %" PRId64 " overflows, offset %" PRId64,
+ static_cast<off64_t>(len), off);
+ return nullptr;
+ }
+
+ if (read_end > data_length_) {
+ ALOGE("Zip: invalid read length %" PRId64 " exceeds data length %" PRId64 ", offset %" PRId64,
+ static_cast<off64_t>(len), data_length_, off);
+ return nullptr;
+ }
+ }
+
+ // Make sure to read at offset to ensure concurrent access to the fd.
+ if (!android::base::ReadFullyAtOffset(fd_, buf, len, read_offset)) {
+ ALOGE("Zip: failed to read at offset %" PRId64, off);
+ return nullptr;
+ }
+ return buf;
}
void CentralDirectory::Initialize(const void* map_base_ptr, off64_t cd_start_offset,
@@ -1619,7 +1810,7 @@
}
bool ZipArchive::InitializeCentralDirectory(off64_t cd_start_offset, size_t cd_size) {
- if (mapped_zip.HasFd()) {
+ if (!mapped_zip.GetBasePtr()) {
directory_map = android::base::MappedFile::FromFd(mapped_zip.GetFileDescriptor(),
mapped_zip.GetFileOffset() + cd_start_offset,
cd_size, PROT_READ);
@@ -1633,14 +1824,16 @@
central_directory.Initialize(directory_map->data(), 0 /*offset*/, cd_size);
} else {
if (mapped_zip.GetBasePtr() == nullptr) {
- ALOGE("Zip: Failed to map central directory, bad mapped_zip base pointer");
+ ALOGE(
+ "Zip: Failed to map central directory, bad mapped_zip base "
+ "pointer");
return false;
}
if (static_cast<off64_t>(cd_start_offset) + static_cast<off64_t>(cd_size) >
mapped_zip.GetFileLength()) {
ALOGE(
- "Zip: Failed to map central directory, offset exceeds mapped memory region ("
- "start_offset %" PRId64 ", cd_size %zu, mapped_region_size %" PRId64 ")",
+ "Zip: Failed to map central directory, offset exceeds mapped memory region (start_offset "
+ "%" PRId64 ", cd_size %zu, mapped_region_size %" PRId64 ")",
static_cast<int64_t>(cd_start_offset), cd_size, mapped_zip.GetFileLength());
return false;
}
diff --git a/zip_archive_benchmark.cpp b/zip_archive_benchmark.cpp
index cfa5912..373ade6 100644
--- a/zip_archive_benchmark.cpp
+++ b/zip_archive_benchmark.cpp
@@ -17,8 +17,9 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <memory>
#include <string>
-#include <tuple>
+#include <string_view>
#include <vector>
#include <android-base/test_utils.h>
@@ -27,16 +28,20 @@
#include <ziparchive/zip_archive_stream_entry.h>
#include <ziparchive/zip_writer.h>
-static std::unique_ptr<TemporaryFile> CreateZip(int size = 4, int count = 1000) {
+static std::unique_ptr<TemporaryFile> CreateZip(int size = 4, int count = 1000,
+ bool compress = true) {
auto result = std::make_unique<TemporaryFile>();
FILE* fp = fdopen(result->fd, "w");
ZipWriter writer(fp);
- std::string lastName = "file";
+ std::string baseName = "file";
for (size_t i = 0; i < count; i++) {
// Make file names longer and longer.
- lastName = lastName + std::to_string(i);
- writer.StartEntry(lastName.c_str(), ZipWriter::kCompress);
+ if (i && (i % 100 == 0)) {
+ baseName += "more";
+ }
+ std::string name = baseName + std::to_string(i);
+ writer.StartEntry(name.c_str(), compress ? ZipWriter::kCompress : 0);
while (size > 0) {
writer.WriteBytes("helo", 4);
size -= 4;
@@ -49,9 +54,19 @@
return result;
}
+static void OpenClose(benchmark::State& state) {
+ std::unique_ptr<TemporaryFile> temp_file(CreateZip(4, int(state.range(0))));
+ ZipArchiveHandle handle;
+ for (auto _ : state) {
+ OpenArchive(temp_file->path, &handle);
+ CloseArchive(handle);
+ }
+}
+BENCHMARK(OpenClose)->Arg(1)->Arg(10)->Arg(1000)->Arg(10000);
+
static void FindEntry_no_match(benchmark::State& state) {
// Create a temporary zip archive.
- std::unique_ptr<TemporaryFile> temp_file(CreateZip());
+ std::unique_ptr<TemporaryFile> temp_file(CreateZip(4, int(state.range(0))));
ZipArchiveHandle handle;
ZipEntry data;
@@ -60,31 +75,31 @@
std::string_view name("thisFileNameDoesNotExist");
// Start the benchmark.
+ OpenArchive(temp_file->path, &handle);
for (auto _ : state) {
- OpenArchive(temp_file->path, &handle);
FindEntry(handle, name, &data);
- CloseArchive(handle);
}
+ CloseArchive(handle);
}
-BENCHMARK(FindEntry_no_match);
+BENCHMARK(FindEntry_no_match)->Arg(1)->Arg(10)->Arg(1000)->Arg(10000);
static void Iterate_all_files(benchmark::State& state) {
- std::unique_ptr<TemporaryFile> temp_file(CreateZip());
+ std::unique_ptr<TemporaryFile> temp_file(CreateZip(4, int(state.range(0))));
ZipArchiveHandle handle;
void* iteration_cookie;
ZipEntry data;
- std::string name;
+ std::string_view name;
+ OpenArchive(temp_file->path, &handle);
for (auto _ : state) {
- OpenArchive(temp_file->path, &handle);
StartIteration(handle, &iteration_cookie);
while (Next(iteration_cookie, &data, &name) == 0) {
}
EndIteration(iteration_cookie);
- CloseArchive(handle);
}
+ CloseArchive(handle);
}
-BENCHMARK(Iterate_all_files);
+BENCHMARK(Iterate_all_files)->Arg(1)->Arg(10)->Arg(1000)->Arg(10000);
static void StartAlignedEntry(benchmark::State& state) {
TemporaryFile file;
@@ -98,7 +113,6 @@
for (auto _ : state) {
writer.StartAlignedEntry(name + std::to_string(counter++), 0, alignment);
state.PauseTiming();
- writer.WriteBytes("hola", 4);
writer.FinishEntry();
state.ResumeTiming();
}
@@ -109,7 +123,8 @@
BENCHMARK(StartAlignedEntry)->Arg(2)->Arg(16)->Arg(1024)->Arg(4096);
static void ExtractEntry(benchmark::State& state) {
- std::unique_ptr<TemporaryFile> temp_file(CreateZip(1024 * 1024, 1));
+ const auto size = int(state.range(0));
+ std::unique_ptr<TemporaryFile> temp_file(CreateZip(size * 1024, 1));
ZipArchiveHandle handle;
ZipEntry data;
@@ -120,7 +135,7 @@
state.SkipWithError("Failed to find archive entry");
}
- std::vector<uint8_t> buffer(1024 * 1024);
+ std::vector<uint8_t> buffer(size * 1024);
for (auto _ : state) {
if (ExtractToMemory(handle, &data, buffer.data(), uint32_t(buffer.size()))) {
state.SkipWithError("Failed to extract archive entry");
@@ -130,6 +145,31 @@
CloseArchive(handle);
}
-BENCHMARK(ExtractEntry)->Arg(2)->Arg(16)->Arg(1024);
+BENCHMARK(ExtractEntry)->Arg(2)->Arg(16)->Arg(64)->Arg(1024)->Arg(4096);
+
+static void ExtractStored(benchmark::State& state) {
+ const auto size = int(state.range(0));
+ std::unique_ptr<TemporaryFile> temp_file(CreateZip(size * 1024, 1, false));
+
+ ZipArchiveHandle handle;
+ ZipEntry data;
+ if (OpenArchive(temp_file->path, &handle)) {
+ state.SkipWithError("Failed to open archive");
+ }
+ if (FindEntry(handle, "file0", &data)) {
+ state.SkipWithError("Failed to find archive entry");
+ }
+
+ std::vector<uint8_t> buffer(size * 1024);
+ for (auto _ : state) {
+ if (ExtractToMemory(handle, &data, buffer.data(), uint32_t(buffer.size()))) {
+ state.SkipWithError("Failed to extract archive entry");
+ break;
+ }
+ }
+ CloseArchive(handle);
+}
+
+BENCHMARK(ExtractStored)->Arg(2)->Arg(16)->Arg(64)->Arg(1024)->Arg(4096);
BENCHMARK_MAIN();
diff --git a/zip_archive_private.h b/zip_archive_private.h
index 05cb2d0..4321f83 100644
--- a/zip_archive_private.h
+++ b/zip_archive_private.h
@@ -34,17 +34,10 @@
class MappedZipFile {
public:
- explicit MappedZipFile(const int fd)
- : has_fd_(true), fd_(fd), fd_offset_(0), base_ptr_(nullptr), data_length_(-1) {}
-
- explicit MappedZipFile(const int fd, off64_t length, off64_t offset)
- : has_fd_(true), fd_(fd), fd_offset_(offset), base_ptr_(nullptr), data_length_(length) {}
+ explicit MappedZipFile(int fd, off64_t length = -1, off64_t offset = 0);
explicit MappedZipFile(const void* address, size_t length)
- : has_fd_(false), fd_(-1), fd_offset_(0), base_ptr_(address),
- data_length_(static_cast<off64_t>(length)) {}
-
- bool HasFd() const { return has_fd_; }
+ : base_ptr_(address), data_length_(static_cast<off64_t>(length)) {}
int GetFileDescriptor() const;
@@ -54,20 +47,16 @@
off64_t GetFileLength() const;
- bool ReadAtOffset(uint8_t* buf, size_t len, off64_t off) const;
+ const uint8_t* ReadAtOffset(uint8_t* buf, size_t len, off64_t off) const;
private:
- // If has_fd_ is true, fd is valid and we'll read contents of a zip archive
- // from the file. Otherwise, we're opening the archive from a memory mapped
- // file. In that case, base_ptr_ points to the start of the memory region and
- // data_length_ defines the file length.
- const bool has_fd_;
+ std::unique_ptr<android::base::MappedFile> mapped_file_;
- const int fd_;
- const off64_t fd_offset_;
+ const int fd_ = -1;
+ const off64_t fd_offset_ = 0;
- const void* const base_ptr_;
- mutable off64_t data_length_;
+ const void* base_ptr_ = nullptr;
+ mutable off64_t data_length_ = -1;
};
class CentralDirectory {
@@ -114,6 +103,11 @@
return ret;
}
+template <typename T>
+static T ConsumeUnaligned(const uint8_t** address) {
+ return ConsumeUnaligned<T>(const_cast<uint8_t**>(address));
+}
+
// Writes the unaligned data of type |T| and auto increment the offset.
template <typename T>
void EmitUnaligned(uint8_t** address, T data) {
diff --git a/zip_archive_stream_entry.cc b/zip_archive_stream_entry.cc
index 248d316..3158159 100644
--- a/zip_archive_stream_entry.cc
+++ b/zip_archive_stream_entry.cc
@@ -23,6 +23,7 @@
#include <sys/types.h>
#include <unistd.h>
+#include <limits>
#include <memory>
#include <vector>
@@ -91,7 +92,8 @@
size_t bytes = (length_ > data_.size()) ? data_.size() : length_;
ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
errno = 0;
- if (!archive->mapped_zip.ReadAtOffset(data_.data(), bytes, offset_)) {
+ auto res = archive->mapped_zip.ReadAtOffset(data_.data(), bytes, offset_);
+ if (!res) {
if (errno != 0) {
ALOGE("Error reading from archive fd: %s", strerror(errno));
} else {
@@ -101,7 +103,9 @@
return nullptr;
}
- if (bytes < data_.size()) {
+ if (res != data_.data()) {
+ data_.assign(res, res + bytes);
+ } else if (bytes < data_.size()) {
data_.resize(bytes);
}
computed_crc32_ = static_cast<uint32_t>(
@@ -209,7 +213,6 @@
if (z_stream_.avail_out == 0) {
z_stream_.next_out = out_.data();
z_stream_.avail_out = static_cast<uint32_t>(out_.size());
- ;
}
while (true) {
@@ -218,11 +221,11 @@
return nullptr;
}
DCHECK_LE(in_.size(), std::numeric_limits<uint32_t>::max()); // Should be buf size = 64k.
- uint32_t bytes = (compressed_length_ > in_.size()) ? static_cast<uint32_t>(in_.size())
- : compressed_length_;
- ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle_);
+ auto bytes = std::min(uint32_t(in_.size()), compressed_length_);
+ auto archive = reinterpret_cast<ZipArchive*>(handle_);
errno = 0;
- if (!archive->mapped_zip.ReadAtOffset(in_.data(), bytes, offset_)) {
+ auto res = archive->mapped_zip.ReadAtOffset(in_.data(), bytes, offset_);
+ if (!res) {
if (errno != 0) {
ALOGE("Error reading from archive fd: %s", strerror(errno));
} else {
@@ -233,7 +236,7 @@
compressed_length_ -= bytes;
offset_ += bytes;
- z_stream_.next_in = in_.data();
+ z_stream_.next_in = res;
z_stream_.avail_in = bytes;
}
diff --git a/zip_archive_test.cc b/zip_archive_test.cc
index 9b77e7f..d9a0d19 100644
--- a/zip_archive_test.cc
+++ b/zip_archive_test.cc
@@ -70,8 +70,10 @@
joined_names_ = header_ + android::base::Join(names_, separator_);
base_ptr_ = reinterpret_cast<uint8_t*>(&joined_names_[0]);
- entry_maps_.emplace_back(CdEntryMapZip32::Create(static_cast<uint16_t>(names_.size())));
- entry_maps_.emplace_back(CdEntryMapZip64::Create());
+ uint16_t num_entries = static_cast<uint16_t>(names_.size());
+ entry_maps_.emplace_back(new CdEntryMapZip32<ZipStringOffset20>(num_entries));
+ entry_maps_.emplace_back(new CdEntryMapZip32<ZipStringOffset32>(num_entries));
+ entry_maps_.emplace_back(new CdEntryMapZip64());
for (auto& cd_map : entry_maps_) {
ASSERT_NE(nullptr, cd_map);
size_t offset = header_.size();
@@ -132,8 +134,16 @@
TEST(ziparchive, Open) {
ZipArchiveHandle handle;
ASSERT_EQ(0, OpenArchiveWrapper(kValidZip, &handle));
+ // TODO(b/287285733): restore mmap() when the cold cache regression is fixed.
+#if 0
+ const auto& mappedFile = handle->mapped_zip;
+ if constexpr (sizeof(void*) < 8) {
+ ASSERT_EQ(nullptr, mappedFile.GetBasePtr());
+ } else {
+ ASSERT_NE(nullptr, mappedFile.GetBasePtr());
+ }
+#endif // 0
CloseArchive(handle);
-
ASSERT_EQ(kInvalidEntryName, OpenArchiveWrapper("bad_filename.zip", &handle));
CloseArchive(handle);
}
@@ -886,7 +896,7 @@
ASSERT_EQ(kInvalidFile, OpenArchiveFd(tmp_file.fd, "LeadingNonZipBytes", &handle, false));
}
-class VectorReader : public zip_archive::Reader {
+class VectorReader final : public zip_archive::Reader {
public:
VectorReader(const std::vector<uint8_t>& input) : Reader(), input_(input) {}
@@ -903,7 +913,7 @@
const std::vector<uint8_t>& input_;
};
-class VectorWriter : public zip_archive::Writer {
+class VectorWriter final : public zip_archive::Writer {
public:
VectorWriter() : Writer() {}
@@ -918,14 +928,14 @@
std::vector<uint8_t> output_;
};
-class BadReader : public zip_archive::Reader {
+class BadReader final : public zip_archive::Reader {
public:
BadReader() : Reader() {}
bool ReadAtOffset(uint8_t*, size_t, off64_t) const { return false; }
};
-class BadWriter : public zip_archive::Writer {
+class BadWriter final : public zip_archive::Writer {
public:
BadWriter() : Writer() {}
diff --git a/zip_cd_entry_map.cc b/zip_cd_entry_map.cc
index f187c06..87ed7cf 100644
--- a/zip_cd_entry_map.cc
+++ b/zip_cd_entry_map.cc
@@ -16,40 +16,27 @@
#include "zip_cd_entry_map.h"
-#include <android-base/logging.h>
-#include <log/log.h>
-
-/*
- * Round up to the next highest power of 2.
- *
- * Found on http://graphics.stanford.edu/~seander/bithacks.html.
- */
-static uint32_t RoundUpPower2(uint32_t val) {
- val--;
- val |= val >> 1;
- val |= val >> 2;
- val |= val >> 4;
- val |= val >> 8;
- val |= val >> 16;
- val++;
-
- return val;
-}
-
static uint32_t ComputeHash(std::string_view name) {
return static_cast<uint32_t>(std::hash<std::string_view>{}(name));
}
+template <typename ZipStringOffset>
+const std::string_view ToStringView(ZipStringOffset& entry, const uint8_t *start) {
+ auto name = reinterpret_cast<const char*>(start + entry.name_offset);
+ return std::string_view{name, entry.name_length};
+}
+
// Convert a ZipEntry to a hash table index, verifying that it's in a valid range.
-std::pair<ZipError, uint64_t> CdEntryMapZip32::GetCdEntryOffset(std::string_view name,
- const uint8_t* start) const {
+template <typename ZipStringOffset>
+std::pair<ZipError, uint64_t> CdEntryMapZip32<ZipStringOffset>::GetCdEntryOffset(
+ std::string_view name, const uint8_t* start) const {
const uint32_t hash = ComputeHash(name);
// NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
uint32_t ent = hash & (hash_table_size_ - 1);
while (hash_table_[ent].name_offset != 0) {
- if (hash_table_[ent].ToStringView(start) == name) {
- return {kSuccess, hash_table_[ent].name_offset};
+ if (ToStringView(hash_table_[ent], start) == name) {
+ return {kSuccess, static_cast<uint64_t>(hash_table_[ent].name_offset)};
}
ent = (ent + 1) & (hash_table_size_ - 1);
}
@@ -58,7 +45,8 @@
return {kEntryNotFound, 0};
}
-ZipError CdEntryMapZip32::AddToMap(std::string_view name, const uint8_t* start) {
+template <typename ZipStringOffset>
+ZipError CdEntryMapZip32<ZipStringOffset>::AddToMap(std::string_view name, const uint8_t* start) {
const uint64_t hash = ComputeHash(name);
uint32_t ent = hash & (hash_table_size_ - 1);
@@ -67,7 +55,7 @@
* Further, we guarantee that the hashtable size is not 0.
*/
while (hash_table_[ent].name_offset != 0) {
- if (hash_table_[ent].ToStringView(start) == name) {
+ if (ToStringView(hash_table_[ent], start) == name) {
// We've found a duplicate entry. We don't accept duplicates.
ALOGW("Zip: Found duplicate entry %.*s", static_cast<int>(name.size()), name.data());
return kDuplicateEntry;
@@ -82,46 +70,26 @@
return kSuccess;
}
-void CdEntryMapZip32::ResetIteration() {
+template <typename ZipStringOffset>
+void CdEntryMapZip32<ZipStringOffset>::ResetIteration() {
current_position_ = 0;
}
-std::pair<std::string_view, uint64_t> CdEntryMapZip32::Next(const uint8_t* cd_start) {
+template <typename ZipStringOffset>
+std::pair<std::string_view, uint64_t> CdEntryMapZip32<ZipStringOffset>::Next(
+ const uint8_t* cd_start) {
while (current_position_ < hash_table_size_) {
const auto& entry = hash_table_[current_position_];
current_position_ += 1;
if (entry.name_offset != 0) {
- return {entry.ToStringView(cd_start), entry.name_offset};
+ return {ToStringView(entry, cd_start), static_cast<uint64_t>(entry.name_offset)};
}
}
// We have reached the end of the hash table.
return {};
}
-CdEntryMapZip32::CdEntryMapZip32(uint16_t num_entries) {
- /*
- * Create hash table. We have a minimum 75% load factor, possibly as
- * low as 50% after we round off to a power of 2. There must be at
- * least one unused entry to avoid an infinite loop during creation.
- */
- hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
- hash_table_ = {
- reinterpret_cast<ZipStringOffset*>(calloc(hash_table_size_, sizeof(ZipStringOffset))), free};
-}
-
-std::unique_ptr<CdEntryMapInterface> CdEntryMapZip32::Create(uint16_t num_entries) {
- auto entry_map = new CdEntryMapZip32(num_entries);
- CHECK(entry_map->hash_table_ != nullptr)
- << "Zip: unable to allocate the " << entry_map->hash_table_size_
- << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
- return std::unique_ptr<CdEntryMapInterface>(entry_map);
-}
-
-std::unique_ptr<CdEntryMapInterface> CdEntryMapZip64::Create() {
- return std::unique_ptr<CdEntryMapInterface>(new CdEntryMapZip64());
-}
-
ZipError CdEntryMapZip64::AddToMap(std::string_view name, const uint8_t* start) {
const auto [it, added] =
entry_table_.insert({name, name.data() - reinterpret_cast<const char*>(start)});
@@ -154,3 +122,17 @@
return *iterator_++;
}
+
+std::unique_ptr<CdEntryMapInterface> CdEntryMapInterface::Create(uint64_t num_entries,
+ size_t cd_length, uint16_t max_file_name_length) {
+ using T = std::unique_ptr<CdEntryMapInterface>;
+ if (num_entries > UINT16_MAX)
+ return T(new CdEntryMapZip64());
+
+ uint16_t num_entries_ = static_cast<uint16_t>(num_entries);
+ if (cd_length > ZipStringOffset20::offset_max ||
+ max_file_name_length > ZipStringOffset20::length_max) {
+ return T(new CdEntryMapZip32<ZipStringOffset32>(num_entries_));
+ }
+ return T(new CdEntryMapZip32<ZipStringOffset20>(num_entries_));
+}
diff --git a/zip_cd_entry_map.h b/zip_cd_entry_map.h
index 4957f75..838acad 100644
--- a/zip_cd_entry_map.h
+++ b/zip_cd_entry_map.h
@@ -23,8 +23,30 @@
#include <string_view>
#include <utility>
+#include <android-base/logging.h>
+#include <log/log.h>
+
#include "zip_error.h"
+/*
+ * Round up to the next highest power of 2.
+ *
+ * Found on http://graphics.stanford.edu/~seander/bithacks.html.
+ *
+ * TODO: could switch to use std::bit_ceil() once ready
+ */
+static constexpr uint32_t RoundUpPower2(uint32_t val) {
+ val--;
+ val |= val >> 1;
+ val |= val >> 2;
+ val |= val >> 4;
+ val |= val >> 8;
+ val |= val >> 16;
+ val++;
+
+ return val;
+}
+
// This class is the interface of the central directory entries map. The map
// helps to locate a particular cd entry based on the filename.
class CdEntryMapInterface {
@@ -45,6 +67,9 @@
// iterator to points to the next element. Returns an empty pair we have read
// past boundary.
virtual std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) = 0;
+
+ static std::unique_ptr<CdEntryMapInterface> Create(uint64_t num_entries,
+ size_t cd_length, uint16_t max_file_name_length);
};
/**
@@ -54,42 +79,69 @@
* string (on 64 bit, 8 bytes) and the length to read from that pointer,
* 2 bytes. Because of alignment, the structure consumes 16 bytes, wasting
* 6 bytes.
- *
- * ZipStringOffset stores a 4 byte offset from a fixed location in the memory
+ */
+
+/**
+ * ZipStringOffset20 stores 20-bit for offset from a fixed location in the memory
+ * mapped file instead of the entire address, with 12-bit for filename length (i.e.
+ * typical PATH_MAX), consuming 4 bytes in total
+ */
+struct ZipStringOffset20 {
+ static constexpr size_t offset_max = (1u << 20) - 1;
+ static constexpr size_t length_max = (1u << 12) - 1;
+ uint32_t name_offset : 20;
+ uint16_t name_length : 12;
+};
+
+static_assert(sizeof(struct ZipStringOffset20) == 4);
+
+/**
+ * ZipStringOffset32 stores a 4 byte offset from a fixed location in the memory
* mapped file instead of the entire address, consuming 8 bytes with alignment.
*/
-struct ZipStringOffset {
+struct ZipStringOffset32 {
uint32_t name_offset;
uint16_t name_length;
-
- const std::string_view ToStringView(const uint8_t* start) const {
- return std::string_view{reinterpret_cast<const char*>(start + name_offset), name_length};
- }
};
// This implementation of CdEntryMap uses an array hash table. It uses less
// memory than std::map; and it's used as the default implementation for zip
// archives without zip64 extension.
+template <typename ZipStringOffset>
class CdEntryMapZip32 : public CdEntryMapInterface {
public:
- static std::unique_ptr<CdEntryMapInterface> Create(uint16_t num_entries);
-
ZipError AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
- private:
- explicit CdEntryMapZip32(uint16_t num_entries);
+ explicit CdEntryMapZip32(uint16_t num_entries) {
+ /*
+ * Create hash table. We have a minimum 75% load factor, possibly as
+ * low as 50% after we round off to a power of 2. There must be at
+ * least one unused entry to avoid an infinite loop during creation.
+ */
+ hash_table_size_ = RoundUpPower2(1 + (num_entries * 4) / 3);
+ hash_table_.reset(static_cast<ZipStringOffset*>(
+ calloc(hash_table_size_, sizeof(ZipStringOffset))));
+
+ CHECK(hash_table_ != nullptr)
+ << "Zip: unable to allocate the " << hash_table_size_
+ << " entry hash_table, entry size: " << sizeof(ZipStringOffset);
+ }
// We know how many entries are in the Zip archive, so we can have a
// fixed-size hash table. We define a load factor of 0.75 and over
// allocate so the maximum number entries can never be higher than
// ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
+ struct FreeDeleter {
+ void operator()(void* ptr) const { ::free(ptr); }
+ };
+ std::unique_ptr<ZipStringOffset[], FreeDeleter> hash_table_;
uint32_t hash_table_size_{0};
- std::unique_ptr<ZipStringOffset[], decltype(&free)> hash_table_{nullptr, free};
+ private:
// The position of element for the current iteration.
uint32_t current_position_{0};
};
@@ -97,16 +149,14 @@
// This implementation of CdEntryMap uses a std::map
class CdEntryMapZip64 : public CdEntryMapInterface {
public:
- static std::unique_ptr<CdEntryMapInterface> Create();
-
ZipError AddToMap(std::string_view name, const uint8_t* start) override;
std::pair<ZipError, uint64_t> GetCdEntryOffset(std::string_view name,
const uint8_t* cd_start) const override;
void ResetIteration() override;
std::pair<std::string_view, uint64_t> Next(const uint8_t* cd_start) override;
- private:
CdEntryMapZip64() = default;
+ private:
std::map<std::string_view, uint64_t> entry_table_;
diff --git a/zip_writer.cc b/zip_writer.cc
index 25b1da4..7f46f5f 100644
--- a/zip_writer.cc
+++ b/zip_writer.cc
@@ -236,7 +236,8 @@
if (flags & ZipWriter::kCompress) {
file_entry.compression_method = kCompressDeflated;
- int32_t result = PrepareDeflate();
+ int compression_level = (flags & ZipWriter::kDefaultCompression) ? 6 : 9;
+ int32_t result = PrepareDeflate(compression_level);
if (result != kNoError) {
return result;
}
@@ -315,7 +316,7 @@
return kNoError;
}
-int32_t ZipWriter::PrepareDeflate() {
+int32_t ZipWriter::PrepareDeflate(int compression_level) {
CHECK(state_ == State::kWritingZip);
// Initialize the z_stream for compression.
@@ -323,8 +324,8 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
- int zerr = deflateInit2(z_stream_.get(), Z_BEST_COMPRESSION, Z_DEFLATED, -MAX_WBITS,
- DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+ int zerr = deflateInit2(z_stream_.get(), compression_level, Z_DEFLATED,
+ -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
#pragma GCC diagnostic pop
if (zerr != Z_OK) {
diff --git a/zip_writer_test.cc b/zip_writer_test.cc
index d324d4b..d29d262 100644
--- a/zip_writer_test.cc
+++ b/zip_writer_test.cc
@@ -45,6 +45,55 @@
}
};
+TEST_F(zipwriter, WriteEmptyUncompressedZipWithOneFile) {
+ ZipWriter writer(file_);
+
+ const char* expected = "";
+
+ ASSERT_EQ(0, writer.StartEntry("file.txt", 0));
+ ASSERT_EQ(0, writer.FinishEntry());
+ ASSERT_EQ(0, writer.Finish());
+
+ ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
+
+ ZipEntry data;
+ ASSERT_EQ(0, FindEntry(handle, "file.txt", &data));
+ EXPECT_EQ(kCompressStored, data.method);
+ EXPECT_EQ(0u, data.has_data_descriptor);
+ EXPECT_EQ(strlen(expected), data.compressed_length);
+ ASSERT_EQ(strlen(expected), data.uncompressed_length);
+ ASSERT_TRUE(AssertFileEntryContentsEq(expected, handle, &data));
+
+ CloseArchive(handle);
+}
+
+TEST_F(zipwriter, WriteEmptyCompressedZipWithOneFile) {
+ ZipWriter writer(file_);
+
+ const char* expected = "";
+
+ ASSERT_EQ(0, writer.StartEntry("file.txt", ZipWriter::kCompress));
+ ASSERT_EQ(0, writer.FinishEntry());
+ ASSERT_EQ(0, writer.Finish());
+
+ ASSERT_GE(0, lseek(fd_, 0, SEEK_SET));
+
+ ZipArchiveHandle handle;
+ ASSERT_EQ(0, OpenArchiveFd(fd_, "temp", &handle, false));
+
+ ZipEntry data;
+ ASSERT_EQ(0, FindEntry(handle, "file.txt", &data));
+ EXPECT_EQ(kCompressDeflated, data.method);
+ EXPECT_EQ(0u, data.has_data_descriptor);
+ ASSERT_EQ(strlen(expected), data.uncompressed_length);
+ ASSERT_TRUE(AssertFileEntryContentsEq(expected, handle, &data));
+
+ CloseArchive(handle);
+}
+
TEST_F(zipwriter, WriteUncompressedZipWithOneFile) {
ZipWriter writer(file_);
diff --git a/ziptool.cpp b/ziptool.cpp
index 8afd8f1..0b42716 100644
--- a/ziptool.cpp
+++ b/ziptool.cpp
@@ -210,7 +210,7 @@
}
}
-class TestWriter : public zip_archive::Writer {
+class TestWriter final : public zip_archive::Writer {
public:
bool Append(uint8_t* buf, size_t size) {
crc = static_cast<uint32_t>(crc32(crc, reinterpret_cast<const Bytef*>(buf),