| /* |
| * Copyright (c) Meta Platforms, Inc. and affiliates. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "bcc_zip.h" |
| |
| #include <fcntl.h> |
| #include <limits.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <sys/mman.h> |
| #include <unistd.h> |
| |
| // Specification of ZIP file format can be found here: |
| // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT |
| // For a high level overview of the structure of a ZIP file see |
| // sections 4.3.1 - 4.3.6. |
| |
// Data structures appearing in ZIP files do not contain any
// padding and they might be misaligned. To allow us to safely
// operate on pointers to such structures and their members, without
// worrying about platform specific alignment issues, we define
// unaligned_uint16_t and unaligned_uint32_t types with no alignment
// requirements.
typedef struct {
  uint8_t raw[2];
} unaligned_uint16_t;

// Decodes a 16-bit field stored in a ZIP structure.
//
// ZIP stores multi-byte fields low byte first, so the value is assembled
// from the individual bytes instead of being copied into a uint16_t. This
// gives the same result regardless of the byte order of the host.
static uint16_t unaligned_uint16_read(unaligned_uint16_t value) {
  return (uint16_t)((uint16_t)value.raw[0] |
                    (uint16_t)((uint16_t)value.raw[1] << 8));
}
| |
// A 32-bit ZIP field kept as raw bytes so that it has no alignment
// requirement.
typedef struct {
  uint8_t raw[4];
} unaligned_uint32_t;

// Decodes a 32-bit field stored in a ZIP structure.
//
// ZIP stores multi-byte fields low byte first, so the value is assembled
// from the individual bytes instead of being copied into a uint32_t. This
// gives the same result regardless of the byte order of the host.
static uint32_t unaligned_uint32_read(unaligned_uint32_t value) {
  return (uint32_t)value.raw[0] |
         ((uint32_t)value.raw[1] << 8) |
         ((uint32_t)value.raw[2] << 16) |
         ((uint32_t)value.raw[3] << 24);
}
| |
| #define END_OF_CD_RECORD_MAGIC 0x06054b50 |
| |
| // See section 4.3.16 of the spec. |
| struct end_of_central_directory_record { |
| // Magic value equal to END_OF_CD_RECORD_MAGIC |
| unaligned_uint32_t magic; |
| |
| // Number of the file containing this structure or 0xFFFF if ZIP64 archive. |
| // Zip archive might span multiple files (disks). |
| unaligned_uint16_t this_disk; |
| |
| // Number of the file containing the beginning of the central directory or |
| // 0xFFFF if ZIP64 archive. |
| unaligned_uint16_t cd_disk; |
| |
| // Number of central directory records on this disk or 0xFFFF if ZIP64 |
| // archive. |
| unaligned_uint16_t cd_records; |
| |
| // Number of central directory records on all disks or 0xFFFF if ZIP64 |
| // archive. |
| unaligned_uint16_t cd_records_total; |
| |
| // Size of the central directory recrod or 0xFFFFFFFF if ZIP64 archive. |
| unaligned_uint32_t cd_size; |
| |
| // Offset of the central directory from the beginning of the archive or |
| // 0xFFFFFFFF if ZIP64 archive. |
| unaligned_uint32_t cd_offset; |
| |
| // Length of comment data following end of central driectory record. |
| unaligned_uint16_t comment_length; |
| |
| // Up to 64k of arbitrary bytes. |
| // uint8_t comment[comment_length] |
| }; |
| |
| #define CD_FILE_HEADER_MAGIC 0x02014b50 |
| #define FLAG_ENCRYPTED (1 << 0) |
| #define FLAG_HAS_DATA_DESCRIPTOR (1 << 3) |
| |
| // See section 4.3.12 of the spec. |
| struct central_directory_file_header { |
| // Magic value equal to CD_FILE_HEADER_MAGIC. |
| unaligned_uint32_t magic; |
| unaligned_uint16_t version; |
| // Minimum zip version needed to extract the file. |
| unaligned_uint16_t min_version; |
| unaligned_uint16_t flags; |
| unaligned_uint16_t compression; |
| unaligned_uint16_t last_modified_time; |
| unaligned_uint16_t last_modified_date; |
| unaligned_uint32_t crc; |
| unaligned_uint32_t compressed_size; |
| unaligned_uint32_t uncompressed_size; |
| unaligned_uint16_t file_name_length; |
| unaligned_uint16_t extra_field_length; |
| unaligned_uint16_t file_comment_length; |
| // Number of the disk where the file starts or 0xFFFF if ZIP64 archive. |
| unaligned_uint16_t disk; |
| unaligned_uint16_t internal_attributes; |
| unaligned_uint32_t external_attributes; |
| // Offset from the start of the disk containing the local file header to the |
| // start of the local file header. |
| unaligned_uint32_t offset; |
| }; |
| |
| #define LOCAL_FILE_HEADER_MAGIC 0x04034b50 |
| |
| // See section 4.3.7 of the spec. |
| struct local_file_header { |
| // Magic value equal to LOCAL_FILE_HEADER_MAGIC. |
| unaligned_uint32_t magic; |
| // Minimum zip version needed to extract the file. |
| unaligned_uint16_t min_version; |
| unaligned_uint16_t flags; |
| unaligned_uint16_t compression; |
| unaligned_uint16_t last_modified_time; |
| unaligned_uint16_t last_modified_date; |
| unaligned_uint32_t crc; |
| unaligned_uint32_t compressed_size; |
| unaligned_uint32_t uncompressed_size; |
| unaligned_uint16_t file_name_length; |
| unaligned_uint16_t extra_field_length; |
| }; |
| |
// State of an opened archive.
struct bcc_zip_archive {
  // Start of the memory mapping that holds the archive contents.
  void* data;
  // Length of the mapping in bytes.
  uint32_t size;
  // Offset of the central directory within the mapping, as recorded in the
  // end of central directory record.
  uint32_t cd_offset;
  // Number of central directory records, as recorded in the end of central
  // directory record.
  uint32_t cd_records;
};
| |
| static void* check_access(struct bcc_zip_archive* archive, uint32_t offset, |
| uint32_t size) { |
| if (offset + size > archive->size || offset > offset + size) { |
| return NULL; |
| } |
| return (char *) archive->data + offset; |
| } |
| |
| // Returns 0 on success, -1 on error and -2 if the eocd indicates |
| // the archive uses features which are not supported. |
| static int try_parse_end_of_central_directory(struct bcc_zip_archive* archive, |
| uint32_t offset) { |
| struct end_of_central_directory_record* eocd = check_access( |
| archive, offset, sizeof(struct end_of_central_directory_record)); |
| if (eocd == NULL || |
| unaligned_uint32_read(eocd->magic) != END_OF_CD_RECORD_MAGIC) { |
| return -1; |
| } |
| |
| uint16_t comment_length = unaligned_uint16_read(eocd->comment_length); |
| if (offset + sizeof(struct end_of_central_directory_record) + |
| comment_length != |
| archive->size) { |
| return -1; |
| } |
| |
| uint16_t cd_records = unaligned_uint16_read(eocd->cd_records); |
| if (unaligned_uint16_read(eocd->this_disk) != 0 || |
| unaligned_uint16_read(eocd->cd_disk) != 0 || |
| unaligned_uint16_read(eocd->cd_records_total) != cd_records) { |
| // This is a valid eocd, but we only support single-file non-ZIP64 archives. |
| return -2; |
| } |
| |
| uint32_t cd_offset = unaligned_uint32_read(eocd->cd_offset); |
| uint32_t cd_size = unaligned_uint32_read(eocd->cd_size); |
| if (check_access(archive, cd_offset, cd_size) == NULL) { |
| return -1; |
| } |
| |
| archive->cd_offset = cd_offset; |
| archive->cd_records = cd_records; |
| return 0; |
| } |
| |
| static int find_central_directory(struct bcc_zip_archive* archive) { |
| if (archive->size <= sizeof(struct end_of_central_directory_record)) { |
| return -1; |
| } |
| |
| int rc = -1; |
| // Because the end of central directory ends with a variable length array of |
| // up to 0xFFFF bytes we can't know exactly where it starts and need to |
| // search for it at the end of the file, scanning the (limit, offset] range. |
| int64_t offset = |
| (int64_t)archive->size - sizeof(struct end_of_central_directory_record); |
| int64_t limit = offset - (1 << 16); |
| for (; offset >= 0 && offset > limit && rc == -1; offset--) { |
| rc = try_parse_end_of_central_directory(archive, offset); |
| } |
| |
| return rc; |
| } |
| |
| struct bcc_zip_archive* bcc_zip_archive_open(const char* path) { |
| int fd = open(path, O_RDONLY); |
| if (fd < 0) { |
| return NULL; |
| } |
| |
| off_t size = lseek(fd, 0, SEEK_END); |
| if (size == (off_t)-1 || size > UINT32_MAX) { |
| close(fd); |
| return NULL; |
| } |
| |
| void* data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); |
| close(fd); |
| |
| if (data == MAP_FAILED) { |
| return NULL; |
| } |
| |
| struct bcc_zip_archive* archive = malloc(sizeof(struct bcc_zip_archive)); |
| if (archive == NULL) { |
| munmap(data, size); |
| return NULL; |
| }; |
| |
| archive->data = data; |
| archive->size = size; |
| if (find_central_directory(archive)) { |
| munmap(data, size); |
| free(archive); |
| archive = NULL; |
| } |
| |
| return archive; |
| } |
| |
| void bcc_zip_archive_close(struct bcc_zip_archive* archive) { |
| munmap(archive->data, archive->size); |
| free(archive); |
| } |
| |
| static struct local_file_header* local_file_header_at_offset( |
| struct bcc_zip_archive* archive, uint32_t offset) { |
| struct local_file_header* lfh = |
| check_access(archive, offset, sizeof(struct local_file_header)); |
| if (lfh == NULL || |
| unaligned_uint32_read(lfh->magic) != LOCAL_FILE_HEADER_MAGIC) { |
| return NULL; |
| } |
| return lfh; |
| } |
| |
| static int get_entry_at_offset(struct bcc_zip_archive* archive, uint32_t offset, |
| struct bcc_zip_entry* out) { |
| struct local_file_header* lfh = local_file_header_at_offset(archive, offset); |
| offset += sizeof(struct local_file_header); |
| if (lfh == NULL) { |
| return -1; |
| }; |
| |
| uint16_t flags = unaligned_uint16_read(lfh->flags); |
| if ((flags & FLAG_ENCRYPTED) || (flags & FLAG_HAS_DATA_DESCRIPTOR)) { |
| return -1; |
| } |
| |
| uint16_t name_length = unaligned_uint16_read(lfh->file_name_length); |
| const char* name = check_access(archive, offset, name_length); |
| offset += name_length; |
| if (name == NULL) { |
| return -1; |
| } |
| |
| uint16_t extra_field_length = unaligned_uint16_read(lfh->extra_field_length); |
| if (check_access(archive, offset, extra_field_length) == NULL) { |
| return -1; |
| } |
| offset += extra_field_length; |
| |
| uint32_t compressed_size = unaligned_uint32_read(lfh->compressed_size); |
| void* data = check_access(archive, offset, compressed_size); |
| if (data == NULL) { |
| return -1; |
| } |
| |
| out->compression = unaligned_uint16_read(lfh->compression); |
| out->name_length = name_length; |
| out->name = name; |
| out->data = data; |
| out->data_length = compressed_size; |
| out->data_offset = offset; |
| |
| return 0; |
| } |
| |
| static struct central_directory_file_header* cd_file_header_at_offset( |
| struct bcc_zip_archive* archive, uint32_t offset) { |
| struct central_directory_file_header* cdfh = check_access( |
| archive, offset, sizeof(struct central_directory_file_header)); |
| if (cdfh == NULL || |
| unaligned_uint32_read(cdfh->magic) != CD_FILE_HEADER_MAGIC) { |
| return NULL; |
| } |
| return cdfh; |
| } |
| |
| int bcc_zip_archive_find_entry(struct bcc_zip_archive* archive, |
| const char* file_name, |
| struct bcc_zip_entry* out) { |
| size_t file_name_length = strlen(file_name); |
| |
| uint32_t offset = archive->cd_offset; |
| for (uint32_t i = 0; i < archive->cd_records; ++i) { |
| struct central_directory_file_header* cdfh = |
| cd_file_header_at_offset(archive, offset); |
| offset += sizeof(struct central_directory_file_header); |
| if (cdfh == NULL) { |
| return -1; |
| } |
| |
| uint16_t cdfh_name_length = unaligned_uint16_read(cdfh->file_name_length); |
| const char* cdfh_name = check_access(archive, offset, cdfh_name_length); |
| if (cdfh_name == NULL) { |
| return -1; |
| } |
| |
| uint16_t cdfh_flags = unaligned_uint16_read(cdfh->flags); |
| if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && |
| (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && |
| file_name_length == cdfh_name_length && |
| memcmp(file_name, (char*) archive->data + offset, file_name_length) == 0) { |
| return get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset), |
| out); |
| } |
| |
| offset += cdfh_name_length; |
| offset += unaligned_uint16_read(cdfh->extra_field_length); |
| offset += unaligned_uint16_read(cdfh->file_comment_length); |
| } |
| |
| return -1; |
| } |
| |
| int bcc_zip_archive_find_entry_at_offset(struct bcc_zip_archive* archive, |
| uint32_t target, |
| struct bcc_zip_entry* out) { |
| uint32_t offset = archive->cd_offset; |
| for (uint32_t i = 0; i < archive->cd_records; ++i) { |
| struct central_directory_file_header* cdfh = |
| cd_file_header_at_offset(archive, offset); |
| offset += sizeof(struct central_directory_file_header); |
| if (cdfh == NULL) { |
| return -1; |
| } |
| |
| uint16_t cdfh_flags = unaligned_uint16_read(cdfh->flags); |
| if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && |
| (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0) { |
| if (get_entry_at_offset(archive, unaligned_uint32_read(cdfh->offset), |
| out)) { |
| return -1; |
| } |
| |
| if ((char*) out->data <= (char*) archive->data + target && |
| (char*) archive->data + target < (char*) out->data + out->data_length) { |
| return 0; |
| } |
| } |
| |
| offset += unaligned_uint16_read(cdfh->file_name_length); |
| offset += unaligned_uint16_read(cdfh->extra_field_length); |
| offset += unaligned_uint16_read(cdfh->file_comment_length); |
| } |
| |
| return -1; |
| } |
| |
| struct bcc_zip_archive* bcc_zip_archive_open_and_find( |
| const char* path, struct bcc_zip_entry* out) { |
| struct bcc_zip_archive* archive = NULL; |
| const char* separator = strstr(path, "!/"); |
| if (separator == NULL || separator - path >= PATH_MAX) { |
| return NULL; |
| } |
| |
| char archive_path[PATH_MAX]; |
| strncpy(archive_path, path, separator - path); |
| archive_path[separator - path] = 0; |
| archive = bcc_zip_archive_open(archive_path); |
| if (archive == NULL) { |
| return NULL; |
| } |
| |
| if (bcc_zip_archive_find_entry(archive, separator + 2, out)) { |
| bcc_zip_archive_close(archive); |
| return NULL; |
| } |
| |
| return archive; |
| } |