bsdiff: support for lazy reading from extent files in bspatch
Previously, bspatch used to read the whole content of a file to be
patched into a memory buffer, only to copy this data while generating
the new file content, which is written to a second buffer. The said
read buffer entails an unnecessary memory allocation requirement,
especially since it's being indexed almost linearly. This behavior was
further extended to support extents (i.e. a list of <offset, length>
pairs), which are used extensively during Chrome OS updates.
This change introduces extent files, which let users open files through
a layer of extents as ordinary glibc file handles. This in turn allows
us to easily convert reads from a memory buffer into direct reads from
an extent file. Extent files are buffered on the outer level (done for
us by glibc), but otherwise use a system file descriptor for the
underlying I/O; this proved to be the most efficient combination when
applying actual update payloads. Since we are reading a single byte at
a time using fread(2), and since the program is decidedly
single-threaded, we shift to using _unlocked variants, which appear to
reduce the total update time significantly without otherwise affecting
memory/CPU footprint.
We expect this to cut bspatch's memory usage by nearly one half. Note
that in general it is possible to use the same abstraction for
implementing direct writing to the target file; however, due to the way
we implement delta updates, there is risk that such writes might clobber
the read data, and so further support is needed to mark safe operations
(i.e. no read/write dependencies) as such.
This CL obsoletes the previous ebuild patch for handling extent
arguments, which is therefore removed. It also (i) gets rid of logic for
special handling of /dev/fd filenames, which is deemed redundant; (ii)
fixes the Makefile (via a separate patch) and changes the ebuild to use
it, for uniformity; (iii) updates the ebuild to EAPI version 4; (iv)
sets -Wall -Werror and fixes eliminates a warning due to bsdiff.c; (v)
enhances man pages for both bsdiff/bspatch.
BUG=chromium:229705
TEST=Passes update engine unittests with new bspatch
TEST=delta payload with BSDIFF operations updates correctly on x86-alex
Change-Id: I4bb4afa42e43279048093e7a7f0ef96406b0c9e0
Reviewed-on: https://gerrit.chromium.org/gerrit/49595
Reviewed-by: Gilad Arnold <garnold@chromium.org>
Tested-by: Gilad Arnold <garnold@chromium.org>
Commit-Queue: Gilad Arnold <garnold@chromium.org>
diff --git a/Makefile b/Makefile
index a522607..e85d277 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,34 @@
-CFLAGS += -O3 -lbz2
+BINARIES = bsdiff bspatch
-PREFIX ?= /usr/local
-INSTALL_PROGRAM ?= ${INSTALL} -c -s -m 555
-INSTALL_MAN ?= ${INSTALL} -c -m 444
+INSTALL = install
+CFLAGS += -O3 -Wall -Werror
-all: bsdiff bspatch
-bsdiff: bsdiff.c
-bspatch: bspatch.c
+DESTDIR ?=
+PREFIX = /usr
+BINDIR = $(PREFIX)/bin
+DATADIR = $(PREFIX)/share
+MANDIR = $(DATADIR)/man
+MAN1DIR = $(MANDIR)/man1
+INSTALL_PROGRAM ?= $(INSTALL) -c -m 755
+INSTALL_MAN ?= $(INSTALL) -c -m 444
+
+.PHONY: all clean
+all: $(BINARIES)
+clean:
+ rm -f *.o $(BINARIES)
+
+bsdiff: bsdiff.o
+bsdiff: LDLIBS += -lbz2 -ldivsufsort -ldivsufsort64
+bspatch: bspatch.o extents.o exfile.o
+bspatch: LDLIBS += -lbz2
+
+bspatch.o: extents.h exfile.h
+extents.o: extents.h exfile.h
+exfile.o: exfile.h
install:
- ${INSTALL_PROGRAM} bsdiff bspatch ${PREFIX}/bin
-.ifndef WITHOUT_MAN
- ${INSTALL_MAN} bsdiff.1 bspatch.1 ${PREFIX}/man/man1
-.endif
+ mkdir -p $(DESTDIR)$(BINDIR) $(DESTDIR)$(MAN1DIR)
+ $(INSTALL_PROGRAM) $(BINARIES) $(DESTDIR)$(BINDIR)
+ifndef WITHOUT_MAN
+ $(INSTALL_MAN) $(BINARIES:=.1) $(DESTDIR)$(MAN1DIR)
+endif
diff --git a/bsdiff.1 b/bsdiff.1
index ead6c4d..659f5ff 100644
--- a/bsdiff.1
+++ b/bsdiff.1
@@ -33,20 +33,21 @@
.Nd generate a patch between two binary files
.Sh SYNOPSIS
.Nm
-.Ao Ar oldfile Ac Ao Ar newfile Ac Ao Ar patchfile Ac
+.Ar oldfile newfile patchfile
.Sh DESCRIPTION
.Nm
compares
-.Ao Ar oldfile Ac
+.Ar oldfile
to
-.Ao Ar newfile Ac
+.Ar newfile
and writes to
-.Ao Ar patchfile Ac
-a binary patch suitable for use by bspatch(1).
+.Ar patchfile
+a binary patch suitable for use by
+.Xr bspatch 1 .
When
-.Ao Ar oldfile Ac
+.Ar oldfile
and
-.Ao Ar newfile Ac
+.Ar newfile
are two versions of an executable program, the
patches produced are on average a factor of five smaller
than those produced by any other binary patch tool known
@@ -54,7 +55,7 @@
.Pp
.Nm
uses memory equal to 17 times the size of
-.Ao Ar oldfile Ac ,
+.Ar oldfile ,
and requires
an absolute minimum working set size of 8 times the size of oldfile.
.Sh SEE ALSO
diff --git a/bsdiff.c b/bsdiff.c
index 11d955c..90d9134 100644
--- a/bsdiff.c
+++ b/bsdiff.c
@@ -58,7 +58,7 @@
return i;
}
-static off_t search(off_t *I,u_char *old,off_t oldsize,
+static off_t search(saidx_t *I,u_char *old,off_t oldsize,
u_char *new,off_t newsize,off_t st,off_t en,off_t *pos)
{
off_t x,y;
@@ -108,7 +108,7 @@
u_char *old,*new;
off_t oldsize,newsize;
saidx_t *I;
- off_t scan,pos,len;
+ off_t scan,pos=0,len;
off_t lastscan,lastpos,lastoffset;
off_t oldscore,scsc;
off_t s,Sf,lenf,Sb,lenb;
diff --git a/bspatch.1 b/bspatch.1
index 82a2781..ecac8db 100644
--- a/bspatch.1
+++ b/bspatch.1
@@ -33,24 +33,42 @@
.Nd apply a patch built with bsdiff(1)
.Sh SYNOPSIS
.Nm
-.Ao Ar oldfile Ac Ao Ar newfile Ac Ao Ar patchfile Ac
+.Ar oldfile newfile patchfile
+.Op Ar old-extents new-extents
.Sh DESCRIPTION
.Nm
generates
-.Ao Ar newfile Ac
+.Ar newfile
from
-.Ao Ar oldfile Ac
+.Ar oldfile
and
-.Ao Ar patchfile Ac
+.Ar patchfile ,
where
-.Ao Ar patchfile Ac
-is a binary patch built by bsdiff(1).
+.Ar patchfile
+is a binary patch built by
+.Xr bsdiff 1 .
+.Pp
+When provided,
+.Ar old-extents
+and
+.Ar new-extents
+instruct
+.Nm
+to read specific chunks of data from the old file and to write to specific
+locations in the new file, respectively. Each is a comma-separated list of
+extents of the form
+.Ar offset : Ns Ar length ,
+where
+.Ar offset
+is either -1 or a non-negative integer and
+.Ar length
+is a positive integer. An offset value of -1 denotes a sparse extent, namely a
+sequence of zeros that entails neither reading nor writing of actual file
+content.
.Pp
.Nm
uses memory equal to the size of
-.Ao Ar oldfile Ac
-plus the size of
-.Ao Ar newfile Ac ,
+.Ar newfile ,
but can tolerate a very small working set without a dramatic loss
of performance.
.Sh SEE ALSO
diff --git a/bspatch.c b/bspatch.c
index 4be99c7..9f18301 100644
--- a/bspatch.c
+++ b/bspatch.c
@@ -29,274 +29,18 @@
#endif
#include <bzlib.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
#include <err.h>
-#include <unistd.h>
#include <fcntl.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
#include <sys/types.h> // android
-#define JOIN(a, b) __JOIN(a, b)
-#define __JOIN(a, b) a ## b
-#define COMPILE_ASSERT(expr, message) \
- typedef char JOIN(message, JOIN(_, __LINE__)) [(expr) ? 1 : -1]
+#include "exfile.h"
+#include "extents.h"
-COMPILE_ASSERT(sizeof(int64_t) == 8, int64_t_64_bit);
-COMPILE_ASSERT(sizeof(off_t) == 8, off_t_64_bit);
-
-#define MIN(a, b) \
- ((a) < (b) ? (a) : (b))
-
-static const char kFdFilePrefix[] = "/dev/fd/";
-
-// Reads next int from *ints. The int should be terminated with a comma
-// or NULL char. *ints will be updated to the space right after the comma
-// or set to NULL if this was the last number. This assumes the input is
-// a valid string, as validated with PositionsStringIsValid().
-// Returns 1 on success.
-int NextInt64(const char** ints, int64_t *out) {
- if (!ints[0])
- return 0;
- int r = sscanf(*ints, "%" PRIi64, out);
- if (r == 1) {
- const char* next_comma = strchr(*ints, ',');
- const char* next_colon = strchr(*ints, ':');
- if (!next_comma && !next_colon)
- *ints = NULL;
- else if (!next_comma)
- *ints = next_colon + 1;
- else if (!next_colon)
- *ints = next_comma + 1;
- else
- *ints = MIN(next_comma, next_colon) + 1;
- return 1;
- }
- return 0;
-}
-
-COMPILE_ASSERT(sizeof(intmax_t) == 8, intmax_t_not_64_bit);
-
-// Returns 1 if str can be converted to int64_t without over/underflowing.
-// str is assumed to point to an optional negative sign followed by numbers,
-// optionally followed by non-numeric characters, followed by '\0'.
-int IsValidInt64(const char* str) {
- char* end_ptr = 0;
- errno = 0;
- intmax_t result = strtoimax(str, &end_ptr, /* base: */ 10);
- return errno == 0;
-}
-
-// Input validator. Make sure the positions string is well formatted.
-// All numerical values are checked to make sure they don't over/underflow
-// int64_t. Returns 1 if valid.
-int PositionsStringIsValid(const char* positions) {
- if (positions == NULL)
- errx(1, "bad string");
-
- // Special case: empty string is valid
- if (!positions[0])
- return 1;
-
- // Use a state machine to determine if the string is valid.
- // Key: (s): state, ((s)) valid end state.
- // n (negative_valid) is a boolean that starts out as true.
- // If n is true, ':' is the delimiter, otherwise ','.
- //
- // .--------------------------.
- // | | n ? ':' : ',' ; n = !n
- // V '-'&&n 0-9 |
- // start->(0)------------->(1)----->((2))---.
- // `---------------------> <--' 0-9
- // 0-9
- int state = 0;
- int negative_valid = 1;
- const char* number_start = positions;
- for (;; positions++) {
- char c = *positions;
- switch (state) {
- case 0:
- if (c == '-' && negative_valid) {
- state = 1;
- continue;
- }
- if (isdigit(c)) {
- state = 2;
- continue;
- }
- return 0;
- case 1:
- if (isdigit(c)) {
- state = 2;
- continue;
- }
- return 0;
- case 2:
- if (isdigit(c))
- continue;
- // number_start must point to a valid number
- if (!IsValidInt64(number_start)) {
- return 0;
- }
- if ((negative_valid && c == ':') ||
- (!negative_valid && c == ',')) {
- state = 0;
- number_start = positions + 1;
- negative_valid = !negative_valid;
- continue;
- }
- return (c == '\0');
- }
- }
-}
-
-static const int64_t kMaxLength = sizeof(size_t) > 4 ? INT64_MAX : SIZE_MAX;
-
-// Reads into a buffer a series of byte ranges from filename.
-// Each range is a pair of comma-separated ints from positions.
-// -1 as an offset means a sparse-hole.
-// E.g. If positions were "1,5:23,4:-1,8:3,7", then we would return a buffer
-// consisting of 5 bytes from offset 1 of the file, followed by
-// 4 bytes from offset 23, then 8 bytes of all zeros, then 7 bytes from
-// offset 3 in the file.
-// Returns NULL on error.
-static char* PositionedRead(const char* filename,
- const char* positions,
- ssize_t* old_size) {
- if (!PositionsStringIsValid(positions)) {
- errx(1, "invalid positions string for read\n");
- }
-
- // Get length
- const char* p = positions;
- int64_t length = 0;
- for (;;) {
- int64_t value;
- if (0 == NextInt64(&p, &value)) {
- break;
- }
- int r = NextInt64(&p, &value);
- if (r == 0) {
- errx(1, "bad length parse\n");
- }
- if (value < 0) {
- errx(1, "length can't be negative\n");
- }
- length += value;
- }
-
- // Malloc
- if (length > 0x40000000) { // 1 GiB; sanity check
- errx(1, "Read length too long (exceeds 1 GiB)");
- }
- // Following bsdiff convention, allocate length + 1 to avoid malloc(0)
- char* buf = malloc(length + 1);
- if (buf == NULL) {
- errx(1, "malloc failed\n");
- }
- char* buf_tail = buf;
-
- int fd = -1;
- int should_close = 1;
- if (strlen(filename) > strlen(kFdFilePrefix) &&
- !strncmp(filename, kFdFilePrefix, strlen(kFdFilePrefix))) {
- sscanf(filename + strlen(kFdFilePrefix), "%d", &fd);
- should_close = 0;
- } else {
- fd = open(filename, O_RDONLY);
- }
- if (fd < 0) {
- errx(1, "open failed for read\n");
- }
-
- // Read bytes
- p = positions;
- for (;;) {
- int64_t offset, read_length;
- if (NextInt64(&p, &offset) == 0) {
- break;
- }
- if (offset < 0) {
- errx(1, "no support for sparse positions "
- "yet during read\n");
- }
- if (NextInt64(&p, &read_length) == 0) {
- errx(1, "bad length parse (should never happen)\n");
- }
- if (read_length < 0) {
- errx(1, "length can't be negative "
- "(should never happen)\n");
- }
- if (read_length > kMaxLength) {
- errx(1, "read length too large\n");
- }
- ssize_t rc = pread(fd, buf_tail, (size_t)read_length, (off_t)offset);
- if (rc != read_length) {
- errx(1, "read failed\n");
- }
- buf_tail += rc;
- }
- if (should_close)
- close(fd);
- *old_size = length;
- return buf;
-}
-
-static void PositionedWrite(const char* filename,
- const char* positions,
- const char* buf,
- ssize_t new_size) {
- if (!PositionsStringIsValid(positions)) {
- errx(1, "invalid positions string for write\n");
- }
- int fd = -1;
- int should_close = 1;
- if (strlen(filename) > strlen(kFdFilePrefix) &&
- !strncmp(filename, kFdFilePrefix, strlen(kFdFilePrefix))) {
- sscanf(filename + strlen(kFdFilePrefix), "%d", &fd);
- should_close = 0;
- } else {
- fd = open(filename, O_WRONLY | O_CREAT, 0666);
- }
- if (fd < 0) {
- errx(1, "open failed for write\n");
- }
-
- for (;;) {
- int64_t offset, length;
- if (NextInt64(&positions, &offset) == 0) {
- break;
- }
- if (NextInt64(&positions, &length) == 0) {
- errx(1, "bad length parse for write\n");
- }
- if (length < 0) {
- errx(1, "length can't be negative for write\n");
- }
- if (length > kMaxLength) {
- errx(1, "write length too large\n");
- }
-
- if (offset < 0) {
- // Sparse hole. Skip.
- } else {
- ssize_t rc = pwrite(fd, buf, (size_t)length, (off_t)offset);
- if (rc != length) {
- errx(1, "write failed\n");
- }
- }
- buf += length;
- new_size -= length;
- }
- if (new_size != 0) {
- errx(1, "output position length doesn't match new size\n");
- }
- if (should_close)
- close(fd);
-}
static off_t offtin(u_char *buf)
{
@@ -316,28 +60,43 @@
return y;
}
+/* Parses an extent string ex_str, returning a pointer to a newly allocated
+ * array of extents. The number of extents is stored in ex_count_p (if
+ * provided). */
+static ex_t *parse_extent_str(const char *ex_str, size_t *ex_count_p)
+{
+ size_t ex_count = (size_t)-1;
+ ex_t *ex_arr = extents_parse(ex_str, NULL, &ex_count);
+ if (!ex_arr)
+ errx(1, (ex_count == (size_t)-1 ?
+ "error parsing extents" :
+ "error allocating extent array"));
+ if (ex_count_p)
+ *ex_count_p = ex_count;
+ return ex_arr;
+}
+
+#define USAGE_TEMPLATE_STR \
+ "usage: %s oldfile newfile patchfile [old-extents new-extents]\n" \
+ "with extents taking the form \"off_1:len_1,...,off_n:len_n\"\n"
+
int main(int argc,char * argv[])
{
FILE * f, * cpf, * dpf, * epf;
BZFILE * cpfbz2, * dpfbz2, * epfbz2;
int cbz2err, dbz2err, ebz2err;
- int fd;
+ FILE *old_file = NULL, *new_file = NULL;
ssize_t oldsize,newsize;
ssize_t bzctrllen,bzdatalen;
u_char header[32],buf[8];
- u_char *old, *new;
+ u_char *new;
off_t oldpos,newpos;
off_t ctrl[3];
off_t lenread;
- off_t i;
+ off_t i, j;
- if ((argc != 6) && (argc != 4)) {
- errx(1,"usage: %s oldfile newfile patchfile \\\n"
- " [in_offset,in_length,in_offset,in_length,... \\\n"
- " out_offset,out_length,"
- "out_offset,out_length,...]\n",argv[0]);
- }
- int using_positioning = (argc == 6);
+ if ((argc != 6) && (argc != 4)) errx(1, USAGE_TEMPLATE_STR, argv[0]);
+ int using_extents = (argc == 6);
/* Open patch file */
if ((f = fopen(argv[3], "r")) == NULL)
@@ -400,18 +159,21 @@
if ((epfbz2 = BZ2_bzReadOpen(&ebz2err, epf, 0, 0, NULL, 0)) == NULL)
errx(1, "BZ2_bzReadOpen, bz2err = %d", ebz2err);
- // Read
-
- if (!using_positioning) {
- if(((fd=open(argv[1],O_RDONLY,0))<0) ||
- ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
- ((old=malloc(oldsize+1))==NULL) ||
- (lseek(fd,0,SEEK_SET)!=0) ||
- (read(fd,old,oldsize)!=oldsize) ||
- (close(fd)==-1)) err(1,"%s",argv[1]);
+ /* Open input file for reading. */
+ if (using_extents) {
+ size_t ex_count = 0;
+ ex_t *ex_arr = parse_extent_str(argv[4], &ex_count);
+ old_file = exfile_fopen(argv[1], "r", ex_arr, ex_count, free);
} else {
- old = PositionedRead(argv[1], argv[4], &oldsize);
+ old_file = fopen(argv[1], "r");
}
+ if (!old_file ||
+ fseek(old_file, 0, SEEK_END) != 0 ||
+ (oldsize = ftell(old_file)) < 0 ||
+ fseek(old_file, 0, SEEK_SET) != 0)
+ err(1, "cannot obtain the size of %s", argv[1]);
+ off_t old_file_pos = 0;
+
if((new=malloc(newsize+1))==NULL) err(1,NULL);
oldpos=0;newpos=0;
@@ -440,10 +202,25 @@
((dbz2err != BZ_OK) && (dbz2err != BZ_STREAM_END)))
errx(1, "Corrupt patch\n");
- /* Add old data to diff string */
- for(i=0;i<ctrl[0];i++)
- if((oldpos+i>=0) && (oldpos+i<oldsize))
- new[newpos+i]+=old[oldpos+i];
+ /* Add old data to diff string. It is enough to fseek once, at
+ * the beginning of the sequence, to avoid unnecessary
+ * overhead. */
+ j = newpos;
+ if ((i = oldpos) < 0) {
+ j -= i;
+ i = 0;
+ }
+ if (i != old_file_pos && fseek(old_file, i, SEEK_SET) < 0)
+ err(1, "error seeking input file to offset %" PRIdMAX,
+ (intmax_t)i);
+ if ((old_file_pos = oldpos + ctrl[0]) > oldsize)
+ old_file_pos = oldsize;
+ while (i++ < old_file_pos) {
+ u_char c;
+ if (fread_unlocked(&c, 1, 1, old_file) != 1)
+ err(1, "error reading from input file");
+ new[j++] += c;
+ }
/* Adjust pointers */
newpos+=ctrl[0];
@@ -464,6 +241,9 @@
oldpos+=ctrl[2];
};
+ /* Close input file. */
+ fclose(old_file);
+
/* Clean up the bzip2 reads */
BZ2_bzReadClose(&cbz2err, cpfbz2);
BZ2_bzReadClose(&dbz2err, dpfbz2);
@@ -472,16 +252,19 @@
err(1, "fclose(%s)", argv[3]);
/* Write the new file */
- if (!using_positioning) {
- if(((fd=open(argv[2],O_CREAT|O_TRUNC|O_WRONLY,0666))<0) ||
- (write(fd,new,newsize)!=newsize) || (close(fd)==-1))
- err(1,"%s",argv[2]);
+ if (using_extents) {
+ size_t ex_count = 0;
+ ex_t *ex_arr = parse_extent_str(argv[5], &ex_count);
+ new_file = exfile_fopen(argv[2], "w", ex_arr, ex_count, free);
} else {
- PositionedWrite(argv[2], argv[5], new, newsize);
+ new_file = fopen(argv[2], "w");
}
+ if (!new_file ||
+ fwrite_unlocked(new, 1, newsize, new_file) != newsize ||
+ fclose(new_file) == EOF)
+ err(1,"%s",argv[2]);
free(new);
- free(old);
return 0;
}
diff --git a/exfile.c b/exfile.c
new file mode 100644
index 0000000..1cc324f
--- /dev/null
+++ b/exfile.c
@@ -0,0 +1,413 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "exfile.h"
+
+/*
+ * Extent files implementation. Some things worth noting:
+ *
+ * - We are using glibc's buffered FILE objects for the underlying file I/O;
+ * this contributes to improved performance, especially with badly fragmented
+ * extents. However, the FILE handle we return to the caller is decidedly
+ * unbuffered: making it buffered too seems superfluous, causing excess data
+ * copying and memory use.
+ *
+ * - We maintain the "logical" file position separately from the "physical"
+ * (underlying) file position. The latter is updated lazily whenever actual
+ * file I/O is about to be performed.
+ *
+ * - The logical position of an extent file is internally represented by the
+ * current extent index (curr_ex_idx) and the position within the current
+ * extent (curr_ex_pos), as well as an absolute logical position (curr_pos).
+ * In general, curr_pos should equal the total length of all extents prior to
+ * curr_ex_idx, plus curr_ex_pos. Also, curr_ex_idx may range between 0 and
+ * the total extent count; if it is exactly the latter, then curr_ex_pos must
+ * be zero, representing the fact that the we are at the logical end of the
+ * file. Otherwise, curr_ex_pos may range between 0 and the length of the
+ * current extent; if it is exactly the latter, then this is equivalent to
+ * position zero on the next extent. All functions should honor this
+ * duality.
+ *
+ * - Seeking is done efficiently at O(log(D)), where D is the
+ * number of extents between the current position and the new one. This seems
+ * like a good midway for supporting both sequential and random access.
+ */
+
+
+#define TRUE 1
+#define FALSE 0
+
+#define arraysize(x) (sizeof(x) / sizeof(*(x)))
+
+
+/* Extent prefix length. */
+typedef struct {
+ size_t prec; /* total length of preceding extents */
+ size_t total; /* total length including current extent */
+} prefix_len_t;
+
+/* Extent file logical modes. Used as index to the mapping from logical modes
+ * to open(2) and fopen(3) modes below. */
+typedef enum {
+ EXFILE_MODE_RO,
+ EXFILE_MODE_WO,
+ EXFILE_MODE_RW,
+ EXFILE_MODE_MAX /* sentinel */
+} exfile_mode_t;
+
+/* An extent file control object (aka "cookie"). */
+typedef struct {
+ int fd; /* underlying file descriptor */
+ size_t ex_count; /* number of extents (non-zero) */
+ ex_t *ex_arr; /* array of extents */
+ prefix_len_t *prefix_len_arr; /* total lengths of extent prefixes */
+ void (*ex_free)(void *); /* extent array free function */
+ size_t total_ex_len; /* total length of all extents (constant) */
+ off_t curr_file_pos; /* current underlying file position */
+ size_t curr_ex_idx; /* current extent index */
+ size_t curr_ex_pos; /* current position within extent */
+ off_t curr_pos; /* current logical file position */
+} exfile_t;
+
+
+/* Mapping from fopen(3) modes to extent file logical modes. */
+static const struct {
+ const char *fopen_mode;
+ exfile_mode_t mode;
+} fopen_mode_to_mode[] = {
+ {"r", EXFILE_MODE_RO},
+ {"r+", EXFILE_MODE_RW},
+ {"w", EXFILE_MODE_WO},
+ {"w+", EXFILE_MODE_RW},
+};
+
+
+/* Mapping from extent file logical modes to open(2) flags. */
+static const int mode_to_open_flags[EXFILE_MODE_MAX] = {
+ O_RDONLY,
+ O_WRONLY,
+ O_RDWR,
+};
+
+
+/* Searches an array |ex_arr| of |ex_count| extents and returns the index of
+ * the extent that contains the location |pos|. Uses an array |prefix_len_arr|
+ * of corresponding prefix lengths. The total complexity is O(log(D)), where D
+ * is the distance between the returned extent index and |init_ex_idx|. */
+static size_t
+ex_arr_search(size_t ex_count, const ex_t *ex_arr,
+ const prefix_len_t *prefix_len_arr, size_t pos,
+ size_t init_ex_idx)
+{
+ assert(ex_arr && ex_count);
+ const size_t last_ex_idx = ex_count - 1;
+ assert(init_ex_idx <= ex_count);
+ assert(pos < prefix_len_arr[last_ex_idx].total);
+ if (init_ex_idx == ex_count)
+ init_ex_idx = last_ex_idx; /* adjustment for purposes of the search below */
+
+ /* First, search in exponentially increasing leaps from the current extent,
+ * until an interval bounding the target position was obtained. Here i and j
+ * are the left and right (inclusive) index boundaries, respectively. */
+ ssize_t i = init_ex_idx;
+ ssize_t j = i;
+ size_t leap = 1;
+ /* Go left, as needed. */
+ while (i > 0 && pos < prefix_len_arr[i].prec) {
+ j = i - 1;
+ if ((i -= leap) < 0)
+ i = 0;
+ leap <<= 1;
+ }
+ /* Go right, as needed. */
+ while (j < last_ex_idx && pos >= prefix_len_arr[j].total) {
+ i = j + 1;
+ if ((j += leap) > last_ex_idx)
+ j = last_ex_idx;
+ leap <<= 1;
+ }
+
+ /* Then, perform a binary search between i and j. */
+ size_t k = 0;
+ while (1) {
+ k = (i + j) / 2;
+ if (pos < prefix_len_arr[k].prec)
+ j = k - 1;
+ else if (pos >= prefix_len_arr[k].total)
+ i = k + 1;
+ else
+ break;
+ }
+
+ return k;
+}
+
+/* Performs I/O operations (either read or write) on an extent file, advancing
+ * through consecutive extents and updating the logical/physical file position
+ * as we go. */
+static ssize_t
+exfile_io(exfile_t *xf, int do_read, char *buf, size_t size)
+{
+ if (xf->curr_ex_idx == xf->ex_count)
+ return 0; /* end-of-extent-file */
+
+ /* Reading or writing? */
+ typedef ssize_t (io_func_t)(int, void *, size_t);
+ io_func_t *io_func;
+ ssize_t error_ret_val;
+ if (do_read) {
+ io_func = read;
+ error_ret_val = -1;
+ } else {
+ io_func = (io_func_t *)write;
+ error_ret_val = 0; /* must not return a negative value when writing */
+ }
+
+ /* Start processing data along extents. */
+ const ex_t *curr_ex = xf->ex_arr + xf->curr_ex_idx;
+ assert(curr_ex->len >= xf->curr_ex_pos);
+ size_t curr_ex_rem_len = curr_ex->len - xf->curr_ex_pos;
+ ssize_t total_bytes = 0;
+ while (size) {
+ /* Advance to the next extent of non-zero length. */
+ while (curr_ex_rem_len == 0) {
+ xf->curr_ex_idx++;
+ xf->curr_ex_pos = 0;
+ if (xf->curr_ex_idx == xf->ex_count)
+ return total_bytes; /* end-of-extent-file */
+ curr_ex++;
+ curr_ex_rem_len = curr_ex->len;
+ }
+
+ const int is_real_ex = (curr_ex->off >= 0);
+
+ /* Seek to the correct file position, as necessary. */
+ if (is_real_ex) {
+ const off_t file_pos = curr_ex->off + xf->curr_ex_pos;
+ if (xf->curr_file_pos != file_pos) {
+ if (lseek(xf->fd, file_pos, SEEK_SET) == (off_t)-1) {
+ xf->curr_file_pos = -1; /* unknown file position */
+ return total_bytes ? total_bytes : error_ret_val;
+ }
+ xf->curr_file_pos = file_pos;
+ }
+ }
+
+ /* Process data to the end of the current extent or the requested
+ * count, whichever is smaller. */
+ size_t io_count = (size < curr_ex_rem_len ? size : curr_ex_rem_len);
+ ssize_t io_bytes = io_count;
+ if (is_real_ex)
+ io_bytes = io_func(xf->fd, buf, io_count);
+ else if (do_read)
+ memset(buf, 0, io_count);
+
+ /* Stop on error. */
+ if (io_bytes < 0) {
+ if (total_bytes == 0)
+ total_bytes = error_ret_val;
+ break;
+ }
+
+ /* Update read state. */
+ total_bytes += io_bytes;
+ if (is_real_ex)
+ xf->curr_file_pos += io_bytes;
+ xf->curr_ex_pos += io_bytes;
+ xf->curr_pos += io_bytes;
+
+ /* If we didn't read the whole extent, finish; delegate handling of
+ * partial read/write back to the caller. */
+ if ((curr_ex_rem_len -= io_bytes) > 0)
+ break;
+
+ /* Update total count and buffer pointer. */
+ size -= io_bytes;
+ buf += io_bytes;
+ }
+
+ return total_bytes;
+}
+
+/* Reads up to |size| bytes from an extent file into |buf|. This implements the
+ * cookie_read_function_t interface and is a thin wrapper around exfile_io()
+ * (see above). Returns the number of bytes read, or -1 on error. */
+static ssize_t
+exfile_read(void *cookie, char *buf, size_t size)
+{
+ return exfile_io((exfile_t *)cookie, TRUE, buf, size);
+}
+
+/* Writes up to |size| bytes from |buf| to an extent file. This implements the
+ * cookie_write_function_t interface and is a thin wrapper around exfile_io()
+ * (see above). Returns the number of bytes written; must NOT return a negative
+ * value. */
+static ssize_t
+exfile_write(void *cookie, const char *buf, size_t size)
+{
+ return exfile_io((exfile_t *)cookie, FALSE, (char *)buf, size);
+}
+
+/* Performs seek on an extent file, repositioning it to the value of |*pos_p|
+ * according to |whence|. This implements the cookie_seek_function_t interface.
+ * On success, stores the resulting logical position measured in bytes along
+ * contiguous extents into |*pos_p| and returns 0; otherwise returns -1. */
+static int
+exfile_seek(void *cookie, off64_t *pos_p, int whence)
+{
+ exfile_t *xf = (exfile_t *)cookie;
+
+ /* Compute the absolute logical target position. */
+ off64_t new_pos = *pos_p;
+ if (whence == SEEK_CUR)
+ new_pos += xf->curr_pos;
+ else if (whence == SEEK_END)
+ new_pos += xf->total_ex_len;
+
+ /* Ensure that the target position is valid. Note that repositioning the
+ * file right past the last extent is considered valid, in line with normal
+ * seek behavior, although no write (nor read) can be performed there. */
+ if (new_pos < 0 || new_pos > xf->total_ex_len)
+ return -1;
+
+ if (new_pos != (off64_t)xf->curr_pos) {
+ /* Find the extend that contains the requested logical position; handle
+ * special case upfront, for efficiency. */
+ size_t new_ex_idx = 0;
+ if (new_pos == (off64_t)xf->total_ex_len)
+ new_ex_idx = xf->ex_count;
+ else if (new_pos)
+ new_ex_idx = ex_arr_search(xf->ex_count, xf->ex_arr,
+ xf->prefix_len_arr, new_pos,
+ xf->curr_ex_idx);
+
+ /* Set the logical position markers. */
+ xf->curr_ex_idx = new_ex_idx;
+ xf->curr_ex_pos =
+ (new_ex_idx < xf->ex_count ?
+ (size_t)(new_pos - xf->prefix_len_arr[new_ex_idx].prec) : 0);
+ xf->curr_pos = (off_t)new_pos;
+ }
+
+ *pos_p = new_pos;
+ return 0;
+}
+
+/* Closes an open extent file. This implements the cookie_close_function_t
+ * interface. Always returns 0 (success). */
+static int
+exfile_close(void *cookie)
+{
+ exfile_t *xf = (exfile_t *)cookie;
+ if (xf) {
+ if (xf->fd >= 0)
+ close(xf->fd);
+ free(xf->prefix_len_arr);
+ if (xf->ex_free)
+ xf->ex_free(xf->ex_arr);
+ free(xf);
+ }
+ return 0;
+}
+
+static const cookie_io_functions_t cookie_io_funcs = {
+ .read = exfile_read,
+ .write = exfile_write,
+ .seek = exfile_seek,
+ .close = exfile_close,
+};
+
+static FILE *
+exfile_open(int fd, const char *path, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *))
+{
+ /* Argument sanity check. */
+ if (!(ex_arr && ex_count && (fd >= 0 || path) && (fd < 0 || !path)))
+ return NULL;
+
+ /* Validate mode argument. */
+ exfile_mode_t mode = EXFILE_MODE_MAX;
+ int i;
+ for (i = 0; i < arraysize(fopen_mode_to_mode); i++)
+ if (!strcmp(fopen_mode_to_mode[i].fopen_mode, fopen_mode)) {
+ mode = fopen_mode_to_mode[i].mode;
+ break;
+ }
+ if (mode == EXFILE_MODE_MAX)
+ return NULL;
+
+ /* Open the underlying file, if not already provided. */
+ int do_close_fd = FALSE;
+ if (fd < 0) {
+ if ((fd = open(path, mode_to_open_flags[mode])) < 0)
+ return NULL;
+ do_close_fd = TRUE;
+ }
+
+ /* Allocate memory and open file streams, for both the underlying file and
+ * the handle returned to the caller. */
+ exfile_t *xf = NULL;
+ prefix_len_t *prefix_len_arr = NULL;
+ FILE *handle = NULL;
+ if (!((xf = (exfile_t *)calloc(1, sizeof(exfile_t))) &&
+ (prefix_len_arr =
+ (prefix_len_t *)malloc(sizeof(prefix_len_t) * ex_count)) &&
+ (handle = fopencookie(xf, fopen_mode, cookie_io_funcs)))) {
+ /* If a file was opened above, close it. */
+ if (do_close_fd)
+ close(fd);
+ if (xf)
+ xf->fd = -1; /* invalidate prior to calling exfile_close() */
+
+ free(prefix_len_arr);
+ if (handle)
+ fclose(handle); /* will call exfile_close already */
+ else
+ exfile_close(xf);
+ return NULL;
+ }
+
+ /* Compute the prefix lengths. */
+ size_t prefix_len = 0;
+ for (i = 0; i < ex_count; i++) {
+ prefix_len_arr[i].prec = prefix_len;
+ prefix_len += ex_arr[i].len;
+ prefix_len_arr[i].total = prefix_len;
+ }
+
+ /* Configure control object, including physical/logical file position. */
+ xf->fd = fd;
+ xf->ex_count = ex_count;
+ xf->ex_arr = ex_arr;
+ xf->prefix_len_arr = prefix_len_arr;
+ xf->ex_free = ex_free;
+ xf->total_ex_len = prefix_len_arr[ex_count - 1].total;
+ xf->curr_file_pos = lseek(fd, 0, SEEK_CUR);
+ xf->curr_ex_idx = 0;
+ xf->curr_ex_pos = 0;
+ xf->curr_pos = 0;
+
+ /* Return the external stream handle. */
+ return handle;
+}
+
+FILE *
+exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *))
+{
+ return exfile_open(-1, path, fopen_mode, ex_arr, ex_count, ex_free);
+}
+
+FILE *
+exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *))
+{
+ return exfile_open(fd, NULL, fopen_mode, ex_arr, ex_count, ex_free);
+}
diff --git a/exfile.h b/exfile.h
new file mode 100644
index 0000000..a804456
--- /dev/null
+++ b/exfile.h
@@ -0,0 +1,49 @@
+#ifndef __EXFILE_H
+#define __EXFILE_H
+
+#include <stdio.h>
+
+/*
+ * Extent files.
+ *
+ * This modules provides a familiar interface for handling files through an
+ * indirection layer of extents, which are contiguous chunks of variable length
+ * at arbitrary offsets within a file. Once an extent file handle is obtained,
+ * users may read, write and seek as they do with ordinary files, having the I/O
+ * with the underlying file done for them by the extent file implementation. The
+ * implementation supports "sparse extents", which are assumed to contain zeros
+ * but otherwise have no actual representation in the underlying file; these are
+ * denoted by negative offset values.
+ *
+ * Unlike ordinary files, the size of an extent file is fixed; it is not
+ * truncated on open, nor is writing past the extent span allowed. Also, writing
+ * to a sparse extent has no effect and will not raise an error.
+ */
+
+
+/* An extent, defined by an offset and a length. */
+typedef struct {
+ off_t off; /* the extent offset; negative indicates a sparse extent */
+ size_t len; /* the extent length */
+} ex_t;
+
+
+/* Opens a file |path| with use mode |fopen_mode| for use with an array of
+ * extents |ex_arr| of length |ex_count|. The mode string can be either of "r"
+ * (read-only), "w" (write-only) or "r+"/"w+" (read-write); the underlying file
+ * is neither created (if not present) nor truncated (if present) when opened
+ * for writing. The function |ex_free|, if not NULL, will be called to
+ * deallocate the extent array once the file object is closed. Returns a FILE
+ * pointer that can be used with ordinary stream functions (e.g. fread), or
+ * NULL if opening the file has failed. */
+FILE *exfile_fopen(const char *path, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *));
+
+/* Associates an extent file stream with an already open file descriptor |fd|.
+ * The |fopen_mode| argument is as decribed above and must be compatible with
+ * the mode of |fd|. All other arguments, behaviors and return values are as
+ * those of exfile_fopen (see above). */
+FILE *exfile_fdopen(int fd, const char *fopen_mode, ex_t *ex_arr,
+ size_t ex_count, void (*ex_free)(void *));
+
+#endif /* __EXFILE_H */
diff --git a/extents.c b/extents.c
new file mode 100644
index 0000000..8e1ec19
--- /dev/null
+++ b/extents.c
@@ -0,0 +1,126 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "extents.h"
+
+
+#define TRUE 1
+#define FALSE 0
+
+/* Minimum/maximum values for arbitrary integer types. */
+#define UNSIGNED_INT_MAX(t) (~((t)0))
+#define SIGNED_INT_MAX(t) ((t)((uintmax_t)UNSIGNED_INT_MAX(t) >> 1))
+#define MAX(a,b) ((a) > (b) ? (a) : (b))
+#define INT_TYPE_MAX(t) MAX(UNSIGNED_INT_MAX(t), SIGNED_INT_MAX(t))
+
+/* The maximum accepted value for a given integer type when parsed as a signed
+ * long long integer. This is defined to be the smaller of the maximum value
+ * that can be represented by this type and LLONG_MAX. This bound allows us to
+ * properly check that parsed values do not exceed the capacity of their
+ * intended store, regardless of how its size relates to that of a signed long
+ * long integer. Note: this may mean that we are losing the most significant
+ * bit of an unsigned 64-bit field (e.g. size_t on some platforms), however
+ * still permitting values up to 2^62, which is more than enough for all
+ * practical purposes. */
+#define LLONG_MAX_BOUND(s) \
+ ((uintmax_t)(s) < (uintmax_t)LLONG_MAX ? (long long)(s) : LLONG_MAX)
+#define MAX_ALLOWED(t) LLONG_MAX_BOUND(INT_TYPE_MAX(t))
+
+/* Get the type of a struct field. */
+#define FIELD_TYPE(t, f) typeof(((t *)0)->f)
+
+
+/* Reads a long long integer from |s| into |*val_p|. Returns a pointer to the
+ * character immediately following the specified |delim|, unless (a) parsing
+ * failed (overflow or no valid digits); (b) the read value is less than
+ * |min_val| or greater than |max_val|; (c) the delimiter character is not
+ * |delim|, or the string ends although |may_end| is false. In any of these
+ * cases, returns NULL. */
+const char *
+read_llong(const char *s, long long *val_p, long long min_val,
+ long long max_val, char delim, int may_end)
+{
+ assert(val_p);
+ const char *next_s;
+ errno = 0;
+ long long val = strtoll(s, (char **)&next_s, 10);
+ if (((val == LLONG_MAX || val == LLONG_MIN) && errno == ERANGE) ||
+ next_s == s || val < min_val || val > max_val ||
+ (*next_s ? *next_s != delim : !may_end))
+ return NULL; /* bad value or delimiter */
+ *val_p = val;
+ if (*next_s)
+ next_s++; /* skip delimeter */
+ return next_s;
+}
+
+
+/* Reads a comma-separated list of "offset:length" extents from |ex_str|. If
+ * |ex_arr| is NULL, then |ex_count| is ignored and it attempts to parse valid
+ * extents until the end of the string is reached. Otherwise, stores up to
+ * |ex_count| extents into |ex_arr|, which must be of at least this size.
+ * Returns the number of correctly parsed extents, or -1 if a malformed extent
+ * was found. */
+static ssize_t
+extents_read(const char *ex_str, ex_t *ex_arr, size_t ex_count)
+{
+ size_t i;
+ size_t last_i = ex_count - 1;
+ if (!ex_arr) {
+ ex_count = SIZE_MAX;
+ last_i = 0;
+ }
+ for (i = 0; *ex_str && i < ex_count; i++) {
+ long long raw_off = 0, raw_len = 0;
+ if (!((ex_str = read_llong(ex_str, &raw_off, -1,
+ MAX_ALLOWED(FIELD_TYPE(ex_t, off)),
+ ':', FALSE)) &&
+ (ex_str = read_llong(ex_str, &raw_len, 1,
+ MAX_ALLOWED(FIELD_TYPE(ex_t, len)),
+ ',', i >= last_i))))
+ return -1; /* parsing error */
+ if (ex_arr) {
+ ex_arr[i].off = raw_off;
+ ex_arr[i].len = raw_len;
+ }
+ }
+ return i;
+}
+
+
+ex_t *
+extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p)
+{
+ /* Sanity checks: a string must be provided; if an array is provided, an
+ * array count must be given as well. */
+ if (!ex_str || (ex_arr && !ex_count_p))
+ return NULL;
+
+ /* Parse string and count extents. */
+ ssize_t ret = extents_read(ex_str, NULL, 0);
+ if (ret < 0)
+ return NULL; /* parsing error */
+ size_t ex_count = (size_t)ret;
+
+ /* Input is good, commit to extent count. */
+ if (ex_count_p) {
+ size_t alloc_ex_count = *ex_count_p;
+ *ex_count_p = ex_count;
+ if (ex_arr && alloc_ex_count < ex_count)
+ return NULL; /* insufficient allocated space */
+ }
+ if (ex_count == 0)
+ return NULL; /* no extents, nothing to do */
+
+ /* Allocate extent array, if needed. */
+ if (!(ex_arr || (ex_arr = (ex_t *)malloc(sizeof(ex_t) * ex_count))))
+ return NULL; /* allocation failed */
+
+ /* Populate the extent array. */
+ extents_read(ex_str, ex_arr, ex_count);
+
+ return ex_arr;
+}
diff --git a/extents.h b/extents.h
new file mode 100644
index 0000000..7af1458
--- /dev/null
+++ b/extents.h
@@ -0,0 +1,22 @@
+#ifndef __EXTENTS_H
+#define __EXTENTS_H
+
+#include "exfile.h"
+
+
+/* Parses a string representation |ex_str| and populates an array |ex_arr|
+ * consisting of |*ex_count_p| extents. The string is expected to be a
+ * comma-separated list of pairs of the form "offset:length". An offset may be
+ * -1 or a non-negative integer; the former indicates a sparse extent
+ * (consisting of zeros). A length is a positive integer. If |ex_arr| is NULL,
+ * |*ex_count_p| is ignored and a new array is allocated based on the actual
+ * number of extents parsed. Upon success, returns a pointer to the populated
+ * array of extents and stores the actual number of extents at the location
+ * pointed to be |ex_count_p| (if provided). If the string parses correctly but
+ * the operation otherwise fails (allocation error, array too small), returns
+ * NULL but still store the number of parsed extents. Otherwise, returns NULL
+ * and does not store anything. If a new array was allocated, then it should be
+ * deallocated with free(3). */
+ex_t *extents_parse(const char *ex_str, ex_t *ex_arr, size_t *ex_count_p);
+
+#endif /* __EXTENTS_H */