Code drop from //branches/cupcake/...@124589
diff --git a/Android.mk b/Android.mk
new file mode 100644
index 0000000..a6fa250
--- /dev/null
+++ b/Android.mk
@@ -0,0 +1,35 @@
+LOCAL_PATH:= $(call my-dir)
+
+# Compiler flags shared by both modules.  XD3_MAIN is left out on purpose:
+# each module defines it exactly once, which avoids a macro redefinition.
+xdelta3_cflags := \
+	-O3 \
+	-fno-function-sections -fno-data-sections -fno-inline \
+	-DSUPPORT_ANDROID_PRELINK_TAGS \
+	-DGENERIC_ENCODE_TABLES=0 \
+	-DREGRESSION_TEST=0 \
+	-DSECONDARY_DJW=1 \
+	-DSECONDARY_FGK=1 \
+	-DXD3_DEBUG=0 \
+	-DXD3_POSIX=1 \
+	-DXD3_USE_LARGEFILE64=1
+
+# Static library: xdelta3.c compiled with XD3_MAIN=0.
+include $(CLEAR_VARS)
+LOCAL_LDLIBS += -lm
+LOCAL_CFLAGS += $(xdelta3_cflags) -DXD3_MAIN=0
+LOCAL_SRC_FILES := xdelta3.c
+LOCAL_C_INCLUDES:= $(LOCAL_PATH)/
+LOCAL_MODULE := libxdelta3
+include $(BUILD_STATIC_LIBRARY)
+
+# Host executable: the same source compiled with XD3_MAIN=1.
+include $(CLEAR_VARS)
+LOCAL_LDLIBS += -lm
+LOCAL_CFLAGS += $(xdelta3_cflags) -DXD3_MAIN=1
+LOCAL_SRC_FILES := xdelta3.c
+LOCAL_C_INCLUDES:= $(LOCAL_PATH)/
+LOCAL_MODULE := xdelta3
+include $(BUILD_HOST_EXECUTABLE)
+
+
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..5b6e7c6
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,340 @@
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+                       59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..bbc6d2a
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,310 @@
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
+
+UNAME = $(shell uname)
+CYGWIN = $(findstring CYGWIN, $(UNAME))
+DARWIN = $(findstring Darwin, $(UNAME))
+PYVER = 2.5
+
+ifeq ("$(CYGWIN)", "")
+SWIGTGT = xdelta3module.so
+PYTGT = build/lib.linux-i686-$(PYVER)/xdelta3main.so
+else
+SWIGTGT = xdelta3module.dll
+PYTGT = build/lib.cygwin-1.5.24-i686-$(PYVER)/xdelta3main.dll
+endif
+
+SOURCES = xdelta3-cfgs.h \
+	  xdelta3-decode.h \
+	  xdelta3-djw.h \
+	  xdelta3-fgk.h \
+	  xdelta3-hash.h \
+	  xdelta3-list.h \
+	  xdelta3-main.h \
+	  xdelta3-merge.h \
+	  xdelta3-python.h \
+	  xdelta3-second.h \
+	  xdelta3-test.h \
+	  xdelta3.c \
+	  xdelta3.h
+
+TARGETS = xdelta3-debug \
+	  xdelta3 \
+	  xdelta3-debug2 \
+	  xdelta3-debug3 \
+	  xdelta3.o \
+	  xdelta3_wrap.o \
+	  xdelta3-32 \
+	  xdelta3-64 \
+	  xdelta3-everything \
+	  xdelta3-Opg \
+	  xdelta3-64-O \
+	  xdelta3-Op \
+	  xdelta3-decoder xdelta3-decoder-nomain.o \
+	  xdelta3-nosec.o xdelta3-all.o xdelta3-fgk.o \
+	  xdelta3-noext xdelta3-tools \
+	  xdelta3-notools \
+	  xdelta3_wrap.c xdelta3.py \
+	  $(PYTGT) $(SWIGTGT)
+
+PYTHON = python
+
+WIXDIR = "/cygdrive/c/Program Files/wix2.0.4820"
+
+# -arch x86_64
+CFLAGS= -Wall -Wshadow -fno-builtin
+
+# $Format: "REL=$Xdelta3Version$" $
+REL=3.0u
+
+RELDIR = xdelta$(REL)
+
+EXTRA = Makefile COPYING linkxd3lib.c badcopy.c xdelta3.swig \
+	draft-korn-vcdiff.txt xdelta3.vcproj badcopy.vcproj \
+	xdelta3-regtest.py xdelta3-test.py setup.py \
+	examples/Makefile examples/small_page_test.c \
+	examples/README examples/encode_decode_test.c \
+	examples/compare_test.c examples/speed_test.c \
+	examples/test.h examples/checksum_test.cc \
+	xdelta3.py xdelta3_wrap.c xdelta3.wxs xdelta3.wxi \
+	testing/cmp.h testing/delta.h testing/file.h \
+	testing/modify.h testing/random.h testing/segment.h \
+	testing/sizes.h testing/test.h testing/Makefile \
+	README readme.txt
+
+SWIG_FLAGS = -DXD3_DEBUG=1 \
+	      -DEXTERNAL_COMPRESSION=0 \
+	      -DXD3_USE_LARGEFILE64=1 \
+	      -DGENERIC_ENCODE_TABLES=1 \
+	      -DSECONDARY_DJW=1 \
+	      -DVCDIFF_TOOLS=1 \
+	      -DSWIG_MODULE=1
+# Aggregate and convenience targets.  None of them names a real file, so
+# they are declared phony and always run when requested.
+.PHONY: all all-py all-targets all-targets-test pytgt swigtgt test tar zip clean wix
+all: xdelta3-debug xdelta3
+
+all-py: all $(PYTGT) $(SWIGTGT)
+all-targets: $(TARGETS)
+all-targets-test: all-targets test
+pytgt: $(PYTGT)
+swigtgt: $(SWIGTGT)
+
+# The regression tests are built into the debug binary.
+test: xdelta3-debug
+	./xdelta3-debug test
+# Stage $(SOURCES) and $(EXTRA) under /tmp/$(RELDIR), archive that directory as ./$(RELDIR).tar.gz, list it, then remove the staging directory.
+tar:
+	tar --exclude ".svn" -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(EXTRA)
+	rm -rf /tmp/$(RELDIR)
+	mkdir /tmp/$(RELDIR)
+	(cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz)
+	tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR)
+	+tar -tzf ./$(RELDIR).tar.gz
+	rm -rf /tmp/$(RELDIR)
+# Same staging as "tar", plus $(RELDIR).zip.  zip runs from /tmp so entries are stored under $(RELDIR)/ instead of tmp/$(RELDIR)/.
+zip:
+	tar --exclude ".svn" -czf /tmp/$(RELDIR)-tmp.tar.gz $(SOURCES) $(EXTRA)
+	rm -rf /tmp/$(RELDIR)
+	mkdir /tmp/$(RELDIR)
+	(cd /tmp/$(RELDIR) && tar -xzf ../$(RELDIR)-tmp.tar.gz)
+	tar -czf ./$(RELDIR).tar.gz -C /tmp $(RELDIR)
+	+(cd /tmp && zip -r "$(CURDIR)/$(RELDIR).zip" $(RELDIR))
+	rm -rf /tmp/$(RELDIR)
+
+clean:
+	rm -f $(TARGETS)
+	rm -rf build Debug Release core cifs* *.stackdump *.exe \
+		xdelta3.ncb xdelta3.suo xdelta3.sln xdelta3.wixobj xdelta3.msi
+
+wix: xdelta3.wxs xdelta3.wxi readme.txt Release\xdelta3.exe
+	$(WIXDIR)/candle.exe xdelta3.wxs -out xdelta3.wixobj
+	$(WIXDIR)/light.exe xdelta3.wixobj -out xdelta3.msi
+
+xdelta3: $(SOURCES)
+	$(CC) $(CFLAGS) -O3 xdelta3.c -lm -o xdelta3 \
+	      -DGENERIC_ENCODE_TABLES=0 \
+	      -DREGRESSION_TEST=1 \
+	      -DSECONDARY_DJW=1 \
+	      -DSECONDARY_FGK=1 \
+	      -DXD3_DEBUG=0 \
+	      -DXD3_MAIN=1 \
+	      -DXD3_POSIX=1 \
+	      -DXD3_USE_LARGEFILE64=1
+
+xdelta3-debug: $(SOURCES)
+	$(CC) -g $(CFLAGS) xdelta3.c -lm -o xdelta3-debug \
+		-DGENERIC_ENCODE_TABLES=1 \
+		-DREGRESSION_TEST=1 \
+		-DSECONDARY_DJW=1 \
+		-DSECONDARY_FGK=1 \
+		-DXD3_DEBUG=1 \
+		-DXD3_MAIN=1 \
+		-DXD3_STDIO=1 \
+		-DXD3_USE_LARGEFILE64=1
+
+xdelta3-32: $(SOURCES)
+	$(CC) -g $(CFLAGS) xdelta3.c -lm -o xdelta3-32 \
+	      -DXD3_DEBUG=1 \
+	      -DXD3_USE_LARGEFILE64=0 \
+	      -DREGRESSION_TEST=1 \
+	      -DSECONDARY_DJW=1 \
+	      -DSECONDARY_FGK=1 \
+	      -DXD3_MAIN=1 \
+	      -DXD3_POSIX=1
+
+xdelta3-debug2: $(SOURCES)
+	$(CC) -g $(CFLAGS) \
+		xdelta3.c -o xdelta3-debug2 \
+		-DXD3_DEBUG=2 \
+		-DXD3_MAIN=1 \
+		-DXD3_STDIO=1 \
+		-DXD3_USE_LARGEFILE64=1 \
+		-DGENERIC_ENCODE_TABLES=1 \
+		-DREGRESSION_TEST=1 \
+		-DSECONDARY_DJW=1 \
+		-DSECONDARY_FGK=1 \
+		-lm
+
+xdelta3-debug3: $(SOURCES)
+	$(CC) -g $(CFLAGS) xdelta3.c -o xdelta3-debug3 \
+		-DXD3_MAIN=1 \
+		-DGENERIC_ENCODE_TABLES=1 \
+		-DXD3_USE_LARGEFILE64=1 \
+		-DXD3_STDIO=1 \
+		-DREGRESSION_TEST=1 \
+		-DXD3_DEBUG=3 \
+		-DSECONDARY_DJW=1 \
+		-DSECONDARY_FGK=1 \
+		-lm
+
+$(PYTGT): $(SOURCES) setup.py
+	$(PYTHON) setup.py install --verbose --compile --force
+
+xdelta3_wrap.c xdelta3.py: xdelta3.swig
+	swig -python xdelta3.swig
+
+xdelta3.o: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) -c xdelta3.c $(SWIG_FLAGS) -o xdelta3.o
+
+xdelta3_wrap.o: xdelta3_wrap.c
+	$(CC) -O3 $(CFLAGS) $(SWIG_FLAGS) \
+	      -DHAVE_CONFIG_H \
+	      -I/usr/include/python$(PYVER) \
+	      -I/usr/lib/python$(PYVER)/config \
+	      -fpic \
+	      -c xdelta3_wrap.c
+
+xdelta3module.dll: xdelta3_wrap.o xdelta3.o
+	gcc -shared -Wl,--enable-auto-image-base \
+		xdelta3.o \
+		xdelta3_wrap.o \
+		-L/usr/lib/python$(PYVER)/config \
+		-lpython$(PYVER) \
+		-o xdelta3module.dll
+	cp $(SWIGTGT) /usr/lib/python$(PYVER)/site-packages
+
+ifeq ("$(DARWIN)", "")
+xdelta3module.so: xdelta3_wrap.o xdelta3.o
+	ld -shared xdelta3.o xdelta3_wrap.o \
+		-o xdelta3module.so \
+		/usr/lib/libpython$(PYVER).so \
+		-lc
+else
+xdelta3module.so: xdelta3_wrap.o xdelta3.o
+	gcc -Wl,-F. -bundle -undefined dynamic_lookup $(CFLAGS) \
+		xdelta3.o xdelta3_wrap.o -o xdelta3module.so
+endif
+
+xdelta3-decoder: $(SOURCES)
+	$(CC) -O3 -Wall -Wshadow xdelta3.c \
+	    -DXD3_ENCODER=0 -DXD3_MAIN=1 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \
+	    -DXD3_STDIO=1 -DEXTERNAL_COMPRESSION=0 -DVCDIFF_TOOLS=0 \
+	    -o xdelta3-decoder
+
+xdelta3-decoder-nomain.o: $(SOURCES) linkxd3lib.c
+	$(CC) -O3 -Wall -Wshadow xdelta3.c linkxd3lib.c \
+	    -DXD3_ENCODER=0 -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 \
+	    -o xdelta3-decoder-nomain.o
+	strip xdelta3-decoder-nomain.o
+
+xdelta3-O++: $(SOURCES)
+	$(CXX) -g -O3 $(CFLAGS) xdelta3.c \
+		-o xdelta3-O++ \
+		-DXD3_MAIN=1 \
+		-DSECONDARY_DJW=1 \
+		-DREGRESSION_TEST=1 \
+		-lm
+
+xdelta3-Op: $(SOURCES)
+	$(CC) -g -O3 $(CFLAGS) xdelta3.c \
+		-o xdelta3-Op \
+		-DXD3_POSIX=1 \
+		-DXD3_MAIN=1 \
+		-DREGRESSION_TEST=1 \
+		-lm
+
+xdelta3-64: $(SOURCES)
+	$(CC) -g $(CFLAGS) \
+		xdelta3.c \
+		-o xdelta3-64 \
+		-DXD3_POSIX=1 \
+		-DXD3_MAIN=1 \
+		-DREGRESSION_TEST=1 \
+		-DXD3_DEBUG=0 \
+		-DXD3_USE_LARGEFILE64=1 \
+		-lm
+
+xdelta3-64-O: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) \
+		xdelta3.c \
+		-o xdelta3-64-O \
+		-DXD3_POSIX=1 \
+		-DXD3_MAIN=1 \
+		-DXD3_USE_LARGEFILE64=1 \
+		-lm
+
+xdelta3-everything: $(SOURCES)
+	$(CC) -g $(CFLAGS) \
+		xdelta3.c \
+		-o xdelta3-everything \
+		-DXD3_MAIN=1 \
+		-DVCDIFF_TOOLS=1 \
+		-DREGRESSION_TEST=1 \
+		-DSECONDARY_FGK=1 \
+		-DSECONDARY_DJW=1 \
+		-DGENERIC_ENCODE_TABLES=1 \
+		-DGENERIC_ENCODE_TABLES_COMPUTE=1 \
+		-DXD3_POSIX=1 \
+		-DEXTERNAL_COMPRESSION=1 \
+		-DXD3_DEBUG=1 \
+		-lm
+
+xdelta3-Opg: $(SOURCES)
+	$(CC) -pg -g -O3 $(CFLAGS) \
+		xdelta3.c \
+		-o xdelta3-Opg \
+		-DXD3_MAIN=1 \
+		-DSECONDARY_DJW=1 \
+		-DSECONDARY_FGK=1 \
+		-DXD3_POSIX=1 \
+		-DXD3_USE_LARGEFILE64=1 \
+		-DREGRESSION_TEST=1
+
+xdelta3-nosec.o: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) -c xdelta3.c -DSECONDARY_FGK=0 -DSECONDARY_DJW=0 -o xdelta3-nosec.o
+
+xdelta3-all.o: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=1 -o xdelta3-all.o
+
+xdelta3-fgk.o: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) -c xdelta3.c -DSECONDARY_FGK=1 -DSECONDARY_DJW=0 -o xdelta3-fgk.o
+
+xdelta3-noext: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) xdelta3.c -DXD3_MAIN=1 -DEXTERNAL_COMPRESSION=0 -o xdelta3-noext
+
+xdelta3-tools: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) xdelta3.c -DXD3_MAIN=1 -o xdelta3-tools
+
+xdelta3-notools: $(SOURCES)
+	$(CC) -O3 $(CFLAGS) xdelta3.c -DXD3_MAIN=1 -DVCDIFF_TOOLS=0 -o xdelta3-notools
diff --git a/README b/README
new file mode 100644
index 0000000..be7c6ce
--- /dev/null
+++ b/README
@@ -0,0 +1,34 @@
+Xdelta 3.x readme.txt
+Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+<josh.macdonald@gmail.com>
+
+
+Thanks for downloading Xdelta!
+
+This directory contains the Xdelta3 command-line interface (CLI) and source
+distribution for VCDIFF differential compression, a.k.a. delta
+compression. The latest information and downloads are available here:
+
+  http://xdelta.org/
+  http://code.google.com/p/xdelta/
+
+The command-line syntax:
+
+  http://code.google.com/p/xdelta/wiki/CommandLineSyntax
+
+Run 'xdelta3 -h' for brief help.  Run 'xdelta3 test' for built-in tests.
+
+Sample commands (like gzip, -e means encode, -d means decode):
+
+  xdelta3 -9 -S djw -e -vfs OLD_FILE NEW_FILE DELTA_FILE
+  xdelta3 -d -vfs OLD_FILE DELTA_FILE DECODED_FILE
+
+File bug reports and browse open support issues here:
+
+  http://code.google.com/p/xdelta/issues/list
+
+The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++
+and Cygwin builds.  Xdelta3 is covered under the terms of the GPL, see
+COPYING.
+
+Commercial inquiries are welcome; please contact <josh.macdonald@gmail.com>.
diff --git a/README.android b/README.android
new file mode 100644
index 0000000..bbe0b31
--- /dev/null
+++ b/README.android
@@ -0,0 +1,7 @@
+The contents of this directory are the xdelta3.0u package, downloaded from
+
+   http://xdelta.googlecode.com/files/xdelta3.0u.tar.gz
+
+on 12 Oct 2008.  I added the Android.mk file and this README.android
+file to the directory; nothing else has been touched.
+
diff --git a/badcopy.c b/badcopy.c
new file mode 100644
index 0000000..03abc63
--- /dev/null
+++ b/badcopy.c
@@ -0,0 +1,158 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#define BUFSZ (1 << 22)
+
+#ifdef WIN32
+/* Stand-ins for drand48()/lrand48(), built on rand(), used when WIN32 is defined. */
+static double drand48() {
+  /* Dividing by RAND_MAX + 1.0 keeps the result in [0, 1), never exactly 1. */
+  return rand() / ((double)RAND_MAX + 1.0);
+}
+long lrand48() {
+	/* Mix in unsigned arithmetic: overflowing a signed left shift is undefined. */
+	unsigned long l = 0;
+	int i;
+	for (i = 0; i < 32; i++) {
+		l = l ^ (l << 2) ^ (l << 1) ^ (unsigned long)rand();
+	}
+	return (long)(l & 0x7fffffffUL); /* non-negative 31-bit value, as lrand48() returns */
+}
+#endif
+
+#ifdef _WIN32 /* NOTE(review): the shim block above tests WIN32, this one _WIN32 */
+#define XD3_WIN32 1
+#else
+#define XD3_POSIX 1
+#endif
+#define XD3_MAIN 1
+#define main notmain /* any main in xdelta3.c is renamed so this file can define its own */
+#define EXTERNAL_COMPRESSION 0
+#define XD3_USE_LARGEFILE64 1
+#include "xdelta3.c" /* the implementation is pulled in as source, configured by the macros above */
+#undef main
+
+
+double error_prob   = 0.0001; /* probability used to space the changes; main() overrides it from argv[3] */
+usize_t mean_change  = 100;   /* mean size passed to edist() for each change; main() overrides it from argv[4] */
+xoff_t total_change = 0;      /* sum of the sizes of all changes made so far */
+xoff_t total_size   = 0;      /* bytes already passed through modify() */
+usize_t max_change   = 0;     /* largest single change made so far */
+usize_t num_change   = 0;     /* number of changes made so far */
+
+/* Draw a value from an exponential distribution with the given mean, rounded and clamped to [1, max]. */
+static usize_t
+edist (usize_t mean, usize_t max)
+{
+  double u = drand48 ();
+  /* u == 0 would make the log infinite.  The clamp is done in double because
+   * converting an out-of-range double to an integer type is undefined. */
+  double x = (u > 0.0) ? (double) mean * log (1.0 / u) + 0.5 : (double) max;
+  return (x >= (double) max) ? max : (x >= 1.0 ? (usize_t) x : 1);
+}
+/* Overwrite randomly placed runs of buf[0..size) with random bytes, logging
+ * each COPY/ADD range to stderr and updating the global change totals. */
+void modify (char *buf, usize_t size)
+{
+  const usize_t limit = 1U << 31; /* 1 << 31 would overflow a signed int */
+  usize_t bufpos = 0, last_end = 0, j;
+
+  /* error_prob == 0 means "never modify" and would divide by zero below;
+   * bufpos and j are incremented in the inner loop. */
+  while (error_prob > 0.0)
+    {
+      /* The size of the next modification. */
+      usize_t next_size = edist (mean_change, limit);
+      /* The expected interval of such a change. */
+      double expect_interval = ((double) next_size * (1.0 - error_prob)) / error_prob;
+      /* The number of bytes until the next modification (interval clamped to limit). */
+      usize_t next_mod = edist (expect_interval < (double) limit
+                                ? (usize_t) expect_interval : limit, limit);
+
+      /* Test against the room left so the sum cannot wrap around. */
+      if (next_mod > size - bufpos || next_size > size - bufpos - next_mod) { break; }
+      if (max_change < next_size) { max_change = next_size; }
+      bufpos += next_mod;
+
+      fprintf (stderr, "COPY: %llu-%llu (%lu)\n",
+               (unsigned long long) (total_size + last_end),
+               (unsigned long long) (total_size + bufpos),
+               (unsigned long) (bufpos - last_end));
+      fprintf (stderr, "ADD:  %llu-%llu (%lu) is change %lu\n",
+               (unsigned long long) (total_size + bufpos),
+               (unsigned long long) (total_size + bufpos + next_size),
+               (unsigned long) next_size, (unsigned long) num_change);
+
+      total_change += next_size;
+      num_change   += 1;
+      for (j = 0; j < next_size; j += 1, bufpos += 1)
+        {
+          buf[bufpos] = (char)(lrand48 () >> 3);
+        }
+      last_end = bufpos;
+    }
+
+  fprintf (stderr, "COPY: %llu-%llu (%lu)\n",
+           (unsigned long long) (total_size + last_end),
+           (unsigned long long) (total_size + size), (unsigned long) (size - last_end));
+  total_size += size;
+}
+/* Copy the file argv[1] to argv[2] in BUFSZ chunks, passing every chunk through
+ * modify().  argv[3] optionally sets error_prob and argv[4] sets mean_change. */
+int main(int argc, char **argv)
+{
+  main_file inp, out;
+  char *buf = malloc(BUFSZ);
+  int c, ret;
+  main_file_init(&inp);
+  main_file_init(&out);
+  option_force = 1;
+  /* argv[1] and argv[2] are used unconditionally below, so both are required. */
+  if (argc < 3 || argc > 5)
+    {
+      fprintf (stderr, "usage: badcopy input output [byte_error_prob [mean_error_size]]\n");
+      return 1;
+    }
+  if (buf == NULL) { fprintf (stderr, "out of memory\n"); return 1; }
+
+  if (argc > 4) { mean_change = atoi (argv[4]); }
+  if (argc > 3) { error_prob  = atof (argv[3]); }
+  fprintf (stderr, "mean change = %lu; error_prob = %0.10f\n", (unsigned long) mean_change, error_prob);
+
+  /* Reject a bad probability before any file is opened for writing. */
+  if (error_prob < 0.0 || error_prob > 1.0)
+    {
+      fprintf (stderr, "warning: error probability out of range\n");
+      return 1;
+    }
+
+  if ((ret = main_file_open (&inp, argv[1], XO_READ)) != 0) {
+      return 1;
+  }
+  if ((ret = main_file_open (&out, argv[2], XO_WRITE)) != 0) {
+      return 1;
+  }
+
+  do
+    {
+      if ((ret = main_file_read (&inp, buf, BUFSZ, &c, "read failed")) != 0) {
+          return 1;
+      }
+      if (c == 0) { break; }
+      modify (buf, c);
+      if ((ret = main_file_write (&out, buf, c, "write failed")) != 0) {
+          return 1;
+      }
+    }
+  while (c == BUFSZ);
+
+  if ((ret = main_file_close (&out))) { return 1; }
+
+  /* total_change and total_size are xoff_t, which may be wider than unsigned
+   * int, so every count is cast to a type that matches its conversion. */
+  fprintf (stderr, "add_prob %f; %lu adds; total_change %llu of %llu bytes; add percentage %f; max add size %lu\n",
+           error_prob, (unsigned long) num_change, (unsigned long long) total_change, (unsigned long long) total_size,
+           (double) total_change / (double) total_size, (unsigned long) max_change);
+  return 0;
+}
diff --git a/badcopy.vcproj b/badcopy.vcproj
new file mode 100644
index 0000000..50683f6
--- /dev/null
+++ b/badcopy.vcproj
@@ -0,0 +1,218 @@
+<?xml version="1.0" encoding="Windows-1252"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="badcopy"
+	ProjectGUID="{FED2964C-7114-41AC-81EE-68A4D2B67503}"
+	RootNamespace="badcopy"
+	Keyword="Win32Proj"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			OutputDirectory="Debug"
+			IntermediateDirectory="Debug"
+			ConfigurationType="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				Optimization="0"
+				PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE;"
+				MinimalRebuild="true"
+				BasicRuntimeChecks="3"
+				RuntimeLibrary="3"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="4"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			OutputDirectory="Release"
+			IntermediateDirectory="Release"
+			ConfigurationType="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;"
+				RuntimeLibrary="2"
+				UsePrecompiledHeader="0"
+				WarningLevel="3"
+				Detect64BitPortabilityProblems="true"
+				DebugInformationFormat="3"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				LinkIncremental="2"
+				GenerateDebugInformation="true"
+				SubSystem="1"
+				OptimizeReferences="2"
+				EnableCOMDATFolding="2"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCWebDeploymentTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+			<File
+				RelativePath=".\releases\xdelta30h.ppc-osx.bin"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\badcopy.c"
+				>
+			</File>
+		</Filter>
+		<File
+			RelativePath=".\release\BuildLog.htm"
+			>
+		</File>
+		<File
+			RelativePath=".\debug\BuildLog.htm"
+			>
+		</File>
+		<File
+			RelativePath=".\www\xdelta3-api-guide.html"
+			>
+		</File>
+		<File
+			RelativePath=".\www\xdelta3-cmdline.html"
+			>
+		</File>
+		<File
+			RelativePath=".\www\xdelta3.html"
+			>
+		</File>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/draft-korn-vcdiff.txt b/draft-korn-vcdiff.txt
new file mode 100644
index 0000000..1487deb
--- /dev/null
+++ b/draft-korn-vcdiff.txt
@@ -0,0 +1,1322 @@
+                                                     David G. Korn, AT&T Labs
+				             Joshua P. MacDonald, UC Berkeley
+                                                 Jeffrey C. Mogul, Compaq WRL
+Internet-Draft                                       Kiem-Phong Vo, AT&T Labs
+Expires: 09 November 2002                                    09 November 2001
+
+
+        The VCDIFF Generic Differencing and Compression Data Format
+
+                         draft-korn-vcdiff-06.txt
+
+
+
+Status of this Memo
+
+    This document is an Internet-Draft and is in full conformance
+    with all provisions of Section 10 of RFC2026.
+
+    Internet-Drafts are working documents of the Internet Engineering
+    Task Force (IETF), its areas, and its working groups.  Note that
+    other groups may also distribute working documents as
+    Internet-Drafts.
+
+    Internet-Drafts are draft documents valid for a maximum of six
+    months and may be updated, replaced, or obsoleted by other
+    documents at any time.  It is inappropriate to use Internet-
+    Drafts as reference material or to cite them other than as
+    "work in progress."
+
+    The list of current Internet-Drafts can be accessed at
+    http://www.ietf.org/ietf/1id-abstracts.txt
+
+    The list of Internet-Draft Shadow Directories can be accessed at
+    http://www.ietf.org/shadow.html.
+
+
+Abstract
+
+    This memo describes a general, efficient and portable data format
+    suitable for encoding compressed and/or differencing data so that
+    they can be easily transported among computers.
+
+
+Table of Contents:
+
+    1.  EXECUTIVE SUMMARY ............................................  2
+    2.  CONVENTIONS ..................................................  3
+    3.  DELTA INSTRUCTIONS ...........................................  4
+    4.  DELTA FILE ORGANIZATION ......................................  5
+    5.  DELTA INSTRUCTION ENCODING ...................................  9
+    6.  DECODING A TARGET WINDOW ..................................... 14
+    7.  APPLICATION-DEFINED CODE TABLES .............................. 16
+    8.  PERFORMANCE .................................................. 16
+    9.  FURTHER ISSUES ............................................... 17
+   10.  SUMMARY ...................................................... 18
+   11.  ACKNOWLEDGEMENTS ............................................. 18
+   12.  SECURITY CONSIDERATIONS ...................................... 18
+   13.  SOURCE CODE AVAILABILITY ..................................... 18
+   14.  INTELLECTUAL PROPERTY RIGHTS ................................. 18
+   15.  IANA CONSIDERATIONS .......................................... 19
+   16.  REFERENCES ................................................... 19
+   17.  AUTHOR'S ADDRESS ............................................. 20
+
+
+1.  EXECUTIVE SUMMARY
+
+    Compression and differencing techniques can greatly improve storage
+    and transmission of files and file versions.  Since files are often
+    transported across machines with distinct architectures and performance
+    characteristics, such data should be encoded in a form that is portable
+    and can be decoded with little or no knowledge of the encoders.
+    This document describes Vcdiff, a compact portable encoding format
+    designed for these purposes.
+
+    Data differencing is the process of computing a compact and invertible
+    encoding of a "target file" given a "source file".  Data compression
+    is similar but without the use of source data.  The UNIX utilities diff,
+    compress, and gzip are well-known examples of data differencing and
+    compression tools.  For data differencing, the computed encoding is
+    called a "delta file", and, for data compression, it is called
+    a "compressed file".  Delta and compressed files are good for storage
+    and transmission as they are often smaller than the originals.
+
+    Data differencing and data compression are traditionally treated
+    as distinct types of data processing.  However, as shown in the Vdelta
+    technique by Korn and Vo [1], compression can be thought of as a special
+    case of differencing in which the source data is empty. The basic idea
+    is to unify the string parsing scheme used in the Lempel-Ziv'77 style
+    compressors [2], and the block-move technique of Tichy [3].  Loosely
+    speaking, this works as follows:
+
+        a. Concatenate source and target data.
+        b. Parse the data from left to right as in LZ'77 but
+	   make sure that a parsed segment starts the target data.
+        c. Start to output when reaching target data.
+
+    Parsing is based on string matching algorithms such as suffix trees [4]
+    or hashing with different time and space performance characteristics.
+    Vdelta uses a fast string matching algorithm that requires less memory
+    than other techniques [5,6].  However, even with this algorithm, the
+    memory requirement can still be prohibitive for large files.  A common
+    way to deal with memory limitation is to partition an input file into
+    chunks called "windows" and process them separately. Here, except for
+    unpublished work by Vo, little has been done on designing effective
+    windowing schemes. Current techniques, including Vdelta, simply use
+    source and target windows with corresponding addresses across source
+    and target files.
+
+    String matching and windowing algorithms have large influence on the
+    compression rate of delta and compressed files. However, it is desirable
+    to have a portable encoding format that is independent of such algorithms.
+    This enables construction of client-server applications in which a server
+    may serve clients with unknown computing characteristics.  Unfortunately,
+    all current differencing and compressing tools, including Vdelta, fall
+    short in this respect. Their storage formats are closely intertwined
+    with the implemented string matching and/or windowing algorithms.
+
+    The encoding format Vcdiff proposed here addresses the above issues.
+    Vcdiff achieves the characteristics below:
+
+	Output compactness:
+            The basic encoding format compactly represents compressed or delta
+	    files. Applications can further extend the basic encoding format
+	    with "secondary encoders" to achieve more compression.
+
+	Data portability:
+	    The basic encoding format is free from machine byte order and
+	    word size issues. This allows data to be encoded on one machine
+	    and decoded on a different machine with different architecture.
+
+    	Algorithm genericity:
+	    The decoding algorithm is independent from string matching and
+	    windowing algorithms. This allows competition among implementations
+	    of the encoder while keeping the same decoder.
+
+    	Decoding efficiency:
+	    Except for secondary encoder issues, the decoding algorithm runs
+	    in time proportional to the size of the target file and uses space
+	    proportional to the maximal window size.  Vcdiff differs from more
+	    conventional compressors in that it uses only byte-aligned
+	    data, thus avoiding bit-level operations, which improves
+	    decoding speed at the slight cost of compression efficiency.
+
+    The Vcdiff data format and the algorithms for decoding data shall be
+    described next.  Since Vcdiff treats compression as a special case of
+    differencing, we shall use the term "delta file" to indicate the
+    compressed output for both cases.
+
+
+2. CONVENTIONS
+
+    The basic data unit is a byte.  For portability, Vcdiff shall limit
+    a byte to its lower eight bits even on machines with larger bytes.
+    The bits in a byte are ordered from right to left so that the least
+    significant bit (LSB) has value 1, and the most significant bit (MSB),
+    has value 128.
+
+    For purposes of exposition in this document, we adopt the convention
+    that the LSB is numbered 0, and the MSB is numbered 7.  Bit numbers
+    never appear in the encoded format itself.
+
+    Vcdiff encodes unsigned integer values using a portable variable-sized
+    format (originally introduced in the Sfio library [7]). This encoding
+    treats an integer as a number in base 128. Then, each digit in this
+    representation is encoded in the lower seven bits of a byte. Except for
+    the least significant byte, other bytes have their most significant bit
+    turned on to indicate that there are still more digits in the encoding.
+    The two key properties of this integer encoding that are beneficial
+    to a data compression format are:
+
+	a. The encoding is portable among systems using 8-bit bytes, and
+        b. Small values are encoded compactly.
+
+    For example, consider the value 123456789 which can be represented with
+    four 7-bit digits whose values are 58, 111, 26, 21 in order from most
+    to least significant. Below is the 8-bit byte encoding of these digits.
+    Note that the MSBs of 58, 111 and 26 are on.
+
+                 +-------------------------------------------+
+                 | 10111010 | 11101111 | 10011010 | 00010101 |
+                 +-------------------------------------------+
+                   MSB+58     MSB+111    MSB+26     0+21
+
+
+    Henceforth, the terms "byte" and "integer" will refer to a byte and an
+    unsigned integer as described.
+
+
+    From time to time, algorithms are exhibited to clarify the descriptions
+    of parts of the Vcdiff format. On such occasions, the C language will be
+    used to make precise the algorithms.  The C code shown in this
+    document is meant for clarification only, and is not part of the
+    actual specification of the Vcdiff format.
+
+    In this specification, the key words "MUST", "MUST NOT",
+    "SHOULD", "SHOULD NOT", and "MAY" are to be interpreted as
+    described in RFC2119 [12].
+
+
+3.  DELTA INSTRUCTIONS
+
+    A large target file is partitioned into non-overlapping sections
+    called "target windows". These target windows are processed separately
+    and sequentially based on their order in the target file.
+
+    A target window T of length t may be compared against some source data
+    segment S of length s. By construction, this source data segment S
+    comes either from the source file, if one is used, or from a part of
+    the target file earlier than T.  In this way, during decoding, S is
+    completely known when T is being decoded.
+
+    The choices of T, t, S and s are made by some window selection algorithm
+    which can greatly affect the size of the encoding. However, as seen later,
+    these choices are encoded so that no knowledge of the window selection
+    algorithm is needed during decoding.
+
+    Assume that S[j] represents the jth byte in S, and T[k] represents
+    the kth byte in T.  Then, for the delta instructions, we treat the data
+    windows S and T as substrings of a superstring U formed by concatenating
+    them like this:
+
+        S[0]S[1]...S[s-1]T[0]T[1]...T[t-1]
+
+    The "address" of a byte in S or T is referred to by its location in U.
+    For example, the address of T[k] is s+k.
+
+    The instructions to encode and direct the reconstruction of a target
+    window are called delta instructions. There are three types:
+
+	ADD: This instruction has two arguments, a size x and a sequence of
+	    x bytes to be copied.
+	COPY: This instruction has two arguments, a size x and an address p
+	    in the string U. The arguments specify the substring of U that
+	    must be copied. We shall assert that such a substring must be
+	    entirely contained in either S or T.
+	RUN: This instruction has two arguments, a size x and a byte b that
+	    will be repeated x times.
+
+    Below are example source and target windows and the delta instructions
+    that encode the target window in terms of the source window.
+
+        a b c d e f g h i j k l m n o p
+        a b c d w x y z e f g h e f g h e f g h e f g h z z z z
+
+        COPY  4, 0
+        ADD   4, w x y z
+        COPY  4, 4
+        COPY 12, 24
+	RUN   4, z
+
+
+    Thus, the first letter 'a' in the target window is at location 16
+    in the superstring. Note that the fourth instruction, "COPY 12, 24",
+    copies data from T itself since address 24 is position 8 in T.
+    This instruction also shows that it is fine to overlap the data to be
+    copied with the data being copied from as long as the latter starts
+    earlier. This enables efficient encoding of periodic sequences,
+    i.e., sequences with regularly repeated subsequences. The RUN instruction
+    is a compact way to encode a sequence repeating the same byte even though
+    such a sequence can be thought of as a periodic sequence with period 1.
+
+    To reconstruct the target window, one simply processes one delta
+    instruction at a time and copies the data either from the source window
+    or from the target window being reconstructed, based on the type of the
+    instruction and the associated address, if any.
+
+
+4.  DELTA FILE ORGANIZATION
+
+    A Vcdiff delta file starts with a Header section followed by a sequence
+    of Window sections. The Header section includes magic bytes to identify
+    the file type, and information concerning data processing beyond the
+    basic encoding format. The Window sections encode the target windows.
+
+    Below is the overall organization of a delta file. The indented items
+    refine the ones immediately above them. An item in square brackets may
+    or may not be present in the file depending on the information encoded
+    in the Indicator byte above it.
+
+        Header
+	    Header1                                  - byte
+	    Header2                                  - byte
+	    Header3                                  - byte
+	    Header4                                  - byte
+	    Hdr_Indicator                            - byte
+	    [Secondary compressor ID]                - byte
+
+[@@@ Why is compressor ID not an integer? ]
+[@@@ If we aren't defining any secondary compressors yet, then it seems
+that defining the [Secondary compressor ID] and the corresponding
+VCD_DECOMPRESS Hdr_Indicator bit in this draft has no real value.  An
+implementation of this specification won't be able to decode a VCDIFF
+encoded with this option if it doesn't know about any secondary
+compressors.  It seems that you should specify the bits related to
+secondary compressors once you have defined the first secondary
+compressor.  I can imagine a secondary-compressor might want to supply
+extra information, such as a dictionary of some kind, in which case
+this speculative treatment wouldn't go far enough.]
+
+	    [Length of code table data]              - integer
+	    [Code table data]
+	      	Size of near cache                   - byte
+	        Size of same cache                   - byte
+	        Compressed code table data
+	Window1
+	    Win_Indicator                            - byte
+	    [Source segment size]                    - integer
+	    [Source segment position]                - integer
+            The delta encoding of the target window
+	        Length of the delta encoding         - integer
+	        The delta encoding
+	            Size of the target window        - integer
+	            Delta_Indicator                  - byte
+	            Length of data for ADDs and RUNs - integer
+	            Length of instructions and sizes - integer
+	            Length of addresses for COPYs    - integer
+	            Data section for ADDs and RUNs   - array of bytes
+	            Instructions and sizes section   - array of bytes
+	            Addresses section for COPYs      - array of bytes
+	Window2
+	...
+
+
+
+4.1 The Header Section
+
+    Each delta file starts with a header section organized as below.
+    Note the convention that square-brackets enclose optional items.
+
+	    Header1                                  - byte = 0xD6
+	    Header2                                  - byte = 0xC3
+	    Header3                                  - byte = 0xC4
+
+[@@@ HMMM: 'V', 'C' and 'D' are 0x56, 0x43 and 0x44 in ASCII, so with
+their most significant bits turned on they are 0xD6, 0xC3 and 0xC4
+(not 0xE6, 0xD3 and 0xD4).]
+
+	    Header4                                  - byte
+	    Hdr_Indicator                            - byte
+	    [Secondary compressor ID]                - byte
+	    [Length of code table data]              - integer
+	    [Code table data]
+
+    The first three Header bytes are the ASCII characters 'V', 'C' and 'D'
+    with their most significant bits turned on (in hexadecimal, the values
+    are 0xD6, 0xC3, and 0xC4). The fourth Header byte is currently set to
+    zero. In the future, it might be used to indicate the version of Vcdiff.
+
+    The Hdr_Indicator byte shows if there are any initialization data
+    required to aid in the reconstruction of data in the Window sections.
+    This byte MAY have non-zero values for either, both, or neither of
+    the two bits VCD_DECOMPRESS and VCD_CODETABLE below:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	                ^ ^
+	                | |
+	                | +-- VCD_DECOMPRESS
+	                +---- VCD_CODETABLE
+
+    If bit 0 (VCD_DECOMPRESS) is non-zero, this indicates that a secondary
+    compressor may have been used to further compress certain parts of the
+    delta encoding data as described in Sections 4.3 and 6. In that case,
+    the ID of the secondary compressor is given next. If this bit is zero,
+    the compressor ID byte is not included.
+
+[@@@ If we aren't defining any secondary compressors yet, then it seems
+this bit has no real value yet..]
+
+    If bit 1 (VCD_CODETABLE) is non-zero, this indicates that an
+    application-defined code table is to be used for decoding the delta
+    instructions. This table itself is compressed.  The length of the data
+    comprising this compressed code table and the data follow next. Section 7
+    discusses application-defined code tables.  If this bit is zero, the code
+    table data length and the code table data are not included.
+
+    If both bits are set, then the compressor ID byte is included
+    before the code table data length and the code table data.
+
+
+4.2 The Format of a Window Section
+
+    Each Window section is organized as follows:
+
+	    Win_Indicator                            - byte
+	    [Source segment length]                  - integer
+	    [Source segment position]                - integer
+            The delta encoding of the target window
+
+
+    Below are the details of the various items:
+
+[@@@ Here, I want to replace the Win_Indicator with a source-count,
+followed by source-count length/position pairs?]
+
+        Win_Indicator:
+	    This byte is a set of bits, as shown:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	                ^ ^
+	                | |
+	                | +-- VCD_SOURCE
+	                +---- VCD_TARGET
+
+
+	    If bit 0 (VCD_SOURCE) is non-zero, this indicates that a segment
+            of data from the "source" file was used as the corresponding
+            source window of data to encode the target window. The decoder
+	    will use this same source data segment to decode the target window.
+
+	    If bit 1 (VCD_TARGET) is non-zero, this indicates that a segment
+            of data from the "target" file was used as the corresponding
+	    source window of data to encode the target window. As above, this
+	    same source data segment is used to decode the target window.
+
+	    The Win_Indicator byte MUST NOT have more than one of the bits
+	    set (non-zero).  It MAY have none of these bits set.
+
+	    If one of these bits is set, the byte is followed by two
+            integers to indicate respectively the length and position of
+            the source data segment in the relevant file.  If the
+            indicator byte is zero, the target window was compressed
+            by itself without comparing against another data segment,
+            and these two integers are not included.
+
+        The delta encoding of the target window:
+            This contains the delta encoding of the target window either
+            in terms of the source data segment (i.e., VCD_SOURCE
+            or VCD_TARGET was set) or by itself if no source window
+            is specified. This data format is discussed next.
+
+
+4.3 The Delta Encoding of a Target Window
+
+    The delta encoding of a target window is organized as follows:
+
+	Length of the delta encoding            - integer
+	The delta encoding
+	    Length of the target window         - integer
+	    Delta_Indicator                     - byte
+	    Length of data for ADDs and RUNs    - integer
+	    Length of instructions section      - integer
+	    Length of addresses for COPYs       - integer
+	    Data section for ADDs and RUNs      - array of bytes
+	    Instructions and sizes section      - array of bytes
+	    Addresses section for COPYs         - array of bytes
+
+
+	Length of the delta encoding:
+	    This integer gives the total number of remaining bytes that
+	    comprise data of the delta encoding for this target window.
+
+        The delta encoding:
+	    This contains the data representing the delta encoding which
+	    is described next.
+
+    	Length of the target window:
+	    This integer indicates the actual size of the target window
+            after decompression. A decoder can use this value to allocate
+            memory to store the uncompressed data.
+
+	Delta_Indicator:
+	    This byte is a set of bits, as shown:
+
+	    7 6 5 4 3 2 1 0
+	   +-+-+-+-+-+-+-+-+
+	   | | | | | | | | |
+	   +-+-+-+-+-+-+-+-+
+	              ^ ^ ^
+	              | | |
+	              | | +-- VCD_DATACOMP
+	              | +---- VCD_INSTCOMP
+	              +------ VCD_ADDRCOMP
+
+		VCD_DATACOMP:	bit value 1.
+		VCD_INSTCOMP:	bit value 2.
+		VCD_ADDRCOMP:	bit value 4.
+
+            As discussed, the delta encoding consists of COPY, ADD and RUN
+            instructions. The ADD and RUN instructions have accompanying
+            unmatched data (that is, data that does not specifically match
+            any data in the source window or in some earlier part of the
+            target window) and the COPY instructions have addresses of where
+	    the matches occur. OPTIONALLY, these types of data MAY be further
+	    compressed using a secondary compressor. Thus, Vcdiff separates
+            the encoding of the delta instructions into three parts:
+
+	        a. The unmatched data in the ADD and RUN instructions,
+	        b. The delta instructions and accompanying sizes, and
+                c. The addresses of the COPY instructions.
+
+            If the bit VCD_DECOMPRESS (Section 4.1) was on, each of these
+            sections may have been compressed using the specified secondary
+            compressor. The bit positions 0 (VCD_DATACOMP), 1 (VCD_INSTCOMP),
+            and 2 (VCD_ADDRCOMP) respectively indicate, if non-zero, that
+            the corresponding parts are compressed. Then, these parts MUST
+	    be decompressed before decoding the delta instructions.
+
+	Length of data for ADDs and RUNs:
+	    This is the length (in bytes) of the section of data storing
+            the unmatched data accompanying the ADD and RUN instructions.
+
+	Length of instructions section:
+	    This is the length (in bytes) of the delta instructions and
+            accompanying sizes.
+
+	Length of addresses for COPYs:
+	    This is the length (in bytes) of the section storing
+            the addresses of the COPY instructions.
+
+    	Data section for ADDs and RUNs:
+	    This sequence of bytes encodes the unmatched data for the ADD
+            and RUN instructions.
+
+	Instructions and sizes section:
+	    This sequence of bytes encodes the instructions and their sizes.
+
+	Addresses section for COPYs:
+	    This sequence of bytes encodes the addresses of the COPY
+	    instructions.
+
+
+5. DELTA INSTRUCTION ENCODING
+
+    The delta instructions described in Section 3 represent the results of
+    string matching. For many data differencing applications in which the
+    changes between source and target data are small, any straightforward
+    representation of these instructions would be adequate.  However, for
+    applications including data compression, it is important to encode
+    these instructions well to achieve good compression rates.  From our
+    experience, the following observations can be made:
+
+    a. The addresses in COPY instructions are locations of matches and
+       often occur close by or even exactly equal to one another. This is
+       because data in local regions are often replicated with minor changes.
+       In turn, this means that coding a newly matched address against some
+       set of recently matched addresses can be beneficial.
+
+    b. The matches are often short in length and separated by small amounts
+       of unmatched data. That is, the lengths of COPY and ADD instructions
+       are often small. This is particularly true of binary data such as
+       executable files or structured data such as HTML or XML. In such cases,
+       compression can be improved by combining the encoding of the sizes
+       and the instruction types as well as combining the encoding of adjacent
+       delta instructions with sufficiently small data sizes.
+
+    The subsections below discuss how the Vcdiff data format provides
+    mechanisms enabling encoders to use the above observations to improve
+    compression rates.
+
+
+5.1 Address Encoding Modes of COPY Instructions
+
+    As mentioned earlier, addresses of COPY instructions often occur close
+    to one another or are exactly equal. To take advantage of this phenomenon
+    and encode addresses of COPY instructions more efficiently, the Vcdiff
+    data format supports the use of two different types of address caches.
+    Both the encoder and decoder maintain these caches, so that decoder's
+    caches remain synchronized with the encoder's caches.
+
+    a. A "near" cache is an array with "s_near" slots, each containing an
+       address used for encoding addresses nearby to previously encoded
+       addresses (in the positive direction only).  The near cache also
+       maintains a "next_slot" index to the near cache.  New entries to the
+       near cache are always inserted in the next_slot index, which maintains
+       a circular buffer of the s_near most recent addresses.
+
+    b. A "same" cache is an array with s_same*256 slots, each containing
+       an address.  The same cache maintains a hash table of recent
+       addresses used for repeated encoding of the exact same address.
+
+
+    By default, the parameters s_near and s_same are respectively set to 4
+    and 3. An encoder MAY modify these values, but then it MUST encode the
+    new values in the encoding itself, as discussed in Section 7, so that
+    the decoder can properly set up its own caches.
+
+    At the start of processing a target window, an implementation
+    (encoder or decoder) initializes all of the slots in both caches
+    to zero.  The next_slot pointer of the near cache is set
+    to point to slot zero.
+
+    Each time a COPY instruction is processed by the encoder or
+    decoder, the implementation's caches are updated as follows, where
+    "addr" is the address in the COPY instruction.
+
+    a. The slot in the near cache referenced by the next_slot
+       index is set to addr.  The next_slot index is then incremented
+       modulo s_near.
+
+    b. The slot in the same cache whose index is addr%(s_same*256)
+       is set to addr. [We use the C notations of % for modulo and
+       * for multiplication.]
+
+
+5.2 Example code for maintaining caches
+
+    To make clear the above description, below are example cache data
+    structures and algorithms to initialize and update them:
+
+        typedef struct _cache_s
+        {
+	    int*  near;      /* array of size s_near        */
+            int   s_near;
+            int   next_slot; /* the circular index for near */
+            int*  same;      /* array of size s_same*256    */
+            int   s_same;
+        } Cache_t;
+
+        cache_init(Cache_t* ka)
+        {
+	    int   i;
+
+            ka->next_slot = 0;
+            for(i = 0; i < ka->s_near; ++i)
+                 ka->near[i] = 0;
+
+            for(i = 0; i < ka->s_same*256; ++i)
+                 ka->same[i] = 0;
+        }
+
+        cache_update(Cache_t* ka, int addr)
+        {
+	    if(ka->s_near > 0)
+            {   ka->near[ka->next_slot] = addr;
+                ka->next_slot = (ka->next_slot + 1) % ka->s_near;
+            }
+
+            if(ka->s_same > 0)
+                ka->same[addr % (ka->s_same*256)] = addr;
+        }
+
+
+5.3 Encoding of COPY instruction addresses
+
+    The address of a COPY instruction is encoded using different modes
+    depending on the type of cached address used, if any.
+
+    Let "addr" be the address of a COPY instruction to be decoded and "here"
+    be the current location in the target data (i.e., the start of the data
+    about to be encoded or decoded).  Let near[j] be the jth element in
+    the near cache, and same[k] be the kth element in the same cache.
+    Below are the possible address modes:
+
+	VCD_SELF: This mode has value 0. The address was encoded by itself
+            as an integer.
+
+	VCD_HERE: This mode has value 1. The address was encoded as
+	    the integer value "here - addr".
+
+	Near modes: The "near modes" are in the range [2,s_near+1]. Let m
+	    be the mode of the address encoding. The address was encoded
+	    as the integer value "addr - near[m-2]".
+
+	Same modes: The "same modes" are in the range
+	    [s_near+2,s_near+s_same+1]. Let m be the mode of the encoding.
+	    The address was encoded as a single byte b such that
+	    "addr == same[(m - (s_near+2))*256 + b]".
+
+
+5.3 Example code for encoding and decoding of COPY instruction addresses
+
+    We show example algorithms below to demonstrate use of address modes more
+    clearly. The encoder has freedom to choose address modes; the sample
+    addr_encode() algorithm merely shows one way of picking the address
+    mode. The decoding algorithm addr_decode() will uniquely decode addresses
+    regardless of the encoder's algorithm choice.
+
+    Note that the address caches are updated immediately after an address is
+    encoded or decoded. In this way, the decoder is always synchronized with
+    the encoder.
+
+        int addr_encode(Cache_t* ka, int addr, int here, int* mode)
+        {
+	    int  i, d, bestd, bestm;
+
+	    /* Attempt to find the address mode that yields the
+	     * smallest integer value for "d", the encoded address
+	     * value, thereby minimizing the encoded size of the
+	     * address. */
+
+            bestd = addr; bestm = VCD_SELF;      /* VCD_SELF == 0 */
+
+            if((d = here-addr) < bestd)
+                { bestd = d; bestm = VCD_HERE; } /* VCD_HERE == 1 */
+
+            for(i = 0; i < ka->s_near; ++i)
+                if((d = addr - ka->near[i]) >= 0 && d < bestd)
+                    { bestd = d; bestm = i+2; }
+
+            if(ka->s_same > 0 && ka->same[d = addr%(ka->s_same*256)] == addr)
+                { bestd = d%256; bestm = ka->s_near + 2 + d/256; }
+
+            cache_update(ka,addr);
+
+            *mode = bestm; /* this returns the address encoding mode */
+            return  bestd; /* this returns the encoded address       */
+        }
+
+    Note that the addr_encode() algorithm chooses the best address mode using a
+    local optimization, but that may not lead to the best encoding efficiency
+    because different modes lead to different instruction encodings, as described below.
+
+    The functions addrint() and addrbyte() used in addr_decode() obtain from
+    the "Addresses section for COPYs" (Section 4.3) an integer or a byte,
+    respectively. These utilities will not be described here.  We simply
+    recall that an integer is represented as a compact variable-sized string
+    of bytes as described in Section 2 (i.e., base 128).
+
+        int addr_decode(Cache_t* ka, int here, int mode)
+        {   int  addr, m;
+
+            if(mode == VCD_SELF)
+                 addr = addrint();
+            else if(mode == VCD_HERE)
+                 addr = here - addrint();
+            else if((m = mode - 2) >= 0 && m < ka->s_near) /* near cache */
+                 addr = ka->near[m] + addrint();
+            else /* same cache */
+            {    m = mode - (2 + ka->s_near);
+                 addr = ka->same[m*256 + addrbyte()];
+            }
+
+            cache_update(ka, addr);
+
+            return addr;
+        }
+
+
+5.4 Instruction Codes
+
+    As noted, the data sizes associated with delta instructions are often
+    small. Thus, compression efficiency can be improved by combining the sizes
+    and instruction types in a single encoding, as well as by combining certain
+    pairs of adjacent delta instructions. Effective choices of when to perform
+    such combinations depend on many factors including the data being processed
+    and the string matching algorithm in use. For example, if many COPY
+    instructions have the same data sizes, it may be worthwhile to encode these
+    instructions more compactly than others.
+
+    The Vcdiff data format is designed so that a decoder does not need to be
+    aware of the choices made in encoding algorithms. This is achieved with the
+    notion of an "instruction code table" containing 256 entries. Each entry
+    defines either a single delta instruction or a pair of instructions that
+    have been combined.  Note that the code table itself only exists in main
+    memory, not in the delta file (unless using an application-defined code
+    table, described in Section 7). The encoded data simply includes the index
+    of each instruction and, since there are only 256 indices, each index
+    can be represented as a single byte.
+
+    Each instruction code entry contains six fields, each of which
+    is a single byte with unsigned value:
+
+            +-----------------------------------------------+
+	    | inst1 | size1 | mode1 | inst2 | size2 | mode2 |
+	    +-----------------------------------------------+
+
+@@@ could be more compact
+
+    Each triple (inst,size,mode) defines a delta instruction. The meanings
+    of these fields are as follows:
+
+    inst: An "inst" field can have one of the four values: NOOP (0), ADD (1),
+	RUN (2) or COPY (3) to indicate the instruction types. NOOP means
+	that no instruction is specified. In this case, both the corresponding
+	size and mode fields will be zero.
+
+    size: A "size" field is zero or positive. A value zero means that the
+	size associated with the instruction is encoded separately as
+	an integer in the "Instructions and sizes section" (Section 6).
+	A positive value for "size" defines the actual data size.
+	Note that since the size is restricted to a byte, the maximum
+	value for any instruction with size implicitly defined in the code
+	table is 255.
+
+    mode: A "mode" field is significant only when the associated delta
+	instruction is a COPY. It defines the mode used to encode the
+	associated addresses. For other instructions, this is always zero.
+
+
+5.5 The Code Table
+
+    Following the discussions on address modes and instruction code tables,
+    we define a "Code Table" to have the data below:
+
+	s_near: the size of the near cache,
+	s_same: the size of the same cache,
+	i_code: the 256-entry instruction code table.
+
+    Vcdiff itself defines a "default code table" in which s_near is 4
+    and s_same is 3. Thus, there are 9 address modes for a COPY instruction.
+    The first two are VCD_SELF (0) and VCD_HERE (1). Modes 2, 3, 4 and 5
+    are for addresses coded against the near cache. And, modes 6, 7  and 8
+    are for addresses coded against the same cache.
+
+    The default instruction code table is depicted below, in a compact
+    representation that we use only for descriptive purposes.  See section 7
+    for the specification of how an instruction code table is represented
+    in the Vcdiff encoding format.  In the depiction, a zero value for
+    size indicates that the size is separately coded. The mode of non-COPY
+    instructions is represented as 0 even though it is not used.
+
+
+         TYPE      SIZE     MODE    TYPE     SIZE     MODE     INDEX
+        ---------------------------------------------------------------
+     1.  RUN         0        0     NOOP       0        0        0
+     2.  ADD    0, [1,17]     0     NOOP       0        0      [1,18]
+     3.  COPY   0, [4,18]     0     NOOP       0        0     [19,34]
+     4.  COPY   0, [4,18]     1     NOOP       0        0     [35,50]
+     5.  COPY   0, [4,18]     2     NOOP       0        0     [51,66]
+     6.  COPY   0, [4,18]     3     NOOP       0        0     [67,82]
+     7.  COPY   0, [4,18]     4     NOOP       0        0     [83,98]
+     8.  COPY   0, [4,18]     5     NOOP       0        0     [99,114]
+     9.  COPY   0, [4,18]     6     NOOP       0        0    [115,130]
+    10.  COPY   0, [4,18]     7     NOOP       0        0    [131,146]
+    11.  COPY   0, [4,18]     8     NOOP       0        0    [147,162]
+    12.  ADD       [1,4]      0     COPY     [4,6]      0    [163,174]
+    13.  ADD       [1,4]      0     COPY     [4,6]      1    [175,186]
+    14.  ADD       [1,4]      0     COPY     [4,6]      2    [187,198]
+    15.  ADD       [1,4]      0     COPY     [4,6]      3    [199,210]
+    16.  ADD       [1,4]      0     COPY     [4,6]      4    [211,222]
+    17.  ADD       [1,4]      0     COPY     [4,6]      5    [223,234]
+    18.  ADD       [1,4]      0     COPY       4        6    [235,238]
+    19.  ADD       [1,4]      0     COPY       4        7    [239,242]
+    20.  ADD       [1,4]      0     COPY       4        8    [243,246]
+    21.  COPY        4      [0,8]   ADD        1        0    [247,255]
+        ---------------------------------------------------------------
+
+    In the above depiction, each numbered line represents one or more
+    entries in the actual instruction code table (recall that an entry in
+    the instruction code table may represent up to two combined delta
+    instructions.) The last column ("INDEX") shows the index value or
+    range of index values of the entries covered by that line. The notation
+    [i,j] means values from i through j, inclusive. The first 6 columns of
+    a line in the depiction describe the pairs of instructions used for
+    the corresponding index value(s).
+
+    If a line in the depiction includes a column entry using the [i,j]
+    notation, this means that the line is instantiated for each value
+    in the range from i to j, inclusive.  The notation "0, [i,j]" means
+    that the line is instantiated for the value 0 and for each value
+    in the range from i to j, inclusive.
+
+    If a line in the depiction includes more than one entry using the [i,j]
+    notation, implying a "nested loop" to convert the line to a range of
+    table entries, the first such [i,j] range specifies the outer loop,
+    and the second specifies the inner loop.
+
+    The below examples should make clear the above description:
+
+    Line 1 shows the single RUN instruction with index 0. As the size field
+    is 0, this RUN instruction always has its actual size encoded separately.
+
+    Line 2 shows the 18 single ADD instructions. The ADD instruction with
+    size field 0 (i.e., the actual size is coded separately) has index 1.
+    ADD instructions with sizes from 1 to 17 use code indices 2 to 18 and
+    their sizes are as given (so they will not be separately encoded.)
+
+    Following the single ADD instructions are the single COPY instructions
+    ordered by their address encoding modes. For example, line 11 shows the
+    COPY instructions with mode 8, i.e., the last of the same cache.
+    In this case, the COPY instruction with size field 0 has index 147.
+    Again, the actual size of this instruction will be coded separately.
+
+    Lines 12 to 21 show the pairs of instructions that are combined together.
+    For example, line 12 depicts the 12 entries in which an ADD instruction
+    is combined with an immediately following COPY instruction. The entries
+    with indices 163, 164, 165 represent the pairs in which the ADD
+    instructions all have size 1 while the COPY instructions have mode
+    0 (VCD_SELF) and sizes 4, 5 and 6 respectively.
+
+    The last line, line 21, shows the nine instruction pairs where the first
+    instruction is a COPY and the second is an ADD. In this case, all COPY
+    instructions have size 4 with mode ranging from 0 to 8 and all the ADD
+    instructions have size 1. Thus, the entry with largest index 255
+    combines a COPY instruction of size 4 and mode 8 with an ADD instruction
+    of size 1.
+
+    The choice of the minimum size 4 for COPY instructions in the default code
+    table was made from experiments that showed that excluding small matches
+    (less than 4 bytes long) improved the compression rates.
+
+
+6. DECODING A TARGET WINDOW
+
+    Section 4.3 discusses that the delta instructions and associated data
+    are encoded in three arrays of bytes:
+
+        Data section for ADDs and RUNs,
+        Instructions and sizes section, and
+        Addresses section for COPYs.
+
+
+    Further, these data sections may have been compressed by some
+    secondary compressor. We assume that any such compressed data has been
+    decompressed, so that we now have three arrays:
+
+	inst: bytes coding the instructions and sizes.
+        data: unmatched data associated with ADDs and RUNs.
+	addr: bytes coding the addresses of COPYs.
+
+    These arrays are organized as follows:
+
+	inst:
+	    a sequence of (index, [size1], [size2]) tuples, where "index"
+            is an index into the instruction code table, and size1 and size2
+            are integers that MAY or MAY NOT be included in the tuple as
+            follows. The entry with the given "index" in the instruction
+            code table potentially defines two delta instructions. If the
+            first delta instruction is not a VCD_NOOP and its size is zero,
+            then size1 MUST be present. Otherwise, size1 MUST be omitted and
+            the size of the instruction (if it is not VCD_NOOP) is as defined
+            in the table. The presence or absence of size2 is defined
+            similarly with respect to the second delta instruction.
+
+	data:
+	    a sequence of data values, encoded as bytes.
+
+	addr:
+	    a sequence of address values. Addresses are normally encoded as
+            integers as described in Section 2 (i.e., base 128).
+	    Since the same cache emits addresses in the range [0,255],
+	    however, same cache addresses are always encoded as a
+	    single byte.
+
+    To summarize, each tuple in the "inst" array includes an index to some
+    entry in the instruction code table that determines:
+
+    a. Whether one or two instructions were encoded and their types.
+
+    b. If the instructions have their sizes encoded separately, these
+       sizes will follow, in order, in the tuple.
+
+    c. If the instructions have accompanying data, i.e., ADDs or RUNs,
+       their data will be in the array "data".
+
+    d. Similarly, if the instructions are COPYs, the coded addresses are
+       found in the array "addr".
+
+    The decoding procedure simply processes the arrays by reading one code
+    index at a time, looking up the corresponding instruction code entry,
+    then consuming the respective sizes, data and addresses following the
+    directions in this entry. In other words, the decoder maintains an implicit
+    next-element pointer for each array; "consuming" an instruction tuple,
+    data, or address value implies incrementing the associated pointer.
+
+    For example, if during the processing of the target window, the next
+    unconsumed tuple in the inst array has index value 19, then the first
+    instruction is a COPY, whose size is found as the immediately following
+    integer in the inst array.  Since the mode of this COPY instruction is
+    VCD_SELF, the corresponding address is found by consuming the next
+    integer in the addr array.  The data array is left intact. As the second
+    instruction for code index 19 is a NOOP, this tuple is finished.
+
+
+7. APPLICATION-DEFINED CODE TABLES
+
+    Although the default code table used in Vcdiff is good for general
+    purpose encoders, there are times when other code tables may perform
+    better. For example, to code a file with many identical segments of data,
+    it may be advantageous to have a COPY instruction with the specific size
+    of these data segments so that the instruction can be encoded in a single
+    byte. Such a special code table MUST then be encoded in the delta file
+    so that the decoder can reconstruct it before decoding the data.
+
+    Vcdiff allows an application-defined code table to be specified
+    in a delta file with the following data:
+
+	Size of near cache            - byte
+	Size of same cache            - byte
+	Compressed code table data
+
+    The "compressed code table data" encodes the delta between the default
+    code table (source) and the new code table (target) in the same manner as
+    described in Section 4.3 for encoding a target window in terms of a
+    source window. This delta is computed using the following steps:
+
+    a.  Convert the new instruction code table into a string, "code", of
+	1536 bytes using the below steps in order:
+
+        i. Add in order the 256 bytes representing the types of the first
+	   instructions in the instruction pairs.
+       ii. Add in order the 256 bytes representing the types of the second
+	   instructions in the instruction pairs.
+      iii. Add in order the 256 bytes representing the sizes of the first
+	   instructions in the instruction pairs.
+       iv. Add in order the 256 bytes representing the sizes of the second
+	   instructions in the instruction pairs.
+        v. Add in order the 256 bytes representing the modes of the first
+	   instructions in the instruction pairs.
+       vi. Add in order the 256 bytes representing the modes of the second
+	   instructions in the instruction pairs.
+
+    b.  Similarly, convert the default instruction code table into
+	a string "dflt".
+
+    c.  Treat the string "code" as a target window and "dflt" as the
+	corresponding source data and apply an encoding algorithm to
+	compute the delta encoding of "code" in terms of "dflt".
+	This computation MUST use the default code table for encoding
+	the delta instructions.
+
+    The decoder can then reverse the above steps to decode the compressed
+    table data using the method of Section 6, employing the default code
+    table, to generate the new code table.  Note that the decoder does not
+    need to know anything about the details of the encoding algorithm used
+    in step (c). The decoder is still able to decode the new code table
+    because the Vcdiff format is independent from the choice of encoding
+    algorithm, and because the encoder in step (c) uses the known, default
+    code table.
+
+
+8. PERFORMANCE
+
+    The encoding format is compact. For compression only, using the LZ-77
+    string parsing strategy and without any secondary compressors, the typical
+    compression rate is better than Unix compress and close to gzip.  For
+    differencing, the data format is better than all known methods in
+    terms of its stated goal, which is primarily decoding speed and
+    encoding efficiency.
+
+    We compare the performance of compress, gzip and Vcdiff using the
+    archives of three versions of the Gnu C compiler, gcc-2.95.1.tar,
+    gcc-2.95.2.tar and gcc-2.95.3.tar. The experiments were done on an
+    SGI-MIPS3, 400MHZ. Gzip was used at its default compression level.
+    Vcdiff timings were done using the Vcodex/Vcdiff software (Section 13).
+    As string and window matching typically dominate the computation during
+    compression, the Vcdiff compression times were directly due to the
+    algorithms used in the Vcodex/Vcdiff software. However, the decompression
+    times should be generic and representative of any good implementation
+    of the Vcdiff data format. Timing was done by running each program
+    three times and taking the average of the total cpu+system times.
+
+    Below are the different Vcdiff runs:
+
+	Vcdiff: vcdiff is used as compressor only.
+
+	Vcdiff-d: vcdiff is used as a differencer only. That is, it only
+		compares target data against source data.  Since the files
+		involved are large, they are broken into windows. In this
+		case, each target window starting at some file offset in
+		the target file is compared against a source window with
+		the same file offset (in the source file). The source
+		window is also slightly larger than the target window
+		to increase matching opportunities. The -d option also gives
+		a hint to the string matching algorithm of Vcdiff that
+		the two files are very similar with long stretches of matches.
+		The algorithm takes advantage of this to minimize its
+		processing of source data and save time.
+
+	Vcdiff-dc: This is similar to Vcdiff-d but vcdiff can also compare
+		target data against target data as applicable. Thus, vcdiff
+		both computes differences and compresses data. The windowing
+		algorithm is the same as above. However, the above hint is
+		rescinded in this case.
+
+	Vcdiff-dcs: This is similar to Vcdiff-dc but the windowing algorithm
+		uses a content-based heuristic to select source data segments
+		that are more likely to match with a given target window.
+		Thus, the source data segment selected for a target window
+		often will not be aligned with the file offsets of this
+		target window.
+
+
+                gcc-2.95.1    gcc-2.95.2    compression   decompression
+    raw size      55746560      55797760
+    compress         -          19939390       13.85s	      7.09s
+    gzip             -          12973443       42.99s         5.35s
+    Vcdiff           -          15358786       20.04s         4.65s
+    Vcdiff-d         -            100971       10.93s         1.92s
+    Vcdiff-dc        -             97246       20.03s         1.84s
+    Vcdiff-dcs       -            256445       44.81s         1.84s
+
+		TABLE 1. Compressing gcc-2.95.2.tar given gcc-2.95.1
+
+
+    TABLE 1 shows the raw sizes of gcc-2.95.1.tar and gcc-2.95.2.tar and the
+    sizes of the compressed results. As a pure compressor, the compression
+    rate for Vcdiff is worse than gzip and better than compress. The last
+    three rows show that when two file versions are very similar, differencing
+    can have dramatically good compression rates. Vcdiff-d and Vcdiff-dc use
+    the same simple window selection method but Vcdiff-dc also does compression
+    so its output is slightly smaller. Vcdiff-dcs uses a heuristic based on
+    data content to search for source data that likely will match a given target
+    window. Although it does a good job, the heuristic did not always find the
+    best matches which are given by the simple algorithm of Vcdiff-d.  As a
+    result, the output size is slightly larger. Note also that there is a large
+    cost in computing matching windows this way. Finally, the compression time
+    of Vcdiff-d is nearly half of that of Vcdiff-dc. It is tempting to conclude
+    that the compression feature causes the additional time in Vcdiff-dc
+    relative to Vcdiff-d.  However, this is not the case. The hint given to
+    the Vcdiff string matching algorithm that the two files are likely to
+    have very long stretches of matches helps the algorithm to minimize
+    processing of the "source data", thus saving half the time. However, as we
+    shall see below, when this hint is wrong, the result is an even longer time.
+
+
+                gcc-2.95.2    gcc-2.95.3    compression   decompression
+    raw size      55797760      55787520
+    compress         -          19939453       13.54s	      7.00s
+    gzip             -          12998097       42.63s         5.62s
+    Vcdiff           -          15371737       20.09s         4.74s
+    Vcdiff-d         -          26383849       71.41s         6.41s
+    Vcdiff-dc        -          14461203       42.48s         4.82s
+    Vcdiff-dcs       -           1248543       61.18s         1.99s
+
+		TABLE 2. Compressing gcc-2.95.3.tar given gcc-2.95.2
+
+
+    TABLE 2 shows the raw sizes of gcc-2.95.2.tar and gcc-2.95.3.tar and
+    the sizes of the compressed results. In this case, the tar file of
+    gcc-2.95.3 is rearranged in a way that makes the straightforward method
+    of matching file offsets for source and target windows fail. As a
+    result, Vcdiff-d performs rather dismally both in time and output size.
+    The large time for Vcdiff-d is directly due to the fact that the string
+    matching algorithm has to work much harder to find matches when the hint
+    that two files have long matching stretches fails to hold. On the other
+    hand, Vcdiff-dc does both differencing and compression resulting in good
+    output size. Finally, the window searching heuristic used in Vcdiff-dcs is
+    effective in finding the right matching source windows for target windows
+    resulting in a small output size. This shows why the data format needs to
+    have a way to specify matching windows to gain performance. Finally,
+    we note that the decoding times are always good regardless of how
+    the string matching or window searching algorithms perform.
+
+
+9. FURTHER ISSUES
+
+    This document does not address a few issues:
+
+    Secondary compressors:
+        As discussed in Section 4.3, certain sections in the delta encoding
+	of a window may be further compressed by a secondary compressor.
+	In our experience, the basic Vcdiff format is adequate for most
+	purposes so that secondary compressors are seldom needed. In
+        particular, for normal use of data differencing where the files to
+	be compared have long stretches of matches, much of the gain in
+	compression rate is already achieved by normal string matching.
+	Thus, the use of secondary compressors is seldom needed in this case.
+	However, for applications beyond differencing of such nearly identical
+	files, secondary compressors may be needed to achieve maximal
+	compressed results.
+
+        Therefore, we recommend leaving the Vcdiff data format defined
+	as in this document so that the use of secondary compressors
+ 	can be implemented when they become needed in the future.
+        The formats of the compressed data via such compressors or any
+	compressors that may be defined in the future are left open to
+	their implementations.  These could include Huffman encoding,
+	arithmetic encoding, and splay tree encoding [8,9].
+
+    Large file system vs. small file system:
+	As discussed in Section 4, a target window in a large file may be
+	compared against some source window in another file or in the same
+	file (from some earlier part). In that case, the file offset of the
+	source window is specified as a variable-sized integer in the delta
+	encoding. There is a possibility that the encoding was computed on
+	a system supporting much larger files than in a system where
+	the data may be decoded (e.g., 64-bit file systems vs. 32-bit file
+	systems). In that case, some target data may not be recoverable.
+	This problem could afflict any compression format, and ought
+	to be resolved with a generic negotiation mechanism in the
+	appropriate protocol(s).
+
+
+10.  SUMMARY
+
+    We have described Vcdiff, a general and portable encoding format for
+    compression and differencing. The format is good in that it allows
+    implementing a decoder without knowledge of the encoders. Further,
+    ignoring the use of secondary compressors not defined within the format,
+    the decoding algorithm runs in linear time and requires working space
+    proportional to window sizes.
+
+
+
+11. ACKNOWLEDGEMENTS
+
+    Thanks are due to Balachander Krishnamurthy, Jeff Mogul and Arthur Van Hoff
+    who provided much encouragement to publicize Vcdiff. In particular, Jeff
+    helped clarify the description of the data format presented here.
+
+
+
+12. SECURITY CONSIDERATIONS
+
+    Vcdiff only provides a format to encode compressed and differenced data.
+    It does not address any issues concerning how such data are, in fact,
+    stored in a given file system or the run-time memory of a computer system.
+    Therefore, we do not anticipate any security issues with respect to Vcdiff.
+
+
+
+13. SOURCE CODE AVAILABILITY
+
+    Vcdiff is implemented as a data transforming method in Phong Vo's
+    Vcodex library. AT&T Corp. has made the source code for Vcodex available
+    for anyone to use to transmit data via HTTP/1.1 Delta Encoding [10,11].
+    The source code and the accompanying license are accessible at the URL below:
+
+          http://www.research.att.com/sw/tools
+
+
+14. INTELLECTUAL PROPERTY RIGHTS
+
+   The IETF has been notified of intellectual property rights claimed in
+   regard to some or all of the specification contained in this
+   document.  For more information consult the online list of claimed
+   rights, at <http://www.ietf.org/ipr.html>.
+
+   The IETF takes no position regarding the validity or scope of any
+   intellectual property or other rights that might be claimed to
+   pertain to the implementation or use of the technology described in
+   this document or the extent to which any license under such rights
+   might or might not be available; neither does it represent that it
+   has made any effort to identify any such rights.  Information on the
+   IETF's procedures with respect to rights in standards-track and
+   standards-related documentation can be found in BCP-11.  Copies of
+   claims of rights made available for publication and any assurances of
+   licenses to be made available, or the result of an attempt made to
+   obtain a general license or permission for the use of such
+   proprietary rights by implementors or users of this specification can
+   be obtained from the IETF Secretariat.
+
+
+
+15. IANA CONSIDERATIONS
+
+   The Internet Assigned Numbers Authority (IANA) administers the number
+   space for Secondary Compressor ID values.  Values and their meaning
+   must be documented in an RFC or other peer-reviewed, permanent, and
+   readily available reference, in sufficient detail so that
+   interoperability between independent implementations is possible.
+   Subject to these constraints, name assignments are First Come, First
+   Served - see RFC2434 [13].  Legal ID values are in the range 1..255.
+
+   This document does not define any values in this number space.
+
+
+16. REFERENCES
+
+    [1] D.G. Korn and K.P. Vo, Vdelta: Differencing and Compression,
+        Practical Reusable Unix Software, Editor B. Krishnamurthy,
+        John Wiley & Sons, Inc., 1995.
+
+    [2] J. Ziv and A. Lempel, A Universal Algorithm for Sequential Data
+        Compression, IEEE Trans. on Information Theory, 23(3):337-343, 1977.
+
+    [3] W. Tichy, The String-to-String Correction Problem with Block Moves,
+        ACM Transactions on Computer Systems, 2(4):309-321, November 1984.
+
+    [4] E.M. McCreight, A Space-Economical Suffix Tree Construction
+        Algorithm, Journal of the ACM, 23:262-272, 1976.
+
+    [5] J.J. Hunt, K.P. Vo, W. Tichy, An Empirical Study of Delta Algorithms,
+        IEEE Software Configuration and Maintenance Workshop, 1996.
+
+    [6] J.J. Hunt, K.P. Vo, W. Tichy, Delta Algorithms: An Empirical Analysis,
+        ACM Trans. on Software Engineering and Methodology, 7:192-214, 1998.
+
+    [7] D.G. Korn, K.P. Vo, Sfio: A buffered I/O Library,
+        Proc. of the Summer '91 Usenix Conference, 1991.
+
+    [8] D. W. Jones, Application of Splay Trees to Data Compression,
+        CACM, 31(8):996-1007, 1988.
+
+    [9] M. Nelson, J. Gailly, The Data Compression Book, ISBN 1-55851-434-1,
+        M&T Books, New York, NY, 1995.
+
+   [10] J.C. Mogul, F. Douglis, A. Feldmann, and B. Krishnamurthy,
+        Potential benefits of delta encoding and data compression for HTTP,
+        SIGCOMM '97, Cannes, France, 1997.
+
+   [11] J.C. Mogul, B. Krishnamurthy, F. Douglis, A. Feldmann,
+        Y. Goland, and A. Van Hoff, Delta Encoding in HTTP,
+        IETF, draft-mogul-http-delta-10, 2001.
+
+   [12] S. Bradner, Key words for use in RFCs to Indicate Requirement Levels,
+        RFC 2119, March 1997.
+
+   [13] T. Narten, H. Alvestrand, Guidelines for Writing an IANA
+        Considerations Section in RFCs, RFC2434, October 1998.
+
+
+
+17. AUTHOR'S ADDRESS
+
+    Kiem-Phong Vo (main contact)
+    AT&T Labs, Room D223
+    180 Park Avenue
+    Florham Park, NJ 07932
+    Email: kpv@research.att.com
+    Phone: 1 973 360 8630
+
+    David G. Korn
+    AT&T Labs, Room D237
+    180 Park Avenue
+    Florham Park, NJ 07932
+    Email: dgk@research.att.com
+    Phone: 1 973 360 8602
+
+    Jeffrey C. Mogul
+    Western Research Laboratory
+    Compaq Computer Corporation
+    250 University Avenue
+    Palo Alto, California, 94305, U.S.A.
+    Email: JeffMogul@acm.org
+    Phone: 1 650 617 3304 (email preferred)
+
+    Joshua P. MacDonald
+    Computer Science Division
+    University of California, Berkeley
+    345 Soda Hall
+    Berkeley, CA 94720
+    Email: jmacd@cs.berkeley.edu
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100755
index 0000000..b21ebda
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,32 @@
+# assert() is disabled whenever NDEBUG is defined -- even as 0 -- so undefine it.
+CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1 -UNDEBUG
+#CFLAGS = -O3 -Wall -I.. -DXD3_DEBUG=0 -fno-builtin -DNDEBUG=1  # add -pg to profile
+
+SOURCES = small_page_test.c encode_decode_test.c speed_test.c
+
+DEPS = ../*.h ../*.c *.h
+
+TARGETS = small_page_test encode_decode_test speed_test32 speed_test64 compare_test checksum_test
+
+all: $(TARGETS)
+
+small_page_test: small_page_test.c $(DEPS)
+	$(CC) $(CFLAGS) small_page_test.c -o small_page_test -DXD3_USE_LARGEFILE64=0 -DSECONDARY_DJW=1
+
+encode_decode_test: encode_decode_test.c $(DEPS)
+	$(CC) $(CFLAGS) encode_decode_test.c -o encode_decode_test
+
+speed_test32: speed_test.c $(DEPS)
+	$(CC) $(CFLAGS) -DXD3_USE_LARGEFILE64=0 speed_test.c -o speed_test32
+
+speed_test64: speed_test.c $(DEPS)
+	$(CC) $(CFLAGS) -DXD3_USE_LARGEFILE64=1 speed_test.c -o speed_test64
+
+compare_test: compare_test.c $(DEPS)
+	$(CC) $(CFLAGS) compare_test.c -o compare_test
+
+checksum_test: checksum_test.cc $(DEPS)
+	$(CXX) $(CFLAGS) checksum_test.cc -o checksum_test
+
+clean:
+	rm -f *.exe *.stackdump $(TARGETS)
diff --git a/examples/README b/examples/README
new file mode 100644
index 0000000..60840bf
--- /dev/null
+++ b/examples/README
@@ -0,0 +1,8 @@
+Files in this directory demonstrate how to use the Xdelta3 API.  Copyrights
+are held by the respective authors and these files are not covered by the GPL.
+
+small_page_test.c -- how to use xdelta3 in an environment such as the kernel
+for small pages with little memory
+
+encode_decode_test.c -- how to use xdelta3 to process (encode/decode) data in
+multiple windows with the non-blocking API
diff --git a/examples/checksum_test.cc b/examples/checksum_test.cc
new file mode 100644
index 0000000..ab3ef6c
--- /dev/null
+++ b/examples/checksum_test.cc
@@ -0,0 +1,731 @@
+/* Copyright (C) 2007 Josh MacDonald */
+
+extern "C" {
+#include "test.h"
+}
+
+#include <list>
+#include <vector>
+#include <map>
+#include <algorithm>
+
+using std::list;
+using std::map;
+using std::vector;
+
+// MLCG parameters
+// a, a*
+uint32_t good_32bit_values[] = {
+    1597334677U, // ...
+    741103597U, 887987685U,
+};
+
+// a, a*
+uint64_t good_64bit_values[] = {
+    1181783497276652981ULL, 4292484099903637661ULL,
+    7664345821815920749ULL, // ...
+};
+
+struct true_type { };
+struct false_type { };
+
+template <typename Word>
+int bitsof();
+
+template<>
+int bitsof<uint32_t>() {
+    return 32;
+}
+
+template<>
+int bitsof<uint64_t>() {
+    return 64;
+}
+
+struct plain {  // identity permutation: each byte stands for its own value
+    int operator()(const uint8_t &byte) {
+	return static_cast<int>(byte);
+    }
+};
+
+// Hash functor: keep the checksum's high bits (shift right by "h"; the most
+// "distant" per the spectral test); short windows also XOR in the "mask" low bits.
+template <typename Word>
+struct hhash {
+    Word operator()(const Word& t, const int &h, const int &mask) {
+	const Word high = t >> h;
+	return high ^ (t & mask);
+    }
+};
+
+template <typename Word>
+Word good_word();
+
+template<>
+uint32_t good_word<uint32_t>() {
+    return good_32bit_values[0];
+}
+
+template<>
+uint64_t good_word<uint64_t>() {
+    return good_64bit_values[0];
+}
+
+// CLASSES
+
+#define SELF Word, CksumSize, CksumSkip, Permute, Hash, Compaction
+#define MEMBER template <typename Word, \
+			 int CksumSize, \
+			 int CksumSkip, \
+			 typename Permute, \
+			 typename Hash, \
+                         int Compaction>
+
+MEMBER
+struct cksum_params {
+    typedef Word word_type;
+    typedef Permute permute_type;
+    typedef Hash hash_type;
+
+    enum { cksum_size = CksumSize,
+	   cksum_skip = CksumSkip,
+	   compaction = Compaction,
+    };
+};
+
+
+MEMBER
+struct rabin_karp {  // polynomial rolling checksum over cksum_size bytes
+    typedef Word word_type;
+    typedef Permute permute_type;
+    typedef Hash hash_type;
+
+    enum { cksum_size = CksumSize,
+	   cksum_skip = CksumSkip, 
+	   compaction = Compaction,
+    };
+
+    // Checksum is (a^(cksum_size-1) c_0) + (a^(cksum_size-2) c_1) + ..., a = multiplier
+    rabin_karp() {
+	multiplier = good_word<Word>();
+	powers = new Word[cksum_size];
+	powers[cksum_size - 1] = 1;
+	for (int i = cksum_size - 2; i >= 0; i--) {
+	    powers[i] = powers[i + 1] * multiplier;  // powers[i] = a^(cksum_size-1-i)
+	}
+	product = powers[0] * multiplier;  // a^cksum_size: weight removed for the byte leaving the window
+    }
+
+    ~rabin_karp() {
+	delete [] powers;
+    }
+
+    Word step(const uint8_t *ptr) {  // full (non-incremental) checksum of ptr[0..cksum_size-1]
+	Word h = 0;
+	for (int i = 0; i < cksum_size; i++) {
+	    h += permute_type()(ptr[i]) * powers[i];
+	}
+	return h;
+    }
+
+    Word state0(const uint8_t *ptr) {  // seed the rolling state from the window at ptr
+	incr_state = step(ptr);
+	return incr_state;
+    }
+
+    Word incr(const uint8_t *ptr) {  // roll the state from the window at ptr-1 to the window at ptr
+	incr_state = multiplier * incr_state -
+	    product * permute_type()(ptr[-1]) +
+	    permute_type()(ptr[cksum_size - 1]);
+	return incr_state;
+    }
+
+    Word *powers;      // allocated in the constructor, released in the destructor
+    Word  product;
+    Word  multiplier;
+    Word  incr_state;  // not assigned until state0() is called
+};
+
+MEMBER
+struct adler32_cksum {
+    typedef Word word_type;
+    typedef Permute permute_type;
+    typedef Hash hash_type;
+
+    enum { cksum_size = CksumSize,
+	   cksum_skip = CksumSkip, 
+	   compaction = Compaction,
+    };
+
+    Word step(const uint8_t *ptr) {
+	return xd3_lcksum (ptr, cksum_size);
+    }
+
+    Word state0(const uint8_t *ptr) {
+	incr_state = step(ptr);
+	return incr_state;
+    }
+
+    Word incr(const uint8_t *ptr) {
+	incr_state = xd3_large_cksum_update (incr_state, ptr - 1, cksum_size);
+	return incr_state;
+    }
+
+    Word  incr_state;
+};
+
+// TESTS
+
+template <typename Word>
+struct file_stats {
+    typedef list<const uint8_t*> ptr_list;
+    typedef Word word_type;
+    typedef map<word_type, ptr_list> table_type;
+    typedef typename table_type::iterator table_iterator;
+    typedef typename ptr_list::iterator ptr_iterator;
+
+    int cksum_size;     // bytes compared per window
+    int cksum_skip;
+    int unique;         // windows whose bytes were not seen before
+    int unique_values;  // distinct checksum values, set by freeze()
+    int count;          // total update() calls
+    table_type table;   // checksum value -> windows that produced it
+
+    // Starts with empty statistics for the given window size and skip.
+    file_stats(int size, int skip)
+	: cksum_size(size),
+	  cksum_skip(skip),
+	  unique(0),
+	  unique_values(0),
+	  count(0) {
+    }
+
+    // Forget everything gathered so far.
+    void reset() {
+	table.clear();
+	count = 0;
+	unique_values = 0;
+	unique = 0;
+    }
+
+    // Record the window at ptr under checksum "word"; it only counts as
+    // unique when no earlier window in that bucket has the same bytes.
+    void update(const word_type &word, const uint8_t *ptr) {
+	count++;
+
+	// operator[] creates the empty bucket on first sight of "word".
+	ptr_list &bucket = table[word];
+	ptr_iterator seen = bucket.begin();
+
+	while (seen != bucket.end()) {
+	    if (memcmp(*seen, ptr, cksum_size) == 0) {
+		return;
+	    }
+	    ++seen;
+	}
+
+	unique++;
+	bucket.push_back(ptr);
+    }
+
+    // Keep only the number of distinct checksum values and drop the table.
+    void freeze() {
+	unique_values = table.size();
+	table.clear();
+    }
+};
+
+struct test_result_base;
+
+static vector<test_result_base*> all_tests;
+
+struct test_result_base {
+    virtual ~test_result_base() {
+    }
+    virtual void reset() = 0;
+    virtual void print() = 0;
+    virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0;
+    virtual void stat() = 0;
+    virtual int count() = 0;
+    virtual int dups() = 0;
+    virtual double uniqueness() = 0;
+    virtual double fullness() = 0;
+    virtual double collisions() = 0;
+    virtual double coverage() = 0;
+    virtual double compression() = 0;
+    virtual double time() = 0;
+    virtual double score() = 0;
+    virtual void set_score(double min_dups_frac, double min_time) = 0;
+    virtual double total_time() = 0;
+    virtual int total_count() = 0;
+    virtual int total_dups() = 0;
+};
+
+struct compare_h {
+    bool operator()(test_result_base *a,
+		    test_result_base *b) {
+	return a->score() < b->score();
+    }
+};
+
+MEMBER
+struct test_result : public test_result_base {
+    typedef Word word_type;
+    typedef Permute permute_type;
+    typedef Hash hash_type;
+
+    enum { cksum_size = CksumSize,
+	   cksum_skip = CksumSkip, 
+	   compaction = Compaction,
+    };
+
+    const char *test_name;
+    file_stats<Word> fstats;
+    int test_size;
+    int n_steps;
+    int n_incrs;
+    int s_bits;
+    int s_mask;
+    int t_entries;
+    int h_bits;
+    int h_buckets_full;
+    double h_score;
+    char *hash_table;
+    long accum_millis;
+    int accum_iters;
+
+    // These are not reset
+    double accum_time;
+    int accum_count;
+    int accum_dups;
+    int accum_colls;
+    int accum_size;
+
+    test_result(const char *name)
+	: test_name(name),
+	  fstats(cksum_size, cksum_skip),
+	  hash_table(NULL),
+	  accum_millis(0),
+	  accum_iters(0),
+	  accum_time(0.0),
+	  accum_count(0),
+	  accum_dups(0),
+	  accum_colls(0),
+	  accum_size(0) {
+	all_tests.push_back(this);
+    }
+
+    ~test_result() {
+	reset();
+    }
+
+    void reset() {  // clear per-file state; the totals updated by stat() are kept
+	// size of file
+	test_size = -1;
+
+	// count
+	n_steps = -1;
+	n_incrs = -1;
+
+	// values used by new_table()/summarize_table()
+	s_bits = -1;
+	s_mask = -1;
+	t_entries = -1;
+	h_bits = -1;
+	h_buckets_full = -1;
+
+	accum_millis = 0;
+	accum_iters = 0;
+
+	fstats.reset();
+
+	// temporary
+	if (hash_table) {
+	    delete [] hash_table;  // allocated with new[] in new_table()
+	    hash_table = NULL;
+	}
+    }
+
+    int count() {
+	if (cksum_skip == 1) {
+	    return n_incrs;
+	} else {
+	    return n_steps;
+	}
+    }
+
+    int dups() {
+	return fstats.count - fstats.unique;
+    }
+
+    int colls() {
+	return fstats.unique - fstats.unique_values;
+    }
+
+    double uniqueness() {
+	return 1.0 - (double) dups() / count();
+    }
+
+    double fullness() {
+	return (double) h_buckets_full / (1 << h_bits);
+    }
+
+    double collisions() {
+	return (double) colls() / fstats.unique;
+    }
+
+    double coverage() {
+	return (double) h_buckets_full / uniqueness() / count();
+    }
+
+    double compression() {
+	return 1.0 - coverage();
+    }
+
+    double time() {
+	return (double) accum_millis / accum_iters;
+    }
+
+    double score() {
+	return h_score;
+    }
+
+    void set_score(double min_compression, double min_time) {
+	h_score = (compression() - 0.99 * min_compression)
+	        * (time() - 0.99 * min_time);
+    }
+
+    double total_time() {
+	return accum_time;
+    }
+
+    int total_count() {
+	return accum_count;
+    }
+
+    int total_dups() {
+	return accum_dups;
+    }
+
+    int total_colls() {  // collisions accumulated by stat()
+	return accum_colls;
+    }
+
+    void stat() {
+	accum_time += time();
+	accum_count += count();
+	accum_dups += dups();
+	accum_colls += colls();
+	accum_size += test_size;
+    }
+
+    void print() {
+	if (fstats.count != count()) {
+	    fprintf(stderr, "internal error: %d != %d\n", fstats.count, count());
+	    abort();
+	}
+	printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n",
+	       test_name,
+	       cksum_size,
+	       cksum_skip,
+	       count(),
+	       100.0 * uniqueness(),
+	       h_buckets_full,
+	       100.0 * fullness(),
+	       100.0 * collisions(),
+	       100.0 * coverage(),
+	       h_bits,
+	       0.001 * accum_iters * test_size / accum_millis,
+	       accum_iters);
+    }
+
+    int size_log2 (int slots)
+    {
+	int bits = bitsof<word_type>() - 1;
+	int i;
+
+	for (i = 3; i <= bits; i += 1) {
+	    if (slots <= (1 << i)) {
+		return i - compaction;
+	    }
+	}
+
+	return bits;
+    }
+
+    // Size the bit-table for "entries" slots and derive the hash shift/mask.
+    void new_table(int entries) {
+	t_entries = entries;
+	h_bits = size_log2(entries);
+	int n = 1 << h_bits;
+	s_bits = bitsof<word_type>() - h_bits;
+	s_mask = n - 1;
+	// get() calls this on every run: release the previous table, and
+	// round up so that tables with fewer than 8 slots still get a byte.
+	delete [] hash_table;
+	hash_table = new char[(n + 7) / 8]();
+    }
+
+    int get_table_bit(int i) {
+	return hash_table[i/8] & (1 << i%8);
+    }
+
+    int set_table_bit(int i) {
+	return hash_table[i/8] |= (1 << i%8);
+    }
+
+    void summarize_table() {
+	int n = 1 << h_bits;
+	int f = 0;
+	for (int i = 0; i < n; i++) {
+	    if (get_table_bit(i)) {
+		f++;
+	    }
+	}
+	h_buckets_full = f;
+    }
+
+    void get(const uint8_t* buf, const int buf_size, int test_iters) {
+	rabin_karp<SELF> test;
+	//adler32_cksum<SELF> test;
+	hash_type hash;
+	const uint8_t *ptr;
+	const uint8_t *end;
+	int last_offset;
+	int periods;
+	int stop;
+
+	test_size = buf_size;
+	last_offset = buf_size - cksum_size;
+
+	if (last_offset < 0) {
+	    periods = 0;
+	    n_steps = 0;
+	    n_incrs = 0;
+	    stop = -cksum_size;
+	} else {
+	    periods = last_offset / cksum_skip;
+	    n_steps = periods + 1;
+	    n_incrs = last_offset + 1;
+	    stop = last_offset - (periods + 1) * cksum_skip;
+	}
+
+	// Compute file stats once.
+	if (fstats.unique_values == 0) {
+	    if (cksum_skip == 1) {
+		for (int i = 0; i <= buf_size - cksum_size; i++) {
+		    fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i);
+		}
+	    } else {
+		ptr = buf + last_offset;
+		end = buf + stop;
+		
+		for (; ptr != end; ptr -= cksum_skip) {
+		    fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr);
+		}
+	    }
+	    fstats.freeze();
+	}
+
+	long start_test = get_millisecs_now();
+
+	if (cksum_skip != 1) {
+	    new_table(n_steps);
+
+	    for (int i = 0; i < test_iters; i++) {
+		ptr = buf + last_offset;
+		end = buf + stop;
+
+		for (; ptr != end; ptr -= cksum_skip) {
+		    set_table_bit(hash(test.step(ptr), s_bits, s_mask));
+		}
+	    }
+
+	    summarize_table();
+	}
+
+	stop = buf_size - cksum_size + 1;
+	if (stop < 0) {
+	    stop = 0;
+	}
+
+	if (cksum_skip == 1) {
+
+	    new_table(n_incrs);
+
+	    for (int i = 0; i < test_iters; i++) {
+		ptr = buf;
+		end = buf + stop;
+
+		if (ptr != end) {
+		    set_table_bit(hash(test.state0(ptr++), s_bits, s_mask));
+		}
+
+		for (; ptr != end; ptr++) {
+		    Word w = test.incr(ptr);
+		    assert(w == test.step(ptr));
+		    set_table_bit(hash(w, s_bits, s_mask));
+		}
+	    }
+
+	    summarize_table();
+	}
+
+	accum_iters += test_iters;
+	accum_millis += get_millisecs_now() - start_test;
+    }
+};
+
+// Print a C table of 64 successive powers of good_word<Word>(), starting at 1.
+template <typename Word>
+void print_array(const char *tname) {
+    printf("static const %s hash_multiplier[64] = {\n", tname);
+    Word p = 1;
+    for (int i = 0; i < 64; i++, p *= good_word<Word>()) {
+	printf("  %llu%s,\n", (unsigned long long) p, sizeof(Word) > 4 ? "ULL" : "U");
+    }
+    printf("};\n");
+}
+
+int main(int argc, char** argv) {
+  int i;
+  uint8_t *buf = NULL;
+  size_t buf_len = 0;
+  int ret;
+
+  if (argc <= 1) {
+    fprintf(stderr, "usage: %s file ...\n", argv[0]);
+    return 1;
+  }
+
+  //print_array<uint32_t>("uint32_t");
+
+#define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \
+      _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \
+      (#T "_" #Z "_" #S "_" #P "_" #H "_" #C)
+
+#if 0
+
+  TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
+
+#endif
+
+#define TESTS(SKIP) \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 3)
+  
+#define TESTS_ALL(SKIP) \
+  TEST(uint32_t, 3, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 3, SKIP, plain, hhash, 1); \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
+  TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
+  TEST(uint32_t, 5, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 5, SKIP, plain, hhash, 1); \
+  TEST(uint32_t, 8, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 8, SKIP, plain, hhash, 1); \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
+  TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \
+  TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 13, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 13, SKIP, plain, hhash, 1); \
+  TEST(uint32_t, 15, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 15, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 16, SKIP, plain, hhash, 0); /* x */ \
+  TEST(uint32_t, 16, SKIP, plain, hhash, 1); /* x */ \
+  TEST(uint32_t, 21, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 21, SKIP, plain, hhash, 1); \
+  TEST(uint32_t, 34, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 34, SKIP, plain, hhash, 1); \
+  TEST(uint32_t, 55, SKIP, plain, hhash, 0); \
+  TEST(uint32_t, 55, SKIP, plain, hhash, 1)
+
+  TESTS(1); // *
+//   TESTS(2); // *
+//   TESTS(3); // *
+//   TESTS(5); // *
+//   TESTS(8); // *
+//   TESTS(9);
+//   TESTS(11);
+//   TESTS(13); // *
+  TESTS(15);
+//   TESTS(16);
+//   TESTS(21); // *
+//   TESTS(34); // *
+//   TESTS(55); // *
+//   TESTS(89); // *
+
+  for (i = 1; i < argc; i++) {
+    if ((ret = read_whole_file(argv[i],
+			       & buf,
+			       & buf_len))) {
+      return 1;
+    }
+
+    fprintf(stderr, "file %s is %zu bytes\n",
+	    argv[i], buf_len);
+
+    double min_time = -1.0;
+    double min_compression = 0.0;
+
+    for (vector<test_result_base*>::iterator i = all_tests.begin();
+	 i != all_tests.end(); ++i) {
+	test_result_base *test = *i;
+	test->reset();
+
+	int iters = 100;
+	long start_test = get_millisecs_now();
+
+	do {
+	    test->get(buf, buf_len, iters);
+	    iters *= 3;
+	    iters /= 2;
+	} while (get_millisecs_now() - start_test < 2000);
+
+	test->stat();
+
+	if (min_time < 0.0) {
+	    min_compression = test->compression();
+	    min_time = test->time();
+	}
+
+	if (min_time > test->time()) {
+	    min_time = test->time();
+	}
+
+	if (min_compression > test->compression()) {
+	    min_compression = test->compression();
+	}
+
+	test->print();
+    }
+
+//     for (vector<test_result_base*>::iterator i = all_tests.begin();
+// 	 i != all_tests.end(); ++i) {
+// 	test_result_base *test = *i;
+// 	test->set_score(min_compression, min_time);
+//     }	
+
+//     sort(all_tests.begin(), all_tests.end(), compare_h());
+    
+//     for (vector<test_result_base*>::iterator i = all_tests.begin();
+// 	 i != all_tests.end(); ++i) {
+// 	test_result_base *test = *i;
+// 	test->print();
+//     }	
+    
+    free(buf);
+    buf = NULL;
+  }
+
+  return 0;      
+}
diff --git a/examples/compare_test.c b/examples/compare_test.c
new file mode 100644
index 0000000..f3b3ea2
--- /dev/null
+++ b/examples/compare_test.c
@@ -0,0 +1,123 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <assert.h>
+
+#include "xdelta3.h"
+
+#define NUM (1<<20)
+#define ITERS 100
+
+/* x86 time-stamp counter (from wikipedia on RDTSC); static so C99 inline links. */
+static inline uint64_t rdtsc(void) {
+  uint32_t lo, hi;
+  __asm__ volatile ("rdtsc" : "=a" (lo), "=d" (hi));
+  return (uint64_t)hi << 32 | lo;
+}
+
+typedef int (*test_func)(const char *s1, const char *s2, int n);
+
+/* Time func over ITERS runs of NUM bytes and print the mean cycle count. */
+void run_test(const char *buf1, const char *buf2,
+	      const char *name, test_func func) {
+  uint64_t start, end;
+  uint64_t accum = 0;
+  int i, x;
+  for (i = 0; i < ITERS; i++) {
+    start = rdtsc();
+    x = func(buf1, buf2, NUM);
+    end = rdtsc();
+    accum += end - start;
+    assert(x == NUM - 1);
+  }
+
+  accum /= ITERS;
+  /* "%qu" is a BSD extension; "%llu" with a cast is standard C99. */
+  printf("%s : %llu cycles\n", name, (unsigned long long) accum);
+}
+
+/* memcmp adapter; callers arrange a difference at s1[n-1].  Yields n - 1
+ * if s1 sorts first, else n + 1.  Build w/ -fno-builtin for this to be fast. */
+int memcmp_fake(const char *s1, const char *s2, int n) {
+  if (memcmp(s1, s2, n) < 0) { return n - 1; }
+  return n + 1;
+}
+
+#define UNALIGNED_OK 1  /* nonzero: compare an int at a time through cast pointers */
+static inline int  /* returns the length of the common prefix, at most n */
+test2(const char *s1c, const char *s2c, int n)
+{
+  int i = 0;
+#if UNALIGNED_OK
+  int nint = n / sizeof(int);
+
+  if (nint >> 3)  /* at least 8 whole ints */
+    {
+      int j = 0;
+      const int *s1 = (const int*)s1c;
+      const int *s2 = (const int*)s2c;
+      int nint_8 = nint - 8;
+      /* 8 int compares per pass; stops at a mismatch or when fewer than 8 ints remain */
+      while (i <= nint_8 &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++] &&
+	     s1[i++] == s2[j++]) { }
+
+      i = (i - 1) * sizeof(int);  /* byte offset of the last int examined; the byte loop resumes there */
+    }
+#endif
+
+  while (i < n && s1c[i] == s2c[i])
+    {
+      i++;
+    }
+  return i;
+}
+
+/* Byte-at-a-time reference: length of the common prefix of the two
+ * buffers, never more than n. */
+static inline int
+test1(const char *s1c, const char *s2c, int n) {
+  int matched = 0;
+  for (; matched < n && s1c[matched] == s2c[matched]; matched++)
+    { }
+  return matched;
+}
+
+int main(/*int argc, char **argv*/) {
+  char *buf1 = malloc(NUM+1);
+  char *buf2 = malloc(NUM+1);  /* +1: the second pass compares from buf2+1 */
+  int i;
+
+  if (buf1 == NULL || buf2 == NULL) {
+    fprintf(stderr, "malloc failed\n");
+    return 1;
+  }
+  for (i = 0; i < NUM; i++) {
+    buf1[i] = buf2[i] = rand();
+  }
+  buf2[NUM-1]++;  /* the only difference is in the last compared byte */
+
+  printf ("ALIGNED\n");
+  run_test(buf1, buf2, "memcmp", &memcmp_fake);
+  run_test(buf1, buf2, "test1", &test1);
+  run_test(buf1, buf2, "test2", &test2);
+
+  for (i = 0; i < NUM; i++) {
+    buf1[i] = buf2[i+1] = rand();
+  }
+  buf2[NUM]++;
+
+  printf ("UNALIGNED\n");
+  run_test(buf1, buf2+1, "memcmp", &memcmp_fake);
+  run_test(buf1, buf2+1, "test1", &test1);
+  run_test(buf1, buf2+1, "test2", &test2);
+  free(buf1); free(buf2);
+  return 0;
+}
diff --git a/examples/encode_decode_test.c b/examples/encode_decode_test.c
new file mode 100644
index 0000000..7bcf109
--- /dev/null
+++ b/examples/encode_decode_test.c
@@ -0,0 +1,204 @@
+//
+// Permission to distribute this example by
+// Copyright (C) 2007 Ralf Junker
+// Ralf Junker <delphi@yunqa.de>
+// http://www.yunqa.de/delphi/
+
+//---------------------------------------------------------------------------
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include "xdelta3.h"
+#include "xdelta3.c"
+
+//---------------------------------------------------------------------------
+
+int code (  // Encode or decode InFile to OutFile; returns 0 on success
+  int encode,       // nonzero selects xd3_encode_input, zero xd3_decode_input
+  FILE*  InFile,
+  FILE*  SrcFile ,  // optional source file; may be NULL
+  FILE* OutFile,
+  int BufSize )     // window and source block size, raised to XD3_ALLOCSIZE
+{
+  int r, ret;
+  struct stat statbuf;
+  xd3_stream stream;
+  xd3_config config;
+  xd3_source source;
+  void* Input_Buf;
+  int Input_Buf_Read;
+
+  if (BufSize < XD3_ALLOCSIZE)
+    BufSize = XD3_ALLOCSIZE;
+
+  memset (&stream, 0, sizeof (stream));
+  memset (&source, 0, sizeof (source));
+
+  xd3_init_config(&config, XD3_ADLER32);
+  config.winsize = BufSize;
+  if ((ret = xd3_config_stream(&stream, &config)) != 0) return ret;
+
+  if (SrcFile)
+  {
+    r = fstat(fileno(SrcFile), &statbuf);
+    if (r)
+      return r;
+    source.size = statbuf.st_size;
+    source.blksize = BufSize;
+    source.curblk = malloc(source.blksize);
+
+    /* Load 1st block of stream. */
+    r = fseek(SrcFile, 0, SEEK_SET);
+    if (r)
+      return r;
+    source.onblk = fread((void*)source.curblk, 1, source.blksize, SrcFile);
+    source.curblkno = 0;
+    /* Set the stream. */
+    xd3_set_source(&stream, &source);
+  }
+
+  Input_Buf = malloc(BufSize);
+
+  fseek(InFile, 0, SEEK_SET);
+  do
+  {
+    Input_Buf_Read = fread(Input_Buf, 1, BufSize, InFile);
+    if (Input_Buf_Read < BufSize)
+    {
+      xd3_set_flags(&stream, XD3_FLUSH | stream.flags);
+    }
+    xd3_avail_input(&stream, Input_Buf, Input_Buf_Read);
+
+process:
+    if (encode)
+      ret = xd3_encode_input(&stream);
+    else
+      ret = xd3_decode_input(&stream);
+
+    switch (ret)
+    {
+    case XD3_INPUT:
+      {
+        fprintf (stderr,"XD3_INPUT\n");
+        continue;
+      }
+
+    case XD3_OUTPUT:
+      {
+        fprintf (stderr,"XD3_OUTPUT\n");
+        r = fwrite(stream.next_out, 1, stream.avail_out, OutFile);
+        if (r != (int)stream.avail_out)
+          return -1;  // short write; r may be 0, which callers read as success
+	xd3_consume_output(&stream);
+        goto process;
+      }
+
+    case XD3_GETSRCBLK:
+      {
+        fprintf (stderr,"XD3_GETSRCBLK %qd\n", source.getblkno);
+        if (SrcFile)
+        {
+          r = fseek(SrcFile, source.blksize * source.getblkno, SEEK_SET);
+          if (r)
+            return r;
+          source.onblk = fread((void*)source.curblk, 1,
+			       source.blksize, SrcFile);
+          source.curblkno = source.getblkno;
+        }
+        goto process;
+      }
+
+    case XD3_GOTHEADER:
+      {
+        fprintf (stderr,"XD3_GOTHEADER\n");
+        goto process;
+      }
+
+    case XD3_WINSTART:
+      {
+        fprintf (stderr,"XD3_WINSTART\n");
+        goto process;
+      }
+
+    case XD3_WINFINISH:
+      {
+        fprintf (stderr,"XD3_WINFINISH\n");
+        goto process;
+      }
+
+    default:
+      {
+        fprintf (stderr,"!!! INVALID %s %d !!!\n",
+		stream.msg, ret);
+        return ret;
+      }
+
+    }
+
+  }
+  while (Input_Buf_Read == BufSize);
+
+  free(Input_Buf);
+
+  free((void*)source.curblk);
+  xd3_close_stream(&stream);
+  xd3_free_stream(&stream);
+
+  return 0;
+
+}
+
+// Encodes argv[2] against argv[1], then decodes the result as a round trip.
+int main(int argc, char* argv[])
+{
+  FILE*  InFile;
+  FILE*  SrcFile;
+  FILE* OutFile;
+  int r;
+
+  if (argc != 3) {
+    fprintf (stderr, "usage: %s source input\n", argv[0]);
+    return 1;
+  }
+
+  char *input = argv[2];
+  char *source = argv[1];
+  const char *output = "encoded.testdata";
+  const char *decoded = "decoded.testdata";
+
+  /* Encode */
+  InFile = fopen(input, "rb");
+  SrcFile = fopen(source, "rb");
+  OutFile = fopen(output, "wb");
+  if (InFile == NULL || SrcFile == NULL || OutFile == NULL) {
+    fprintf (stderr, "Encode error: cannot open files\n");
+    return 1;
+  }
+  r = code (1, InFile, SrcFile, OutFile, 0x1000);
+  fclose(OutFile);
+  fclose(SrcFile);
+  fclose(InFile);
+  if (r) {
+    fprintf (stderr, "Encode error: %d\n", r);
+    return r;
+  }
+
+  /* Decode */
+  InFile = fopen(output, "rb");
+  SrcFile = fopen(source, "rb");
+  OutFile = fopen(decoded, "wb");
+  if (InFile == NULL || SrcFile == NULL || OutFile == NULL) {
+    fprintf (stderr, "Decode error: cannot open files\n");
+    return 1;
+  }
+  r = code (0, InFile, SrcFile, OutFile, 0x1000);
+  fclose(OutFile);
+  fclose(SrcFile);
+  fclose(InFile);
+  if (r) {
+    fprintf (stderr, "Decode error: %d\n", r);
+    return r;
+  }
+
+  return 0;
+}
diff --git a/examples/small_page_test.c b/examples/small_page_test.c
new file mode 100755
index 0000000..2d9ae93
--- /dev/null
+++ b/examples/small_page_test.c
@@ -0,0 +1,202 @@
+/* Copyright (C) 2007 Josh MacDonald */
+
+#include <stdio.h>
+
+#define PAGE_SIZE 4096
+
+#define SPACE_MAX 131072   // how much memory per process
+#define OUTPUT_MAX 1024    // max size for output
+#define XD3_ALLOCSIZE 256  // internal size for various buffers
+#define IOPT_SIZE 128      // instruction buffer
+
+// SPACE_MAX of 32K is sufficient for most inputs with XD3_COMPLEVEL_1
+// XD3_COMPLEVEL_9 requires about 4x more space than XD3_COMPLEVEL_1
+
+#include "xdelta3.h"
+#include "xdelta3.c"
+
+typedef struct _context {
+  uint8_t *buffer;
+  int allocated;
+} context_t;
+
+static int max_allocated = 0;
+
+/* Bump allocator over ctx->buffer, bounded by SPACE_MAX; NULL when full. */
+void*
+process_alloc (void* opaque, usize_t items, usize_t size)
+{
+  context_t *ctx = (context_t*) opaque;
+  usize_t avail = SPACE_MAX - ctx->allocated;
+  void *ret;
+  /* Divide instead of multiplying so items * size cannot wrap around. */
+  if (size != 0 && items > avail / size)
+    {
+      return NULL;
+    }
+  ret = ctx->buffer + ctx->allocated;
+  ctx->allocated += items * size;
+  return ret;
+}
+
+void
+process_free (void* opaque, void *ptr)
+{
+}
+
+int
+process_page (int            is_encode,
+	      int          (*func) (xd3_stream *),
+	      const uint8_t *input,
+	      usize_t        input_size,
+	      const uint8_t *source,
+	      uint8_t       *output,
+	      usize_t       *output_size,
+	      usize_t        output_size_max,
+	      int            flags) {
+  /* Runs func over "input" against the one-block "source"; returns 0 on success. */
+  /* On my x86 this is 1072 of objects on the stack */
+  xd3_stream stream;
+  xd3_config config;
+  xd3_source src;
+  context_t *ctx = calloc(SPACE_MAX, 1);
+  int ret;
+
+  memset (&config, 0, sizeof(config));
+  memset (&src, 0, sizeof(src));  /* fields not assigned below must not be garbage */
+  if (ctx == NULL)
+    {
+      printf("calloc failed\n");
+      return -1;
+    }
+
+  ctx->buffer = (uint8_t*)ctx;
+  ctx->allocated = sizeof(*ctx);
+
+  config.flags = flags;
+  config.winsize = PAGE_SIZE;
+  config.sprevsz = PAGE_SIZE;
+  config.srcwin_maxsz = PAGE_SIZE;
+  config.iopt_size = IOPT_SIZE;
+  config.alloc = &process_alloc;
+  config.freef = &process_free;
+  config.opaque = (void*) ctx;
+
+  src.size = PAGE_SIZE;
+  src.blksize = PAGE_SIZE;
+  src.onblk = PAGE_SIZE;
+  src.curblk = source;
+  src.curblkno = 0;
+
+  if ((ret = xd3_config_stream (&stream, &config)) != 0 ||
+      (ret = xd3_set_source (&stream, &src)) != 0 ||
+      (ret = xd3_process_stream (is_encode,
+				 &stream,
+				 func, 1,
+				 input, input_size,
+				 output, output_size,
+				 output_size_max)) != 0)
+    {
+      if (stream.msg != NULL)
+	{
+	  fprintf(stderr, "stream message: %s\n", stream.msg);
+	}
+    }
+
+  xd3_free_stream (&stream);
+  if (max_allocated < ctx->allocated)
+    {
+      max_allocated = ctx->allocated;
+      fprintf(stderr, "max allocated %d\n", max_allocated);
+    }
+
+  free(ctx);
+  return ret;
+}
+
+int test(int stride, int encode_flags)
+{
+  uint8_t frompg[PAGE_SIZE];
+  uint8_t topg[PAGE_SIZE];
+  uint8_t output[OUTPUT_MAX];
+  uint8_t reout[PAGE_SIZE];
+  usize_t output_size;
+  usize_t re_size;
+  int i, j, ret;
+
+  for (i = 0; i < PAGE_SIZE; i++)
+    {
+      topg[i] = frompg[i] = (rand() >> 3 ^ rand() >> 6 ^ rand() >> 9);
+    }
+
+  // change 1 byte every stride
+  if (stride > 0)
+    {
+      for (j = stride; j <= PAGE_SIZE; j += stride)
+	{
+	  topg[j - 1] ^= 0xff;
+	}
+    }
+
+  if ((ret = process_page (1, xd3_encode_input,
+			   topg, PAGE_SIZE,
+			   frompg, output,
+			   &output_size, OUTPUT_MAX,
+			   encode_flags)) != 0)
+    {
+      fprintf (stderr, "encode failed: stride %u flags 0x%x\n", 
+	       stride, encode_flags);
+      return ret;
+    }
+
+  if ((ret = process_page (0, xd3_decode_input,
+			   output, output_size,
+			   frompg, reout,
+			   &re_size, PAGE_SIZE,
+			   0)) != 0)
+    {
+      fprintf (stderr, "decode failed: stride %u output_size %u flags 0x%x\n",
+	       stride, output_size, encode_flags);
+      return ret;
+    }
+
+  if (output_size > OUTPUT_MAX || re_size != PAGE_SIZE)
+    {
+      fprintf (stderr, "internal error: %u != %u\n", output_size, re_size);
+      return -1;
+    }
+
+  for (i = 0; i < PAGE_SIZE; i++)
+    {
+      if (reout[i] != topg[i])
+	{
+	  fprintf (stderr, "encode-decode error: position %d\n", i);
+	  return -1;
+	}
+    }
+
+  fprintf(stderr, "stride %d flags 0x%x size %u ", 
+	  stride, encode_flags, output_size);
+  fprintf(stderr, "%s\n", (ret == 0) ? "OK" : "FAIL");
+
+  return 0;
+}
+
+int main()
+{
+  int stride;
+  int level;
+  int failed = 0;  /* set when any test() reports an error */
+
+  /* levels 1, 3, 6, 9; each run with and without XD3_SEC_DJW */
+  for (level = 1; level < 10; level = (level == 1 ? 3 : level + 3))
+    {
+      int lflag = level << XD3_COMPLEVEL_SHIFT;
+      for (stride = 2; stride <= PAGE_SIZE; stride += 2)
+	{
+	  failed |= (test(stride, lflag) != 0);
+	  failed |= (test(stride, lflag | XD3_SEC_DJW) != 0);
+	}
+    }
+  return failed;
+}
diff --git a/examples/speed_test.c b/examples/speed_test.c
new file mode 100644
index 0000000..d9ce5aa
--- /dev/null
+++ b/examples/speed_test.c
@@ -0,0 +1,73 @@
+/* Copyright (C) 2007 Josh MacDonald */
+
+#include "test.h"
+
+/* One in-memory encode of to_buf against from_buf: returns the delta size,
+ * or reports the error and abort()s when the encode fails. */
+usize_t bench_speed(const uint8_t *from_buf, const size_t from_len,
+		 const uint8_t *to_buf, const size_t to_len,
+		 uint8_t *delta_buf, const size_t delta_alloc,
+		 int flags) {
+  usize_t encoded_len;
+  const int err = xd3_encode_memory(to_buf, to_len, from_buf, from_len,
+				    delta_buf, &encoded_len, delta_alloc, flags);
+  if (err == 0) { return encoded_len; }
+  fprintf(stderr, "encode failure: %d: %s\n", err, xd3_strerror(err));
+  abort();
+}
+
+int main(int argc, char **argv) {  /* time COUNT encodes of TO against FROM */
+  int repeat, level;
+  char *from, *to;
+  uint8_t *from_buf = NULL, *to_buf = NULL, *delta_buf = NULL;
+  size_t from_len = 0, to_len, delta_alloc, delta_size = 0;
+  long start, finish;
+  int i, ret;
+  int flags;
+
+  if (argc != 5) {
+    fprintf(stderr, "usage: speed_test LEVEL COUNT FROM TO\n");
+    return 1;
+  }
+
+  level = atoi(argv[1]);
+  repeat = atoi(argv[2]);
+  from = argv[3];
+  to = argv[4];
+  flags = (level << XD3_COMPLEVEL_SHIFT) & XD3_COMPLEVEL_MASK;
+
+  /* a FROM of "null" skips reading a source file */
+  if ((strcmp(from, "null") != 0 &&
+       (ret = read_whole_file(from, &from_buf, &from_len))) ||
+      (ret = read_whole_file(to, &to_buf, &to_len))) {
+    fprintf(stderr, "read_whole_file error\n");
+    goto exit;
+  }
+
+  delta_alloc = to_len * 11 / 10;
+  delta_buf = main_malloc(delta_alloc);
+
+  start = get_millisecs_now();
+
+  for (i = 0; i < repeat; ++i) {
+    delta_size = bench_speed(from_buf, from_len,
+			     to_buf, to_len, delta_buf, delta_alloc, flags);
+  }
+
+  finish = get_millisecs_now();
+  /* sizeof and delta_size are size_t, so they print with %zu */
+  fprintf(stderr,
+	  "STAT: encode %3.2f ms from %s to %s repeat %d %zubit delta %zu\n",
+	  (double)(finish - start) / repeat, from, to, repeat, sizeof (xoff_t) * 8, delta_size);
+
+  ret = 0;
+
+  if (0) {
+  exit:
+    ret = 1;
+  }
+
+  main_free(to_buf);
+  main_free(from_buf);
+  main_free(delta_buf);
+  return ret;
+}
diff --git a/examples/test.h b/examples/test.h
new file mode 100644
index 0000000..e8016bb
--- /dev/null
+++ b/examples/test.h
@@ -0,0 +1,42 @@
+/* Copyright (C) 2007 Josh MacDonald */
+
+#define NOT_MAIN 1
+
+#include "xdelta3.h"
+#include "xdelta3.c"
+
+/* Reads the entire file NAME into a buffer obtained from main_malloc().
+ *
+ * On success returns 0 with *buf_ptr / *buf_len describing the contents.
+ * On failure returns nonzero.  *buf_ptr is untouched when the failure
+ * happens before allocation; once it has been assigned, releasing it
+ * remains the caller's job even when the read itself fails. */
+static int read_whole_file(const char *name,
+			   uint8_t **buf_ptr,
+			   size_t *buf_len) {
+  main_file file;
+  int ret;
+  xoff_t len;
+  usize_t nread;
+  main_file_init(&file);
+  file.filename = name;
+  ret = main_file_open(&file, name, XO_READ);
+  if (ret != 0) {
+    fprintf(stderr, "open failed\n");
+    goto exit;
+  }
+  ret = main_file_stat(&file, &len, 0);
+  if (ret != 0) {
+    fprintf(stderr, "stat failed\n");
+    goto exit;
+  }
+
+  /* The length is stored in a size_t and later compared with a usize_t
+   * read count.  Refuse lengths that do not survive both conversions
+   * rather than silently truncating them. */
+  if ((xoff_t)(size_t)len != len || (xoff_t)(usize_t)len != len) {
+    fprintf(stderr, "file too large\n");
+    ret = XD3_INTERNAL;
+    goto exit;
+  }
+
+  (*buf_len) = (size_t)len;
+  (*buf_ptr) = (uint8_t*) main_malloc(*buf_len);
+  /* A NULL result is only an error for a non-empty file: a zero-byte
+   * request may legitimately yield NULL. */
+  if (*buf_ptr == NULL && *buf_len != 0) {
+    fprintf(stderr, "out of memory\n");
+    ret = XD3_INTERNAL;
+    goto exit;
+  }
+
+  ret = main_file_read(&file, *buf_ptr, *buf_len, &nread,
+		       "read failed");
+  if (ret == 0 && *buf_len == nread) {
+    ret = 0;
+  } else {
+    /* Either the read failed or it returned fewer bytes than stat said. */
+    fprintf(stderr, "invalid read\n");
+    ret = XD3_INTERNAL;
+  }
+ exit:
+  main_file_cleanup(&file);
+  return ret;
+}
+
diff --git a/linkxd3lib.c b/linkxd3lib.c
new file mode 100644
index 0000000..284cb0d
--- /dev/null
+++ b/linkxd3lib.c
@@ -0,0 +1,46 @@
+#include "xdelta3.h"
+
+extern int VVV;
+/* Global sink for results; presumably here so the calls in main() are not optimized away -- TODO confirm. */
+int VVV;
+/* Stores r into the global VVV so the value passed in has an observable use. */
+void use(int r)
+{
+  VVV = r;
+}
+
+/* References each public xdelta3 entry point once.
+ * NOTE(review): stream and source are used without meaningful setup, so
+ * this looks like a link check that is meant to be built rather than run
+ * -- confirm before executing it. */
+int main(void) {
+  xd3_config config;
+  xd3_stream stream;
+  xd3_source source;
+
+  xd3_init_config (& config, 0);
+  use (xd3_config_stream (&stream, &config));
+  use (xd3_close_stream (&stream));
+  xd3_abort_stream (&stream);
+  xd3_free_stream (&stream);
+
+  xd3_avail_input (& stream, NULL, 0);
+  xd3_consume_output (& stream);
+
+  use (xd3_bytes_on_srcblk (& source, 0));
+  use (xd3_set_source (& stream, & source));
+  xd3_set_flags (& stream, 0);
+
+  use (xd3_decode_stream (& stream, NULL, 0, NULL, NULL, 0));
+  use (xd3_decode_input (&stream));
+  use (xd3_get_appheader (& stream, NULL, NULL));
+
+  /* These two return strings.  Compare against NULL instead of casting the
+   * pointer to int, which truncates where pointers are wider than int and
+   * draws a compiler warning; the symbols are still referenced. */
+  use (xd3_errstring (& stream) != NULL);
+  use (xd3_strerror (0) != NULL);
+
+#if XD3_ENCODER
+  use (xd3_encode_input (&stream));
+  use (xd3_encode_stream (& stream, NULL, 0, NULL, NULL, 0));
+  use (xd3_set_appheader (& stream));
+  use (xd3_encoder_used_source (& stream));
+  use (xd3_encoder_srcbase (& stream));
+  use (xd3_encoder_srclen (& stream));
+#endif
+  return 0;
+}
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..be7c6ce
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,34 @@
+Xdelta 3.x readme.txt
+Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+<josh.macdonald@gmail.com>
+
+
+Thanks for downloading Xdelta!
+
+This directory contains the Xdelta3 command-line interface (CLI) and source
+distribution for VCDIFF differential compression, a.k.a. delta
+compression. The latest information and downloads are available here:
+
+  http://xdelta.org/
+  http://code.google.com/p/xdelta/
+
+The command-line syntax:
+
+  http://code.google.com/p/xdelta/wiki/CommandLineSyntax
+
+Run 'xdelta3 -h' for brief help.  Run 'xdelta3 test' for built-in tests.
+
+Sample commands (-e means encode; as with gzip, -d means decode):
+
+  xdelta3 -9 -S djw -e -vfs OLD_FILE NEW_FILE DELTA_FILE
+  xdelta3 -d -vfs OLD_FILE DELTA_FILE DECODED_FILE
+
+File bug reports and browse open support issues here:
+
+  http://code.google.com/p/xdelta/issues/list
+
+The source distribution contains the C/C++/Python APIs, Unix, Microsoft VC++
+and Cygwin builds.  Xdelta3 is covered under the terms of the GPL, see
+COPYING.
+
+Commercial inquiries welcome, please contact <josh.macdonald@gmail.com>
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..0bb39e1
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,59 @@
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2004, 2007.  Joshua P. MacDonald
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+#
+from distutils.core import setup, Extension
+from distutils.util import get_platform  # only referenced by the commented-out cygwin check below
+
+# External compression support works on Windows/Cygwin, but not from
+# within the Python module. It's something to do with fork() and
+# exec() support.
+#platform  = get_platform()
+#is_cygwin = platform.startswith('cygwin')
+# Extension module 'xdelta3main', compiled from the single source file xdelta3.c.
+xdelta3_ext = Extension('xdelta3main',
+                        ['xdelta3.c'],
+                        define_macros = [
+                                         ('PYTHON_MODULE',1),
+                                         ('SECONDARY_DJW',1),
+                                         ('VCDIFF_TOOLS',1),
+                                         ('GENERIC_ENCODE_TABLES',0),
+                                         ('XD3_POSIX',1),
+                                         ('XD3_USE_LARGEFILE64',0),
+
+                                         # the fork/exec stuff doesn't
+                                         # work inside python.
+                                         ('EXTERNAL_COMPRESSION',0),
+
+                                         ('REGRESSION_TEST',0),
+                                         ('SECONDARY_FGK',0),
+                                         ('XD3_DEBUG',0),
+                                         ],
+                        extra_compile_args = [ '-O3',
+                                               '-g',
+                                               '-fno-builtin',
+                                               # '-arch', 'x86_64',
+                                               ])
+# Release string; passed to setup() below as the package version.
+# $Format: "REL='$Xdelta3Version$'" $
+REL='3.0u'
+
+# This provides xdelta3.main(), which calls the xdelta3 command-line main()
+# from python.
+setup(name='xdelta3main',
+      version=REL,
+      ext_modules=[xdelta3_ext])
diff --git a/testing/Makefile b/testing/Makefile
new file mode 100755
index 0000000..281ef11
--- /dev/null
+++ b/testing/Makefile
@@ -0,0 +1,15 @@
+# Flags for the regression-test build; alternatives are kept commented out.
+CFLAGS = -g -Wall -I.. -DXD3_DEBUG=1
+#CFLAGS = -g -Wall -I.. -DXD3_DEBUG=2
+#CFLAGS = -O2 -Wall -I.. -DXD3_DEBUG=0 -DNDEBUG=1
+
+# Rebuild whenever any source or header here or in the parent directory changes.
+DEPS = ../*.h ../*.c *.cc *.h
+
+TARGETS = xdelta3-regtest
+
+# "all" and "clean" name actions, not files.  Without this declaration a
+# stray file with either name would stop the rule from running.
+.PHONY: all clean
+
+all: $(TARGETS)
+
+xdelta3-regtest: $(DEPS)
+	$(CXX) $(CFLAGS) regtest.cc -o xdelta3-regtest
+
+clean:
+	rm -f *.exe *.stackdump $(TARGETS)
diff --git a/testing/cmp.h b/testing/cmp.h
new file mode 100644
index 0000000..d96c386
--- /dev/null
+++ b/testing/cmp.h
@@ -0,0 +1,64 @@
+/* -*- Mode: C++ -*-  */
+namespace regtest {
+
+// Counts the byte positions at which blocks a and b differ.  Bytes that
+// exist in only one of the two blocks each count as one difference.
+inline size_t CmpDifferentBlockBytes(const Block &a, const Block &b) {
+  const size_t common = min(a.Size(), b.Size());
+  size_t mismatches = 0;
+
+  for (size_t pos = 0; pos < common; ++pos) {
+    mismatches += (a[pos] != b[pos]) ? 1 : 0;
+  }
+
+  // Whatever extends past the shared prefix is unmatched on one side.
+  mismatches += a.Size() - common;
+  mismatches += b.Size() - common;
+
+  return mismatches;
+}
+
+// Counts differing bytes between two file specs by walking both block by
+// block.  Blocks that exist in only one spec count in full.
+inline xoff_t CmpDifferentBytes(const FileSpec &a, const FileSpec &b) {
+  Block left, right;
+  xoff_t differing = 0;
+  FileSpec::iterator a_it(a), b_it(b);
+
+  // Compare pairwise for as long as both specs still have a block.
+  while (!a_it.Done() && !b_it.Done()) {
+    a_it.Get(&left);
+    b_it.Get(&right);
+
+    differing += CmpDifferentBlockBytes(left, right);
+
+    a_it.Next();
+    b_it.Next();
+  }
+
+  // At most one of these loops runs: the longer spec's remaining blocks
+  // are entirely unmatched.
+  while (!a_it.Done()) {
+    differing += a_it.BytesOnBlock();
+    a_it.Next();
+  }
+  while (!b_it.Done()) {
+    differing += b_it.BytesOnBlock();
+    b_it.Next();
+  }
+
+  return differing;
+}
+
+// Checks that the file on disk begins with exactly the bytes described by
+// spec, comparing one block at a time.  Every mismatch or I/O failure
+// trips a CHECK, so the function only ever returns true.
+// NOTE(review): bytes in the file beyond the end of spec are not detected.
+inline bool ExtFile::EqualsSpec(const FileSpec &spec) const {
+  main_file t;
+  main_file_init(&t);
+  CHECK_EQ(0, main_file_open(&t, Name(), XO_READ));
+
+  Block tblock;
+  Block sblock;
+  for (BlockIterator iter(spec); !iter.Done(); iter.Next()) {
+    iter.Get(&sblock);
+    tblock.SetSize(sblock.Size());
+    usize_t tread;
+    CHECK_EQ(0, main_file_read(&t, tblock.Data(), tblock.Size(), &tread, "read failed"));
+    // A short read would leave the tail of tblock holding stale or
+    // uninitialized bytes, so insist on the full block before comparing.
+    CHECK_EQ(tblock.Size(), tread);
+    CHECK_EQ(0, CmpDifferentBlockBytes(tblock, sblock));
+  }
+
+  CHECK_EQ(0, main_file_close(&t));
+  main_file_cleanup(&t);
+  return true;
+}
+
+}  // namespace regtest
diff --git a/testing/delta.h b/testing/delta.h
new file mode 100644
index 0000000..58fbaac
--- /dev/null
+++ b/testing/delta.h
@@ -0,0 +1,79 @@
+// Mode: -*- C++ -*-
+
+namespace regtest {
+
+class Delta {  // Runs the xd3 decoder over a delta held in a Block and exposes totals from the result.
+public:
+  Delta(const Block &block);
+  // Hands the stream to xd3_free_stream().
+  ~Delta() {
+    xd3_free_stream(&stream_);
+  }
+  // Returns the stream's whole_target.addslen.
+  xoff_t AddedBytes() const {
+    return stream_.whole_target.addslen;
+  }
+  // Returns the stream's whole_target.wininfolen.
+  xoff_t Windows() const {
+    return stream_.whole_target.wininfolen;
+  }
+  // Prints one line per entry of whole_target.inst.
+  void Print() const;
+
+private:
+  xd3_stream stream_;
+};
+
+Delta::Delta(const Block &block) {  // Feeds the whole block to xd3_decode_input() and appends every output window.
+  int ret;
+  xd3_config config;
+  memset(&stream_, 0, sizeof (stream_));
+  memset(&config, 0, sizeof (config));
+
+  xd3_init_config(&config, XD3_SKIP_EMIT | XD3_ADLER32_NOVER);
+
+  CHECK_EQ(0, xd3_config_stream (&stream_, &config));
+  // All input is supplied in one call, before the decode loop starts.
+  xd3_avail_input (&stream_, block.Data(), block.Size());
+
+  bool done = false;
+  while (!done) {
+    ret = xd3_decode_input(&stream_);
+    // Dispatch on the status code returned by the decoder.
+    switch (ret) {
+    case XD3_INPUT:  // a request for more input ends the loop: there is none left to give
+      done = true;
+      break;
+    case XD3_OUTPUT:  // a window is ready: append it to the stream's whole-target state
+      CHECK_EQ(0, xd3_whole_append_window (&stream_));
+      break;
+    case XD3_GOTHEADER:  // these three codes need no action here
+    case XD3_WINSTART:
+    case XD3_WINFINISH:
+      break;
+    default:  // any other code is treated as fatal
+      DP(RINT "error code %s\n", xd3_strerror (ret));
+      abort();
+    }
+  }
+}
+
+// Prints every decoded instruction, one per line: position, kind and size,
+// plus address and mode for anything that is neither a run nor an add.
+void Delta::Print() const {
+  for (size_t idx = 0; idx < stream_.whole_target.instlen; idx++) {
+    xd3_winst &inst = stream_.whole_target.inst[idx];
+    if (inst.type == XD3_RUN) {
+      DP(RINT "%"Q"u run %u\n", inst.position, inst.size);
+    } else if (inst.type == XD3_ADD) {
+      DP(RINT "%"Q"u add %u\n", inst.position, inst.size);
+    } else {
+      // Every remaining type is reported as a copy.
+      DP(RINT "%"Q"u copy %u @ %"Q"u (mode %u)\n",
+	 inst.position, inst.size, inst.addr, inst.mode);
+    }
+  }
+}
+
+}  // namespace
diff --git a/testing/file.h b/testing/file.h
new file mode 100644
index 0000000..30a8428
--- /dev/null
+++ b/testing/file.h
@@ -0,0 +1,367 @@
+/* -*- Mode: C++ -*-  */
+namespace regtest {
+
+class Block;
+class BlockIterator;
+class TmpFile;
+
+class FileSpec {  // A synthetic file described by table_, a map from byte offset to Segment.
+ public:
+  FileSpec(MTRandom *rand)
+    : rand_(rand) {
+  }
+
+  // Generates a file with a known size, built from consecutive segments of
+  // at most Constants::BLOCK_SIZE bytes each.
+  void GenerateFixedSize(xoff_t size) {
+    Reset();
+    
+    for (xoff_t p = 0; p < size; ) {
+      xoff_t t = min(Constants::BLOCK_SIZE, size - p);
+      table_.insert(make_pair(p, Segment(t, rand_)));
+      p += t;
+    }
+  }
+
+  // Generates a file with exponential-random distributed size
+  void GenerateRandomSize(xoff_t mean) {
+    GenerateFixedSize(rand_->ExpRand(mean));
+  }
+
+  // Returns the size of the file: the end offset of the last segment.
+  xoff_t Size() const {
+    if (table_.empty()) {
+      return 0;
+    }
+    SegmentMap::const_iterator i = --table_.end();
+    return i->first + i->second.Size();
+  }
+
+  // Returns the number of blocks of blksize bytes needed to hold Size()
+  // bytes, rounding up.
+  xoff_t Blocks(size_t blksize = Constants::BLOCK_SIZE) const {
+    if (table_.empty()) {
+      return 0;
+    }
+    return ((Size() - 1) / blksize) + 1;
+  }
+
+  // Returns the number of segments
+  xoff_t Segments() const {
+    return table_.size();
+  }
+
+  // Resets *modify, fills its table by applying mutator to this spec's
+  // table, then checks the result for contiguity.
+  void ModifyTo(const Mutator &mutator,
+		FileSpec *modify) const {
+    modify->Reset();
+    mutator.Mutate(&modify->table_, &table_, rand_);
+    modify->CheckSegments();
+  }
+  // Checks that every segment starts exactly where its predecessor ends.
+  void CheckSegments() const {
+    for (SegmentMap::const_iterator iter(table_.begin());
+	 iter != table_.end(); ) {
+      SegmentMap::const_iterator iter0(iter++);
+      if (iter == table_.end()) {
+	break;
+      }
+      CHECK_EQ(iter0->first + iter0->second.Size(), iter->first);
+    }
+  }
+  // Removes all segments.
+  void Reset() {
+    table_.clear();
+  }
+  // Writes one line per segment (offset and segment) to cerr.
+  void Print() const {
+    for (SegmentMap::const_iterator iter(table_.begin());
+	 iter != table_.end();
+	 ++iter) {
+      const Segment &seg = iter->second;
+      cerr << "Segment at " << iter->first << " (" << seg << ")" << endl;
+    }
+  }
+  // Prints the file contents block by block (defined out of line).
+  void PrintData() const;
+  // Appends the file contents to f block by block (defined out of line).
+  void WriteTmpFile(TmpFile *f) const;
+
+  typedef BlockIterator iterator;
+
+ private:
+  friend class BlockIterator;
+  // rand_ is passed to new Segments and to mutators; table_ holds the segments.
+  MTRandom *rand_;
+  SegmentMap table_;
+};
+
+class Block {  // A growable byte buffer: size_ valid bytes inside a data_size_-byte allocation.
+public:
+  Block()
+    : data_(NULL),
+      data_size_(0), 
+      size_(0) { }
+
+  ~Block() {
+    if (data_) {
+      delete [] data_;
+    }
+  }
+  // Number of valid bytes currently in the block.
+  size_t Size() const {
+    return size_;
+  }
+  // Bounds-checked read of byte i.
+  uint8_t operator[](size_t i) const {
+    CHECK_LT(i, size_);
+    return data_[i];
+  }
+  // Returns the buffer pointer; an unallocated block gets a 1-byte buffer first, so the result is never NULL.
+  uint8_t* Data() const {
+    if (data_ == NULL) {
+      CHECK_EQ(0, size_); 
+      data_size_ = 1;
+      data_ = new uint8_t[1];
+    }
+    return data_;
+  }
+
+  // For writing to blocks
+  void Append(const uint8_t *data, size_t size);
+
+  // For clearing a block: sets the size to zero and keeps the allocation.
+  void Reset() {
+    size_ = 0;
+  }
+  // Hex-dumps the block (defined out of line).
+  void Print() const;
+  // Appends this block to f (defined out of line).
+  void WriteTmpFile(TmpFile *f) const;
+  // Sets the valid size; when the buffer must grow, the old contents are discarded, not copied.
+  void SetSize(size_t size) {
+    size_ = size;
+
+    if (data_size_ < size) {
+      if (data_) {
+	delete [] data_;
+      }
+      data_ = new uint8_t[size];
+      data_size_ = size;
+    }
+  }
+private:
+  friend class BlockIterator;
+  // data_ and data_size_ are mutable because the const Data() may allocate.
+  mutable uint8_t *data_;
+  mutable size_t data_size_;
+  size_t size_;
+};
+
+class BlockIterator {  // Walks a FileSpec in fixed-size blocks, identified by block number.
+public:
+  explicit BlockIterator(const FileSpec& spec)
+    : spec_(spec),
+      blkno_(0),
+      blksize_(Constants::BLOCK_SIZE) { }
+  // Same as above, with a caller-chosen block size.
+  BlockIterator(const FileSpec& spec,
+		size_t blksize)
+    : spec_(spec),
+      blkno_(0),
+      blksize_(blksize) { }
+  // True once the block number is at or past the spec's block count.
+  bool Done() const {
+    return blkno_ >= spec_.Blocks(blksize_);
+  }
+  // Advances to the next block.
+  void Next() {
+    blkno_++;
+  }
+  // Current block number.
+  xoff_t Blkno() const {
+    return blkno_;
+  }
+  // Byte offset at which the current block starts.
+  xoff_t Offset() const {
+    return blkno_ * blksize_;
+  }
+  // Repositions the iterator on block blkno.
+  void SetBlock(xoff_t blkno) {
+    blkno_ = blkno;
+  }
+  // Fills *block with the current block's bytes (defined out of line).
+  void Get(Block *block) const;
+  // Bytes in the current block: blksize_ for all but the last block, and 0 just past an exactly block-aligned end.
+  size_t BytesOnBlock() const {
+    xoff_t blocks = spec_.Blocks(blksize_);
+    xoff_t size = spec_.Size();
+
+    CHECK((blkno_ < blocks) ||
+	  (blkno_ == blocks && size % blksize_ == 0));
+
+    if (blkno_ == blocks) {
+      return 0;
+    }
+    if (blkno_ + 1 == blocks) {
+      return ((size - 1) % blksize_) + 1;
+    }
+    return blksize_;
+  }
+  // Block size this iterator was constructed with.
+  size_t BlockSize() const {
+    return blksize_;
+  }
+
+private:
+  const FileSpec& spec_;
+  xoff_t blkno_;
+  size_t blksize_;
+};
+
+// Names a numbered file under /tmp.  The name is unlinked both when the
+// object is constructed and when it is destroyed.
+class ExtFile {
+public:
+  ExtFile() {
+    // Each instance takes the next number from a counter shared by all.
+    static int static_counter = 0;
+    char path[32];
+    snprintf(path, sizeof(path), "/tmp/regtest.%d", static_counter++);
+    filename_.assign(path);
+    unlink(filename_.c_str());
+  }
+
+  ~ExtFile() {
+    unlink(filename_.c_str());
+  }
+
+  // Path of the file.
+  const char* Name() const {
+    return filename_.c_str();
+  }
+
+  // Compares the real file's contents against a file spec.
+  bool EqualsSpec(const FileSpec &spec) const;
+
+protected:
+  string filename_;
+};
+
+class TmpFile : public ExtFile {  // An ExtFile that is opened for writing on construction.
+public:
+  // TODO this is a little unportable!
+  TmpFile() {
+    main_file_init(&file_);
+    CHECK_EQ(0, main_file_open(&file_, filename_.c_str(), XO_WRITE));
+  }
+
+  ~TmpFile() {
+    main_file_cleanup(&file_);
+  }
+  // Writes the block's bytes to the file; a failed write trips the CHECK.
+  void Append(const Block *block) {
+    CHECK_EQ(0, main_file_write(&file_, 
+				block->Data(), block->Size(), 
+				"tmpfile write failed"));
+  }
+
+  // Returns the path; as a side effect, closes the file if it is still open.
+  const char* Name() const {
+    if (main_file_isopen(&file_)) {
+      CHECK_EQ(0, main_file_close(&file_));
+    }
+    return ExtFile::Name();
+  }
+
+private:
+  mutable main_file file_;  // mutable so the const Name() can close it
+};
+
+inline void BlockIterator::Get(Block *block) const {  // Fills *block with the bytes of the current block.
+  xoff_t offset = blkno_ * blksize_;
+  const SegmentMap &table = spec_.table_;
+  size_t got = 0;
+  block->SetSize(BytesOnBlock());
+  // upper_bound() finds the first segment starting after offset; the segment containing offset is the one before it.
+  SegmentMap::const_iterator pos = table.upper_bound(offset);
+  if (pos == table.begin()) {
+    CHECK_EQ(0, spec_.Size());
+    return;
+  }
+  --pos;
+  // Copy from successive segments until the block is full.
+  while (got < block->size_) {
+    CHECK(pos != table.end());
+    CHECK_GE(offset, pos->first);
+
+    const Segment &seg = pos->second;
+
+    // The position of this segment may start before this block starts,
+    // and then the position of the data may be offset from the seeding 
+    // position.
+    size_t seg_offset = offset - pos->first;
+    size_t advance = min(seg.Size() - seg_offset,
+			 blksize_ - got);
+    // Write this segment's share directly into the block's buffer.
+    seg.Fill(seg_offset, advance, block->data_ + got);
+
+    got += advance;
+    offset += advance;
+    ++pos;
+  }
+}
+
+// Appends size bytes from data to the end of the block.  The first append
+// to an unallocated block reserves Constants::BLOCK_SIZE bytes; after that
+// the buffer doubles until the new bytes fit, keeping existing contents.
+inline void Block::Append(const uint8_t *data, size_t size) {
+  if (data_ == NULL) {
+    CHECK_EQ(0, size_);
+    CHECK_EQ(0, data_size_);
+    data_ = new uint8_t[Constants::BLOCK_SIZE];
+    data_size_ = Constants::BLOCK_SIZE;
+  }
+
+  if (size_ + size > data_size_) {
+    uint8_t *tmp = data_;
+    while (size_ + size > data_size_) {
+      data_size_ *= 2;
+    }
+    data_ = new uint8_t[data_size_];
+    memcpy(data_, tmp, size_);
+    // The old buffer came from new[], so it must be released with
+    // delete[]; plain delete on it is undefined behavior.
+    delete [] tmp;
+  }
+
+  memcpy(data_ + size_, data, size);
+  size_ += size;
+}
+
+// Prints the contents of every block of this spec, in order.
+inline void FileSpec::PrintData() const {
+  Block scratch;
+  BlockIterator cursor(*this);
+  while (!cursor.Done()) {
+    cursor.Get(&scratch);
+    scratch.Print();
+    cursor.Next();
+  }
+}
+
+// Hex-dumps the block, 16 bytes per row, each row prefixed with the offset
+// of its first byte.  A newline is always printed at the very end.
+inline void Block::Print() const {
+  for (size_t idx = 0; idx < Size(); idx++) {
+    const xoff_t offset = idx;
+    const xoff_t column = offset % 16;
+    if (column == 0) {
+      DP(RINT "%5"Q"x: ", offset);
+    }
+    DP(RINT "%02x ", (*this)[idx]);
+    if (column == 15) {
+      DP(RINT "\n");
+    }
+  }
+  DP(RINT "\n");
+}
+
+// Appends the contents of this spec to f, one block at a time.
+inline void FileSpec::WriteTmpFile(TmpFile *f) const {
+  Block scratch;
+  BlockIterator cursor(*this);
+  while (!cursor.Done()) {
+    cursor.Get(&scratch);
+    f->Append(&scratch);
+    cursor.Next();
+  }
+}
+
+inline void Block::WriteTmpFile(TmpFile *f) const {  // Appends this block's bytes to f.
+  f->Append(this);
+}
+
+}  // namespace regtest
+
diff --git a/testing/modify.h b/testing/modify.h
new file mode 100644
index 0000000..67cccd9
--- /dev/null
+++ b/testing/modify.h
@@ -0,0 +1,421 @@
+// -*- Mode: C++ -*-
+namespace regtest {
+
+class Mutator {  // Interface: builds a new segment table from a source table.
+public:
+  virtual ~Mutator() { }
+  virtual void Mutate(SegmentMap *table,  // output table
+		      const SegmentMap *source_table,  // input table, read only
+		      MTRandom *rand) const = 0;  // random source for implementations that need one
+};
+
+// Describes one edit to apply to a segment table.  Which fields matter
+// depends on kind: MODIFY/ADD/DELETE use size and addr1 (plus insert, when
+// data is supplied); MOVE/COPY/OVERWRITE use size, addr1 and addr2.
+class Change {
+public:
+  enum Kind {
+    MODIFY = 1,
+    ADD = 2,
+    DELETE = 3,
+    MOVE = 4,
+    COPY = 5,
+    OVERWRITE = 6,
+  };
+
+  // Constructor for modify, add, delete.
+  // addr2 is unused for these kinds but is still zeroed, so that copying a
+  // Change (e.g. into a ChangeList) never reads an indeterminate value.
+  Change(Kind kind, xoff_t size, xoff_t addr1)
+    : kind(kind),
+      size(size),
+      addr1(addr1),
+      addr2(0),
+      insert(NULL) {
+    CHECK(kind != MOVE && kind != COPY && kind != OVERWRITE);
+  }
+
+  // Constructor for modify, add w/ provided data.
+  // As above, addr2 is unused and zeroed.
+  Change(Kind kind, xoff_t size, xoff_t addr1, Segment *insert)
+    : kind(kind),
+      size(size),
+      addr1(addr1),
+      addr2(0),
+      insert(insert) {
+    CHECK(kind != MOVE && kind != COPY && kind != OVERWRITE);
+  }
+
+  // Constructor for move, copy, overwrite.
+  Change(Kind kind, xoff_t size, xoff_t addr1, xoff_t addr2)
+    : kind(kind),
+      size(size),
+      addr1(addr1),
+      addr2(addr2),
+      insert(NULL) {
+    CHECK(kind == MOVE || kind == COPY || kind == OVERWRITE);
+  }
+
+  Kind kind;
+  xoff_t size;
+  xoff_t addr1;
+  xoff_t addr2;     // Second address; only set by the move/copy/overwrite constructor
+  Segment *insert;  // For modify and/or add
+};
+
+typedef list<Change> ChangeList;
+
+class ChangeListMutator : public Mutator {  // A Mutator that applies a list of Changes in order.
+public:
+  ChangeListMutator(const ChangeList &cl)
+    : cl_(cl) { }
+  // Starts with an empty list; fill it through Changes().
+  ChangeListMutator() { }
+  // Applies every change in the list, each one to the result of the previous.
+  void Mutate(SegmentMap *table,
+	      const SegmentMap *source_table,
+	      MTRandom *rand) const;
+  // Applies the single change ch, dispatching on ch.kind.
+  static void Mutate(const Change &ch, 
+		     SegmentMap *table,
+		     const SegmentMap *source_table,
+		     MTRandom *rand);
+  // One handler per Change::Kind; each writes its result into *table.
+  static void AddChange(const Change &ch, 
+			SegmentMap *table,
+			const SegmentMap *source_table,
+			MTRandom *rand);
+
+  static void ModifyChange(const Change &ch, 
+			   SegmentMap *table,
+			   const SegmentMap *source_table,
+			   MTRandom *rand);
+
+  static void DeleteChange(const Change &ch, 
+			   SegmentMap *table,
+			   const SegmentMap *source_table,
+			   MTRandom *rand);
+
+  static void MoveChange(const Change &ch, 
+			 SegmentMap *table,
+			 const SegmentMap *source_table,
+			 MTRandom *rand);
+
+  static void OverwriteChange(const Change &ch, 
+			      SegmentMap *table,
+			      const SegmentMap *source_table,
+			      MTRandom *rand);
+
+  static void CopyChange(const Change &ch, 
+			 SegmentMap *table,
+			 const SegmentMap *source_table,
+			 MTRandom *rand);
+  // Appends length bytes of source_table, starting at copy_offset, to *table at append_offset.
+  static void AppendCopy(SegmentMap *table,
+			 const SegmentMap *source_table,
+			 xoff_t copy_offset, 
+			 xoff_t append_offset, 
+			 xoff_t length);
+  // Mutable access to the change list.
+  ChangeList* Changes() {
+    return &cl_;
+  }
+  // Read-only access to the change list.
+  const ChangeList* Changes() const {
+    return &cl_;
+  }
+
+private:
+  ChangeList cl_;
+};
+
+void ChangeListMutator::Mutate(SegmentMap *table,
+			       const SegmentMap *source_table,
+			       MTRandom *rand) const {
+  // The speed of processing gigabytes of data is so slow compared with
+  // these table-copy operations, no attempt to make this fast.
+  SegmentMap tmp;
+  // NOTE(review): with an empty change list the loop never runs and *table is left untouched -- confirm intended.
+  for (ChangeList::const_iterator iter(cl_.begin()); iter != cl_.end(); ++iter) {
+    const Change &ch = *iter;
+    tmp.clear();  // tmp holds the previous *table contents after the swap below
+    Mutate(ch, &tmp, source_table, rand);
+    tmp.swap(*table);  // move this step's result into *table
+    source_table = table;  // the next change reads the result so far
+  }
+}
+  
+// Applies one change by forwarding to the handler that matches ch.kind.
+// A kind outside the enum matches no case and leaves *table untouched.
+void ChangeListMutator::Mutate(const Change &ch,
+			       SegmentMap *table,
+			       const SegmentMap *source_table,
+			       MTRandom *rand) {
+  // Cases are listed in the order of the Change::Kind enumerators.
+  switch (ch.kind) {
+  case Change::MODIFY:
+    ModifyChange(ch, table, source_table, rand);
+    break;
+  case Change::ADD:
+    AddChange(ch, table, source_table, rand);
+    break;
+  case Change::DELETE:
+    DeleteChange(ch, table, source_table, rand);
+    break;
+  case Change::MOVE:
+    MoveChange(ch, table, source_table, rand);
+    break;
+  case Change::COPY:
+    CopyChange(ch, table, source_table, rand);
+    break;
+  case Change::OVERWRITE:
+    OverwriteChange(ch, table, source_table, rand);
+    break;
+  }
+}
+
+void ChangeListMutator::ModifyChange(const Change &ch,   // Replaces bytes [addr1, addr1 + size) with ch.insert or a new random segment.
+				     SegmentMap *table,
+				     const SegmentMap *source_table,
+				     MTRandom *rand) {
+  xoff_t m_start = ch.addr1;
+  xoff_t m_end = m_start + ch.size;
+  xoff_t i_start = 0;
+  xoff_t i_end = 0;
+  // [m_start, m_end) is the modified range; [i_start, i_end) is the current source segment.
+  for (SegmentMap::const_iterator iter(source_table->begin()); 
+       iter != source_table->end();
+       ++iter) {
+    const Segment &seg = iter->second;
+    i_start = iter->first;
+    i_end = i_start + seg.Size();
+    // Segments that do not overlap the modified range are copied unchanged.
+    if (i_end <= m_start || i_start >= m_end) {
+      table->insert(table->end(), make_pair(i_start, seg));
+      continue;
+    }
+    // Keep the part of this segment that lies before the modified range.
+    if (i_start < m_start) {
+      table->insert(table->end(), 
+		    make_pair(i_start, 
+			      seg.Subseg(0, m_start - i_start)));
+    }
+
+    // Insert the entire segment, even though it may extend into later
+    // segments.  This condition avoids inserting it during later
+    // segments.
+    if (m_start >= i_start) {
+      if (ch.insert != NULL) {
+	table->insert(table->end(), make_pair(m_start, *ch.insert));
+      } else {
+	Segment part(m_end - m_start, rand);
+	table->insert(table->end(), make_pair(m_start, part));
+      }
+    }
+    // Keep the part of this segment that lies after the modified range.
+    if (i_end > m_end) {
+      table->insert(table->end(), 
+		    make_pair(m_end, 
+			      seg.Subseg(m_end - i_start, i_end - m_end)));
+    }
+  }
+  // The modified range must not extend past the end of the last source segment.
+  CHECK_LE(m_end, i_end);
+}
+
+// Inserts ch.size bytes at offset ch.addr1, shifting everything at or after
+// that offset up by ch.size.  The inserted bytes are *ch.insert when it is
+// supplied and a fresh random segment otherwise.  ch.addr1 may equal the
+// end of the input (append) but must not lie beyond it.
+void ChangeListMutator::AddChange(const Change &ch,
+				  SegmentMap *table,
+				  const SegmentMap *source_table,
+				  MTRandom *rand) {
+  xoff_t m_start = ch.addr1;
+  xoff_t i_start = 0;
+  xoff_t i_end = 0;
+
+  for (SegmentMap::const_iterator iter(source_table->begin());
+       iter != source_table->end();
+       ++iter) {
+    const Segment &seg = iter->second;
+    i_start = iter->first;
+    i_end = i_start + seg.Size();
+
+    // Entirely before the insertion point: copied unchanged.
+    if (i_end <= m_start) {
+      table->insert(table->end(), make_pair(i_start, seg));
+      continue;
+    }
+
+    // Entirely after the insertion point: shifted up by the added size.
+    if (i_start > m_start) {
+      table->insert(table->end(), make_pair(i_start + ch.size, seg));
+      continue;
+    }
+
+    // This segment contains the insertion point.  Emit the part before it,
+    // then the added data, then the rest of the segment.
+    if (i_start < m_start) {
+      table->insert(table->end(),
+		    make_pair(i_start,
+			      seg.Subseg(0, m_start - i_start)));
+    }
+
+    if (ch.insert != NULL) {
+      table->insert(table->end(), make_pair(m_start, *ch.insert));
+    } else {
+      Segment addseg(ch.size, rand);
+      table->insert(table->end(), make_pair(m_start, addseg));
+    }
+
+    if (m_start < i_end) {
+      table->insert(table->end(),
+		    make_pair(m_start + ch.size,
+			      seg.Subseg(m_start - i_start, i_end - m_start)));
+    }
+  }
+
+  CHECK_LE(m_start, i_end);
+
+  // Special case for add at end-of-input: no segment contained m_start, so
+  // nothing has been added yet.  Honor caller-provided data here exactly
+  // as the in-loop path does, instead of always adding random bytes.
+  if (m_start == i_end) {
+    if (ch.insert != NULL) {
+      table->insert(table->end(), make_pair(m_start, *ch.insert));
+    } else {
+      Segment addseg(ch.size, rand);
+      table->insert(table->end(), make_pair(m_start, addseg));
+    }
+  }
+}
+
+void ChangeListMutator::DeleteChange(const Change &ch,   // Removes bytes [addr1, addr1 + size) and shifts later segments down by size.
+				     SegmentMap *table,
+				     const SegmentMap *source_table,
+				     MTRandom *rand) {
+  xoff_t m_start = ch.addr1;
+  xoff_t m_end = m_start + ch.size;
+  xoff_t i_start = 0;
+  xoff_t i_end = 0;
+  // [m_start, m_end) is the deleted range; [i_start, i_end) is the current source segment.
+  for (SegmentMap::const_iterator iter(source_table->begin()); 
+       iter != source_table->end();
+       ++iter) {
+    const Segment &seg = iter->second;
+    i_start = iter->first;
+    i_end = i_start + seg.Size();
+    // Entirely before the deleted range: copied unchanged.
+    if (i_end <= m_start) {
+      table->insert(table->end(), make_pair(i_start, seg));
+      continue;
+    }
+    // Entirely after the deleted range: shifted down by the deleted size.
+    if (i_start >= m_end) {
+      table->insert(table->end(), make_pair(i_start - ch.size, seg));
+      continue;
+    }
+    // Overlapping segment: keep whatever lies before and after the range.
+    if (i_start < m_start) {
+      table->insert(table->end(), 
+		    make_pair(i_start, 
+			      seg.Subseg(0, m_start - i_start)));
+    }
+
+    if (i_end > m_end) {
+      table->insert(table->end(), 
+		    make_pair(m_end - ch.size, 
+			      seg.Subseg(m_end - i_start, i_end - m_end)));
+    }
+  }
+  // The deleted range must start and end within the source.
+  CHECK_LT(m_start, i_end);
+  CHECK_LE(m_end, i_end);
+}
+
+// Moves ch.size bytes from ch.addr1 to ch.addr2, as a copy followed by a
+// delete of the original bytes.
+void ChangeListMutator::MoveChange(const Change &ch,
+				   SegmentMap *table,
+				   const SegmentMap *source_table,
+				   MTRandom *rand) {
+  SegmentMap with_copy;
+  CHECK_NE(ch.addr1, ch.addr2);
+  CopyChange(ch, &with_copy, source_table, rand);
+
+  // The copy was inserted at addr2.  When that is below addr1, the original
+  // bytes have been pushed up by ch.size; otherwise they are still at addr1.
+  xoff_t original_at = ch.addr1;
+  if (!(ch.addr1 < ch.addr2)) {
+    original_at = ch.addr1 + ch.size;
+  }
+
+  Change drop_original(Change::DELETE, ch.size, original_at);
+  DeleteChange(drop_original, table, &with_copy, rand);
+}
+
+// Overwrites ch.size bytes at ch.addr2 with the bytes at ch.addr1, as a
+// copy to addr2 followed by a delete of the bytes the copy displaced.
+void ChangeListMutator::OverwriteChange(const Change &ch,
+					SegmentMap *table,
+					const SegmentMap *source_table,
+					MTRandom *rand) {
+  SegmentMap with_copy;
+  CHECK_NE(ch.addr1, ch.addr2);
+  CopyChange(ch, &with_copy, source_table, rand);
+
+  // The bytes that used to start at addr2 now sit right after the copy.
+  const xoff_t displaced_at = ch.addr2 + ch.size;
+  Change drop_displaced(Change::DELETE, ch.size, displaced_at);
+  DeleteChange(drop_displaced, table, &with_copy, rand);
+}
+
+void ChangeListMutator::CopyChange(const Change &ch,   // Inserts a copy of ch.size source bytes from addr1 at offset addr2.
+				   SegmentMap *table,
+				   const SegmentMap *source_table,
+				   MTRandom *ignore) {
+  xoff_t m_start = ch.addr2;
+  xoff_t c_start = ch.addr1;
+  xoff_t i_start = 0;
+  xoff_t i_end = 0;
+
+  // Like AddChange() with AppendCopy instead of a random segment.
+  for (SegmentMap::const_iterator iter(source_table->begin()); 
+       iter != source_table->end();
+       ++iter) {
+    const Segment &seg = iter->second;
+    i_start = iter->first;
+    i_end = i_start + seg.Size();
+    // Entirely before the insertion point: copied unchanged.
+    if (i_end <= m_start) {
+      table->insert(table->end(), make_pair(i_start, seg));
+      continue;
+    }
+    // Entirely after the insertion point: shifted up by the copied size.
+    if (i_start > m_start) {
+      table->insert(table->end(), make_pair(i_start + ch.size, seg));
+      continue;
+    }
+    // This segment contains the insertion point: prefix, copied data, then the rest.
+    if (i_start < m_start) {
+      table->insert(table->end(), 
+		    make_pair(i_start, 
+			      seg.Subseg(0, m_start - i_start)));
+    }
+
+    AppendCopy(table, source_table, c_start, m_start, ch.size);
+
+    if (m_start < i_end) {
+      table->insert(table->end(), 
+		    make_pair(m_start + ch.size, 
+			      seg.Subseg(m_start - i_start, i_end - m_start)));
+    }
+  }
+  // The insertion point may equal the end of the source but not exceed it.
+  CHECK_LE(m_start, i_end);
+
+  // Special case for copy to end-of-input.
+  if (m_start == i_end) {
+    AppendCopy(table, source_table, c_start, m_start, ch.size);
+  }
+}
+
+// Appends to *table a copy of the length source bytes that start at
+// copy_offset, placing them at append_offset.  The copy is emitted as one
+// sub-segment per source segment it touches.  The range
+// [copy_offset, copy_offset + length) must lie inside the source table;
+// violations now trip a CHECK instead of walking off the map.
+void ChangeListMutator::AppendCopy(SegmentMap *table,
+				   const SegmentMap *source_table,
+				   xoff_t copy_offset,
+				   xoff_t append_offset,
+				   xoff_t length) {
+  // Nothing to copy; also keeps the iterator arithmetic below from running
+  // on an empty source table.
+  if (length == 0) {
+    return;
+  }
+
+  // upper_bound() yields the first segment that starts after copy_offset;
+  // the segment holding copy_offset is its predecessor, which must exist.
+  SegmentMap::const_iterator pos(source_table->upper_bound(copy_offset));
+  CHECK(pos != source_table->begin());
+  --pos;
+  xoff_t got = 0;
+
+  while (got < length) {
+    // Running out of segments means the copy range exceeds the source.
+    CHECK(pos != source_table->end());
+    size_t seg_offset = copy_offset - pos->first;
+    // Guards the subtraction below against unsigned wrap-around.
+    CHECK_LT(seg_offset, pos->second.Size());
+    size_t advance = min(pos->second.Size() - seg_offset,
+			 (size_t)(length - got));
+
+    table->insert(table->end(),
+		  make_pair(append_offset,
+			    pos->second.Subseg(seg_offset,
+					       advance)));
+
+    got += advance;
+    copy_offset += advance;
+    append_offset += advance;
+    ++pos;
+  }
+}
+
+// Mutator that replaces the single byte at offset 0.
+class Modify1stByte : public Mutator {
+public:
+  void Mutate(SegmentMap *table,
+	      const SegmentMap *source_table,
+	      MTRandom *rand) const {
+    // A one-byte MODIFY at address 0, applied through the shared dispatcher.
+    const Change first_byte(Change::MODIFY, 1, 0);
+    ChangeListMutator::Mutate(first_byte, table, source_table, rand);
+  }
+};
+
+}  // namespace regtest
diff --git a/testing/random.h b/testing/random.h
new file mode 100644
index 0000000..f2cb167
--- /dev/null
+++ b/testing/random.h
@@ -0,0 +1,140 @@
+/* -*- Mode: C++ -*-  */
+/* This is public-domain Mersenne Twister code,
+ * attributed to Michael Brundage.  Thanks!
+ * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html
+ */
+#include <math.h>
+
+namespace regtest {
+
+// Mersenne Twister pseudo-random generator with 32/64-bit uniform outputs
+// and exponentially distributed helpers.  The sequence depends only on the
+// seed given at construction.
+class MTRandom {
+ public:
+  static const uint32_t TEST_SEED1 = 5489UL;
+
+  static const int MT_LEN = 624;
+  static const int MT_IA = 397;
+  static const uint32_t UPPER_MASK = 0x80000000;
+  static const uint32_t LOWER_MASK = 0x7FFFFFFF;
+  static const uint32_t MATRIX_A = 0x9908B0DF;
+
+  // Seeds the generator with TEST_SEED1.
+  MTRandom() {
+    Init(TEST_SEED1);
+  }
+
+  // Seeds the generator with the given value.
+  MTRandom(uint32_t seed) {
+    Init(seed);
+  }
+
+  // Returns the next 32-bit output, regenerating the whole state buffer
+  // once all MT_LEN words have been consumed.
+  uint32_t Rand32 () {
+    uint32_t y;
+    static unsigned long mag01[2] = {
+      0 , MATRIX_A
+    };
+
+    if (mt_index_ >= MT_LEN) {
+      int kk;
+
+      for (kk = 0; kk < MT_LEN - MT_IA; kk++) {
+	y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK);
+	mt_buffer_[kk] = mt_buffer_[kk + MT_IA] ^ (y >> 1) ^ mag01[y & 0x1UL];
+      }
+      for (;kk < MT_LEN - 1; kk++) {
+	y = (mt_buffer_[kk] & UPPER_MASK) | (mt_buffer_[kk + 1] & LOWER_MASK);
+	mt_buffer_[kk] = mt_buffer_[kk + (MT_IA - MT_LEN)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+      }
+      y = (mt_buffer_[MT_LEN - 1] & UPPER_MASK) | (mt_buffer_[0] & LOWER_MASK);
+      mt_buffer_[MT_LEN - 1] = mt_buffer_[MT_IA - 1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+      mt_index_ = 0;
+    }
+
+    y = mt_buffer_[mt_index_++];
+
+    // Tempering of the raw state word.
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680UL;
+    y ^= (y << 15) & 0xefc60000UL;
+    y ^= (y >> 18);
+
+    return y;
+  }
+
+  // Returns an exponentially distributed value with the given mean,
+  // rounded to the nearest integer.
+  uint32_t ExpRand32(uint32_t mean) {
+    // A zero draw would give log(1/0) = inf, and converting inf to an
+    // integer is undefined, so draw again in that (very rare) case.
+    uint32_t r;
+    do {
+      r = Rand32();
+    } while (r == 0);
+    double mean_d = mean;
+    double erand  = log (1.0 / (r / (double)UINT32_MAX));
+    uint32_t x = (uint32_t) (mean_d * erand + 0.5);
+    return x;
+  }
+
+  // Returns 64 random bits built from two consecutive 32-bit outputs.
+  uint64_t Rand64() {
+    // Two separate statements fix the order of the draws (high word first);
+    // as operands of one expression their order would be unspecified.
+    uint64_t hi = Rand32();
+    uint64_t lo = Rand32();
+    return (hi << 32) | lo;
+  }
+
+  // 64-bit counterpart of ExpRand32().
+  uint64_t ExpRand64(uint64_t mean) {
+    uint64_t r;
+    do {
+      r = Rand64();
+    } while (r == 0);
+    double mean_d = mean;
+    // Scale by the 64-bit maximum so the ratio lies in (0, 1].  Dividing a
+    // 64-bit draw by UINT32_MAX made the logarithm negative for almost
+    // every draw, and a negative double cannot be converted to uint64_t.
+    double erand  = log (1.0 / (r / (double)UINT64_MAX));
+    uint64_t x = (uint64_t) (mean_d * erand + 0.5);
+    return x;
+  }
+
+  // Uniform value of type T, chosen by sizeof(T); aborts for any size other
+  // than 4 or 8 bytes.
+  template <typename T>
+  T Rand() {
+    switch (sizeof(T)) {
+    case sizeof(uint32_t):
+      return Rand32();
+    case sizeof(uint64_t):
+      return Rand64();
+    default:
+      cerr << "Invalid sizeof T" << endl;
+      abort();
+    }
+  }
+
+  // Exponential value of type T, chosen by sizeof(T); aborts for any size
+  // other than 4 or 8 bytes.
+  template <typename T>
+  T ExpRand(T mean) {
+    switch (sizeof(T)) {
+    case sizeof(uint32_t):
+      return ExpRand32(mean);
+    case sizeof(uint64_t):
+      return ExpRand64(mean);
+    default:
+      cerr << "Invalid sizeof T" << endl;
+      abort();
+    }
+  }
+
+ private:
+  // Fills the state buffer from seed and marks it exhausted, so the first
+  // Rand32() call regenerates it.
+  void Init(uint32_t seed) {
+    mt_buffer_[0] = seed;
+    mt_index_ = MT_LEN;
+    for (int i = 1; i < MT_LEN; i++) {
+      /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+      /* In the previous versions, MSBs of the seed affect   */
+      /* only MSBs of the array mt[].                        */
+      /* 2002/01/09 modified by Makoto Matsumoto             */
+      mt_buffer_[i] =
+	(1812433253UL * (mt_buffer_[i-1] ^ (mt_buffer_[i-1] >> 30)) + i);
+    }
+  }
+
+  int mt_index_;                // Next unread word of mt_buffer_
+  uint32_t mt_buffer_[MT_LEN];  // Generator state
+};
+
+class MTRandom8 {  // Produces random bytes from an MTRandom it points to.
+public:
+  MTRandom8(MTRandom *rand)
+    : rand_(rand) {
+  }
+  // Draws one 32-bit value and folds shifted copies of it into a single byte.
+  uint8_t Rand8() {
+    uint32_t r = rand_->Rand32();
+
+    // TODO: make this use a single byte at a time?
+    return (r & 0xff) ^ (r >> 7) ^ (r >> 15) ^ (r >> 21);
+  }
+
+private:
+  MTRandom *rand_;
+};
+
+}  // namespace regtest
diff --git a/testing/segment.h b/testing/segment.h
new file mode 100644
index 0000000..1dabf5c
--- /dev/null
+++ b/testing/segment.h
@@ -0,0 +1,100 @@
+// -*- Mode: C++ -*-
+
+namespace regtest {
+
+// A run of bytes that is either regenerated on demand from a random
+// seed or backed by caller-supplied literal data.  Every constructor
+// checks that the size is greater than zero.
+class Segment {
+ public:
+  // Random segment; the seed is the next 32-bit draw from |rand|.
+  Segment(size_t size, MTRandom *rand)
+    : size_(size),
+      seed_(rand->Rand32()),
+      seed_offset_(0),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  // Random segment with an explicit seed.
+  Segment(size_t size, uint32_t seed)
+    : size_(size),
+      seed_(seed),
+      seed_offset_(0),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  // Literal segment viewing |data|; the pointer is stored, not copied.
+  Segment(size_t size, uint8_t *data)
+    : size_(size),
+      seed_(0),
+      seed_offset_(0),
+      data_(data) {
+    CHECK_GT(size_, 0);
+  }
+
+  size_t Size() const {
+    return size_;
+  }
+
+  // Returns the sub-range [start, start + size) as a new Segment.  A
+  // literal segment yields a view into the same data; a random segment
+  // keeps the seed and advances the seed offset by |start|.
+  Segment Subseg(size_t start, size_t size) const {
+    CHECK_LE(start + size, size_);
+    if (!data_) {
+      return Segment(size, seed_, seed_offset_ + start);
+    }
+    return Segment(size, data_ + start);
+  }
+
+  // Writes |size| bytes of this segment, starting at |seg_offset|, into
+  // |data|.  For a random segment a fresh generator is seeded and the
+  // bytes preceding the requested position are generated and discarded.
+  void Fill(size_t seg_offset, size_t size, uint8_t *data) const {
+    CHECK_LE(seg_offset + size, size_);
+    if (data_ != NULL) {
+      memcpy(data, data_ + seg_offset, size);
+      return;
+    }
+
+    MTRandom gen(seed_);
+    MTRandom8 gen8(&gen);
+    const size_t skip = seg_offset + seed_offset_;
+    for (size_t discarded = 0; discarded < skip; discarded++) {
+      gen8.Rand8();
+    }
+    uint8_t *const end = data + size;
+    for (uint8_t *out = data; out != end; ++out) {
+      *out = gen8.Rand8();
+    }
+  }
+
+private:
+  // Used by Subseg(): random segment that starts |seed_offset| bytes
+  // into the sequence produced by |seed|.
+  Segment(size_t size, uint32_t seed, size_t seed_offset)
+    : size_(size),
+      seed_(seed),
+      seed_offset_(seed_offset),
+      data_(NULL) {
+    CHECK_GT(size_, 0);
+  }
+
+  friend ostream& operator<<(ostream& os, const Segment &seg);
+
+  size_t size_;  // Size of this segment
+
+  // For random segments
+  uint32_t seed_;  // Seed used for generating byte sequence
+  size_t seed_offset_;  // Seed positions the sequence this many bytes
+                        // before its beginning.
+
+  // For literal segments (data is not owned)
+  uint8_t *data_;
+};
+
+// Prints a literal segment as space-separated two-digit hex bytes, and
+// a random segment as its size, seed and skip count.
+ostream& operator<<(ostream& os, const Segment &seg) {
+  if (!seg.data_) {
+    os << "size=" << seg.size_ << ",seed=" << seg.seed_
+       << ",skip=" << seg.seed_offset_;
+    return os;
+  }
+
+  char hex[10];
+  const uint8_t *const end = seg.data_ + seg.size_;
+  for (const uint8_t *p = seg.data_; p != end; ++p) {
+    sprintf(hex, "%02x ", *p);
+    os << hex;
+  }
+  return os;
+}
+
+typedef map<xoff_t, Segment> SegmentMap;
+
+}  // namespace regtest
diff --git a/testing/sizes.h b/testing/sizes.h
new file mode 100644
index 0000000..6b70892
--- /dev/null
+++ b/testing/sizes.h
@@ -0,0 +1,69 @@
+// -*- Mode: C++ -*-
+namespace regtest {
+
+// Produces a sequence of sizes: first the fixed entries of U::sizes,
+// then pseudo-random values reduced modulo U::max_value.  Iteration is
+// finished once the position has passed both the fixed table and the
+// requested count.
+template <typename T, typename U>
+class SizeIterator {
+ public:
+  SizeIterator(MTRandom *rand, size_t howmany)
+    : rand_(rand),
+      count_(0),
+      fixed_(U::sizes),
+      fixed_size_(SIZEOF_ARRAY(U::sizes)),
+      howmany_(howmany) { }
+
+  // Current value.  Past the fixed table every call draws a new random
+  // number, so repeated calls at one position can return different
+  // values.
+  T Get() {
+    if (count_ >= fixed_size_) {
+      return rand_->Rand<T>() % U::max_value;
+    }
+    return fixed_[count_];
+  }
+
+  bool Done() {
+    const bool more_fixed = count_ < fixed_size_;
+    const bool more_random = count_ < howmany_;
+    return !(more_fixed || more_random);
+  }
+
+  void Next() {
+    ++count_;
+  }
+
+ private:
+  MTRandom *rand_;     // Not owned.
+  size_t count_;       // Current position in the sequence.
+  T* fixed_;           // Points at U::sizes.
+  size_t fixed_size_;  // Number of entries in U::sizes.
+  size_t howmany_;     // Requested sequence length.
+};
+
+// Size table with the two static members (sizes, max_value) that
+// SizeIterator reads from its U parameter.  The entries are 0, 1, 3333
+// and values at and around multiples of Constants::BLOCK_SIZE.
+class SmallSizes {
+public:
+  static size_t sizes[];
+  static size_t max_value;
+};
+
+size_t SmallSizes::sizes[] = {
+  0, 1, Constants::BLOCK_SIZE / 4, 3333, 
+  Constants::BLOCK_SIZE - (Constants::BLOCK_SIZE / 3),
+  Constants::BLOCK_SIZE,
+  Constants::BLOCK_SIZE + (Constants::BLOCK_SIZE / 3),
+  2 * Constants::BLOCK_SIZE - (Constants::BLOCK_SIZE / 3),
+  2 * Constants::BLOCK_SIZE,
+  2 * Constants::BLOCK_SIZE + (Constants::BLOCK_SIZE / 3),
+};
+
+// Three block sizes.
+size_t SmallSizes::max_value = Constants::BLOCK_SIZE * 3;
+
+// Size table with the two static members (sizes, max_value) that
+// SizeIterator reads from its U parameter.  The fixed entries are
+// 2^20, 2^18 and 2^16, independent of Constants::BLOCK_SIZE.
+class LargeSizes {
+public:
+  static size_t sizes[];
+  static size_t max_value;
+};
+
+size_t LargeSizes::sizes[] = {
+  1 << 20,
+  1 << 18,
+  1 << 16,
+};
+
+// 2^20, the same as the largest fixed entry.
+size_t LargeSizes::max_value = 1<<20;
+
+}  // namespace regtest
diff --git a/testing/test.h b/testing/test.h
new file mode 100644
index 0000000..f2b46f3
--- /dev/null
+++ b/testing/test.h
@@ -0,0 +1,110 @@
+// -*- Mode: C++ -*-
+
+extern "C" {
+#define NOT_MAIN 1
+#define REGRESSION_TEST 0
+#define VCDIFF_TOOLS 1
+#include "../xdelta3.c"
+}
+
+// Comparison checks.  Each CHECK_xx(x, y) expands to CHECK_OP with the
+// matching relational operator.
+#define CHECK_EQ(x,y) CHECK_OP(x,y,==)
+#define CHECK_NE(x,y) CHECK_OP(x,y,!=)
+#define CHECK_LT(x,y) CHECK_OP(x,y,<)
+#define CHECK_GT(x,y) CHECK_OP(x,y,>)
+#define CHECK_LE(x,y) CHECK_OP(x,y,<=)
+#define CHECK_GE(x,y) CHECK_OP(x,y,>=)
+
+// Evaluates x and y once each into temporaries that both have the type
+// of x (so y is converted to x's type), compares them with OP, and on
+// failure prints the failing expression and both values to cerr before
+// calling abort().  The x value is printed under "Expected" and the y
+// value under "Actual".  Relies on the GNU typeof extension.
+#define CHECK_OP(x,y,OP) \
+  do { \
+    typeof(x) _x(x); \
+    typeof(x) _y(y); \
+    if (!(_x OP _y)) { \
+      cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x " " #OP " " #y << endl; \
+      cerr << __FILE__ << ":" << __LINE__ << " Expected: " << _x << endl; \
+      cerr << __FILE__ << ":" << __LINE__ << " Actual: " << _y << endl; \
+    abort(); \
+    } } while (false)
+
+// Boolean check: when x evaluates to false, prints the file, line and
+// the text of the expression to cerr and calls abort().
+#define CHECK(x) \
+  do {if (!(x)) {				       \
+  cerr << __FILE__ << ":" << __LINE__ << " Check failed: " << #x << endl; \
+  abort(); \
+    } } while (false)
+
+#include <string>
+using std::string;
+
+#include <vector>
+using std::vector;
+
+// Joins the entries of a command vector into one space-separated
+// string, stopping at the first NULL entry or at the end of the
+// vector.  An empty vector, or one whose first entry is NULL, yields
+// an empty string instead of reading v[0] unconditionally.
+inline string CommandToString(const vector<const char*> &v) {
+  string s;
+  for (size_t i = 0; i < v.size() && v[i] != NULL; i++) {
+    if (i != 0) {
+      s.append(" ");
+    }
+    s.append(v[i]);
+  }
+  return s;
+}
+
+#include <iostream>
+using std::cerr;
+using std::endl;
+using std::ostream;
+
+#include <map> 
+using std::map;
+using std::pair;
+
+#include <ext/hash_map>
+using __gnu_cxx::hash_map;
+
+#include <list>
+using std::list;
+
+// Builds a pair<T, U> holding copies of the two arguments.
+template <typename T, typename U>
+pair<T, U> make_pair(const T& t, const U& u) {
+  pair<T, U> result(t, u);
+  return result;
+}
+
+// Compile-time constants shared by the regression tests.
+class Constants {
+public:
+  // TODO: need to repeat the tests with different block sizes
+  // 1 << 7 triggers some bugs, 1 << 20 triggers others.
+  //
+  // Currently 128 bytes; the 1 << 20 alternative is kept commented out.
+  //static const xoff_t BLOCK_SIZE = 1 << 20;
+  static const xoff_t BLOCK_SIZE = 1 << 7;
+};
+
+using std::min;
+
+#include "random.h"
+using regtest::MTRandom;
+using regtest::MTRandom8;
+
+#include "segment.h"
+using regtest::Segment;
+
+#include "modify.h"
+using regtest::Mutator;
+using regtest::ChangeList;
+using regtest::Change;
+using regtest::ChangeListMutator;
+using regtest::Modify1stByte;
+
+#include "file.h"
+using regtest::Block;
+using regtest::BlockIterator;
+using regtest::ExtFile;
+using regtest::FileSpec;
+using regtest::TmpFile;
+
+#include "cmp.h"
+using regtest::CmpDifferentBytes;
+
+#include "sizes.h"
+using regtest::SizeIterator;
+using regtest::SmallSizes;
+using regtest::LargeSizes;
+
+#include "delta.h"
+using regtest::Delta;
diff --git a/xdelta3-cfgs.h b/xdelta3-cfgs.h
new file mode 100644
index 0000000..b13f7b0
--- /dev/null
+++ b/xdelta3-cfgs.h
@@ -0,0 +1,173 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007. Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/******************************************************************
+ SOFT string matcher
+
+ Every tuning macro expands to a field of stream->smatcher, so the
+ parameters are read from the stream at run time rather than being
+ compile-time constants.  xdelta3.c is included with SOFTCFG defined
+ as 1 for this configuration only; SOFTCFG is redefined to 0 after
+ the block.
+ ******************************************************************/
+
+#if XD3_BUILD_SOFT
+
+#define TEMPLATE      soft
+#define LLOOK         stream->smatcher.large_look
+#define LSTEP         stream->smatcher.large_step
+#define SLOOK         stream->smatcher.small_look
+#define SCHAIN        stream->smatcher.small_chain
+#define SLCHAIN       stream->smatcher.small_lchain
+#define MAXLAZY       stream->smatcher.max_lazy
+#define LONGENOUGH    stream->smatcher.long_enough
+
+#define SOFTCFG 1
+#include "xdelta3.c"
+#undef  SOFTCFG
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#endif
+
+#define SOFTCFG 0
+
+/************************************************************
+ FASTEST string matcher
+
+ Fixed compile-time parameters.  xdelta3.c is included while the
+ macros are defined, and they are #undef'd again afterwards.
+ **********************************************************/
+#if XD3_BUILD_FASTEST
+#define TEMPLATE      fastest
+#define LLOOK         9
+#define LSTEP         26
+#define SLOOK         4U
+#define SCHAIN        1
+#define SLCHAIN       1
+#define MAXLAZY       6
+#define LONGENOUGH    6
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#endif
+
+/************************************************************
+ FASTER string matcher
+
+ Fixed compile-time parameters.  xdelta3.c is included while the
+ macros are defined, and they are #undef'd again afterwards.
+ **********************************************************/
+#if XD3_BUILD_FASTER
+#define TEMPLATE      faster
+#define LLOOK         9
+#define LSTEP         15
+#define SLOOK         4U
+#define SCHAIN        1
+#define SLCHAIN       1
+#define MAXLAZY       18
+#define LONGENOUGH    18
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#endif
+
+/******************************************************
+ FAST string matcher
+
+ Fixed compile-time parameters.  xdelta3.c is included while the
+ macros are defined, and they are #undef'd again afterwards.
+ ********************************************************/
+#if XD3_BUILD_FAST
+#define TEMPLATE      fast
+#define LLOOK         9
+#define LSTEP         8
+#define SLOOK         4U
+#define SCHAIN        4
+#define SLCHAIN       1
+#define MAXLAZY       18
+#define LONGENOUGH    35
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#endif
+
+/**************************************************
+ SLOW string matcher
+
+ Fixed compile-time parameters.  xdelta3.c is included while the
+ macros are defined, and they are #undef'd again afterwards.
+ **************************************************************/
+#if XD3_BUILD_SLOW
+#define TEMPLATE      slow
+#define LLOOK         9
+#define LSTEP         2
+#define SLOOK         4U
+#define SCHAIN        44
+#define SLCHAIN       13
+#define MAXLAZY       90
+#define LONGENOUGH    70
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#endif
+
+/********************************************************
+ DEFAULT string matcher
+
+ Fixed compile-time parameters.  xdelta3.c is included while the
+ macros are defined, and they are #undef'd again afterwards.
+ ************************************************************/
+#if XD3_BUILD_DEFAULT
+#define TEMPLATE      default
+#define LLOOK         9
+#define LSTEP         3
+#define SLOOK         4U
+#define SCHAIN        8
+#define SLCHAIN       2
+#define MAXLAZY       36
+#define LONGENOUGH    70
+
+#include "xdelta3.c"
+
+#undef  TEMPLATE
+#undef  LLOOK
+#undef  SLOOK
+#undef  LSTEP
+#undef  SCHAIN
+#undef  SLCHAIN
+#undef  MAXLAZY
+#undef  LONGENOUGH
+#endif
diff --git a/xdelta3-decode.h b/xdelta3-decode.h
new file mode 100644
index 0000000..bf2b0b1
--- /dev/null
+++ b/xdelta3-decode.h
@@ -0,0 +1,1115 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XDELTA3_DECODE_H_
+#define _XDELTA3_DECODE_H_
+
+/* Masks X with VCD_SRCORTGT and yields VCD_SOURCE or VCD_TARGET when
+ * the masked bits equal exactly that value, and 0 otherwise.  X may
+ * be evaluated twice. */
+#define SRCORTGT(x) ((((x) & VCD_SRCORTGT) == VCD_SOURCE) ? \
+                     VCD_SOURCE : ((((x) & VCD_SRCORTGT) == \
+                                    VCD_TARGET) ? VCD_TARGET : 0))
+
+/* Initialize the decoder for a new window.  The dec_tgtlen value is
+ * preserved across successive window decodings, and the update to
+ * dec_winstart is delayed until a new window actually starts.  This
+ * is to avoid throwing an error due to overflow until the last
+ * possible moment.  This makes it possible to encode exactly 4GB
+ * through a 32-bit encoder.
+ *
+ * Concretely this clears the copy-window length and offset and the
+ * checksum byte counter, then re-initializes the address cache.
+ * Always returns 0. */
+static int
+xd3_decode_init_window (xd3_stream *stream)
+{
+  stream->dec_cpylen = 0;
+  stream->dec_cpyoff = 0;
+  stream->dec_cksumbytes = 0;
+
+  xd3_init_cache (& stream->acache);
+
+  return 0;
+}
+
+/* Allocates buffer space for the target window and possibly the
+ * VCD_TARGET copy-window.  Also sets the base of the two copy
+ * segments.
+ *
+ * Returns 0 on success, XD3_INVALID_INPUT when a VCD_TARGET copy
+ * window starts before dec_laststart, and ENOMEM when the output
+ * buffer cannot be allocated. */
+static int
+xd3_decode_setup_buffers (xd3_stream *stream)
+{
+  /* If VCD_TARGET is set then the previous buffer may be reused. */
+  if (stream->dec_win_ind & VCD_TARGET)
+    {
+      /* But this implementation only supports copying from the last
+       * target window.  If the offset is outside that range, it can't
+       * be done. */
+      if (stream->dec_cpyoff < stream->dec_laststart)
+	{
+	  stream->msg = "unsupported VCD_TARGET offset";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* See if the two windows are the same.  This indicates the
+       * first time VCD_TARGET is used.  This causes a second buffer
+       * to be allocated, after that the two are swapped in the
+       * DEC_FINISH case. */
+      if (stream->dec_lastwin == stream->next_out)
+	{
+	  /* Zeroing space_out forces the reallocation branch below
+	   * whenever dec_tgtlen is non-zero. */
+	  stream->next_out  = NULL;
+	  stream->space_out = 0;
+	}
+
+      // TODO: VCD_TARGET mode, this is broken
+      stream->dec_cpyaddrbase = stream->dec_lastwin +
+	(usize_t) (stream->dec_cpyoff - stream->dec_laststart);
+    }
+
+  /* See if the current output window is large enough. */
+  if (stream->space_out < stream->dec_tgtlen)
+    {
+      /* The old buffer is released before the new one is requested;
+       * its contents are not carried over. */
+      xd3_free (stream, stream->dec_buffer);
+
+      /* Note: space_out is updated before the allocation is attempted,
+       * so it already holds the new size if ENOMEM is returned. */
+      stream->space_out =
+	xd3_round_blksize (stream->dec_tgtlen, XD3_ALLOCSIZE);
+
+      if ((stream->dec_buffer =
+	   (uint8_t*) xd3_alloc (stream, stream->space_out, 1)) == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      stream->next_out = stream->dec_buffer;
+    }
+
+  /* dec_tgtaddrbase refers to an invalid base address, but it is
+   * always used with a sufficiently large instruction offset (i.e.,
+   * beyond the copy window).  This condition is enforced by
+   * xd3_decode_output_halfinst. */
+  stream->dec_tgtaddrbase = stream->next_out - stream->dec_cpylen;
+
+  return 0;
+}
+
+/* Ensures *BUF_PTR refers to a buffer of at least SIZE bytes.  An
+ * existing buffer that is already large enough is kept as is; one
+ * that is too small is freed first.  A new buffer is sized by
+ * rounding SIZE with xd3_round_blksize, and that size is stored in
+ * *BUF_ALLOC.  Returns 0 on success or ENOMEM when allocation fails
+ * (in which case *BUF_PTR is NULL). */
+static int
+xd3_decode_allocate (xd3_stream  *stream,
+		     usize_t       size,
+		     uint8_t    **buf_ptr,
+		     usize_t      *buf_alloc)
+{
+  uint8_t *buf = *buf_ptr;
+
+  if (buf != NULL)
+    {
+      if (*buf_alloc >= size)
+	{
+	  /* Current buffer already fits the request. */
+	  return 0;
+	}
+
+      xd3_free (stream, buf);
+      *buf_ptr = NULL;
+    }
+
+  *buf_alloc = xd3_round_blksize (size, XD3_ALLOCSIZE);
+
+  buf = (uint8_t*) xd3_alloc (stream, *buf_alloc, 1);
+  *buf_ptr = buf;
+
+  return (buf == NULL) ? ENOMEM : 0;
+}
+
+/* Gathers the bytes of one section of the current window.
+ *
+ * When COPY is zero and nothing has been consumed for this section
+ * yet, section->buf is pointed directly at stream->next_in and the
+ * whole section is taken without copying.  Otherwise as much input
+ * as is available (up to what the section still needs) is copied
+ * into section->copied1, which is allocated when the section is
+ * started.
+ *
+ * Returns XD3_INPUT when more input is required, an error from
+ * xd3_decode_allocate, or 0 once the section is complete.  On
+ * completion dec_state becomes NSTATE, section->buf_max marks the end
+ * of the section, and section->pos is reset to 0. */
+static int
+xd3_decode_section (xd3_stream *stream,
+		    xd3_desect *section,
+		    xd3_decode_state nstate,
+		    int copy)
+{
+  XD3_ASSERT (section->pos <= section->size);
+  XD3_ASSERT (stream->dec_state != nstate);
+
+  if (section->pos < section->size)
+    {
+      usize_t sect_take;
+
+      if (stream->avail_in == 0)
+	{
+	  return XD3_INPUT;
+	}
+
+      if ((copy == 0) && (section->pos == 0))
+	{
+	  /* No allocation/copy needed */
+	  section->buf = stream->next_in;
+	  sect_take    = section->size;
+	}
+      else
+	{
+	  usize_t sect_need = section->size - section->pos;
+
+	  /* Allocate and copy */
+	  sect_take = min (sect_need, stream->avail_in);
+
+	  /* The copy buffer is set up only when the section starts;
+	   * later calls append to it at section->pos. */
+	  if (section->pos == 0)
+	    {
+	      int ret;
+
+	      if ((ret = xd3_decode_allocate (stream,
+					      section->size,
+					      & section->copied1,
+					      & section->alloc1)))
+		{
+		  return ret;
+		}
+
+	      section->buf = section->copied1;
+	    }
+
+	  memcpy (section->copied1 + section->pos,
+		  stream->next_in,
+		  sect_take);
+	}
+
+      section->pos += sect_take;
+
+      stream->dec_winbytes += sect_take;
+
+      /* DECODE_INPUT is defined elsewhere; presumably it advances the
+       * stream's input cursor by sect_take -- confirm at its
+       * definition. */
+      DECODE_INPUT (sect_take);
+    }
+
+  if (section->pos < section->size)
+    {
+      stream->msg = "further input required";
+      return XD3_INPUT;
+    }
+
+  XD3_ASSERT (section->pos == section->size);
+
+  stream->dec_state = nstate;
+  section->buf_max  = section->buf + section->size;
+  section->pos      = 0;
+  return 0;
+}
+
+/* Decode the size and address for half of an instruction (i.e., a
+ * single opcode).  This updates the stream->dec_position, which are
+ * bytes already output prior to processing this instruction.  Perform
+ * bounds checking for sizes and copy addresses, which uses the
+ * dec_position (which is why these checks are done here).
+ *
+ * The bounds checks are written as subtractions rather than sums:
+ * sizes come from the (untrusted) delta input, and a sum such as
+ * dec_position + size can wrap around and slip past a "> limit"
+ * comparison, letting an oversized instruction through.
+ *
+ * Returns 0 on success, XD3_INVALID_INPUT when the size cannot be
+ * read or a bound is violated, or the error from xd3_decode_address. */
+static int
+xd3_decode_parse_halfinst (xd3_stream *stream, xd3_hinst *inst)
+{
+  int ret;
+
+  /* If the size from the instruction table is zero then read a size value. */
+  if ((inst->size == 0) &&
+      (ret = xd3_read_size (stream,
+			    & stream->inst_sect.buf,
+			      stream->inst_sect.buf_max,
+			    & inst->size)))
+    {
+      return XD3_INVALID_INPUT;
+    }
+
+  /* For copy instructions, read address. */
+  if (inst->type >= XD3_CPY)
+    {
+      IF_DEBUG2 ({
+	static int cnt = 0;
+	DP(RINT "DECODE:%u: COPY at %"Q"u (winoffset %u) size %u winaddr %u\n",
+		 cnt++,
+		 stream->total_out + (stream->dec_position -
+				      stream->dec_cpylen),
+		 (stream->dec_position - stream->dec_cpylen),
+		 inst->size,
+		 inst->addr);
+      });
+
+      if ((ret = xd3_decode_address (stream,
+				     stream->dec_position,
+				     inst->type - XD3_CPY,
+				     & stream->addr_sect.buf,
+				     stream->addr_sect.buf_max,
+				     & inst->addr)))
+	{
+	  return ret;
+	}
+
+      /* Cannot copy an address before it is filled-in. */
+      if (inst->addr >= stream->dec_position)
+	{
+	  stream->msg = "address too large";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Check: a VCD_TARGET or VCD_SOURCE copy cannot exceed the remaining
+       * buffer space in its own segment.  addr < dec_cpylen is tested
+       * first, so the subtraction cannot underflow. */
+      if (inst->addr < stream->dec_cpylen &&
+	  inst->size > stream->dec_cpylen - inst->addr)
+	{
+	  stream->msg = "size too large";
+	  return XD3_INVALID_INPUT;
+	}
+    }
+  else
+    {
+      IF_DEBUG2 ({
+	if (inst->type == XD3_ADD)
+	  {
+	    static int cnt;
+	    DP(RINT "DECODE:%d: ADD at %"Q"u (winoffset %u) size %u\n",
+	       cnt++,
+	       (stream->total_out + stream->dec_position - stream->dec_cpylen),
+	       stream->dec_position - stream->dec_cpylen,
+	       inst->size);
+	  }
+	else
+	  {
+	    static int cnt;
+	    XD3_ASSERT (inst->type == XD3_RUN);
+	    DP(RINT "DECODE:%d: RUN at %"Q"u (winoffset %u) size %u\n",
+	       cnt++,
+	       stream->total_out + stream->dec_position - stream->dec_cpylen,
+	       stream->dec_position - stream->dec_cpylen,
+	       inst->size);
+	  }
+      });
+    }
+
+  /* Check: The instruction will not overflow the output buffer.  The
+   * first test guarantees that dec_maxpos - size does not underflow. */
+  if (inst->size > stream->dec_maxpos ||
+      stream->dec_position > stream->dec_maxpos - inst->size)
+    {
+      stream->msg = "size too large";
+      return XD3_INVALID_INPUT;
+    }
+
+  stream->dec_position += inst->size;
+  return 0;
+}
+
+/* Reads one opcode byte from the instruction section, looks it up in
+ * the stream's code table, and loads the resulting type/size pair
+ * into dec_current1 and dec_current2.  Each half that is not a NOOP
+ * is then parsed for its size and address.  Returns
+ * XD3_INVALID_INPUT on instruction underflow, otherwise the first
+ * non-zero result from parsing, or 0. */
+static int
+xd3_decode_instruction (xd3_stream *stream)
+{
+  const xd3_dinst *entry;
+  int ret;
+
+  if (stream->inst_sect.buf == stream->inst_sect.buf_max)
+    {
+      stream->msg = "instruction underflow";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* Look up the opcode, then step past it. */
+  entry = & stream->code_table[*stream->inst_sect.buf];
+  stream->inst_sect.buf += 1;
+
+  stream->dec_current1.type = entry->type1;
+  stream->dec_current1.size = entry->size1;
+  stream->dec_current2.type = entry->type2;
+  stream->dec_current2.size = entry->size2;
+
+  /* A code table may place NOOP in either position (unlikely, but
+   * allowed), so each half is tested independently. */
+  if (entry->type1 != XD3_NOOP)
+    {
+      ret = xd3_decode_parse_halfinst (stream, & stream->dec_current1);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  if (entry->type2 != XD3_NOOP)
+    {
+      ret = xd3_decode_parse_halfinst (stream, & stream->dec_current2);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* Output the result of a single half-instruction. OPT: This is the
+ * decoder hotspot.
+ *
+ * Bytes are written at stream->next_out + stream->avail_out, and
+ * avail_out is advanced by the number of bytes produced, so within
+ * this function avail_out acts as the count of bytes already written
+ * to the window.
+ *
+ * When an instruction has been fully processed its type is set to
+ * XD3_NOOP.  A source copy that spans several source blocks is
+ * processed block by block: inst->size and inst->addr are adjusted
+ * after each piece, so if xd3_getblk returns non-zero part-way
+ * through, the instruction still describes the remaining work.
+ *
+ * Returns 0 on success, XD3_INVALID_INPUT on data underflow or a
+ * short source file, or a non-zero result from xd3_getblk (with
+ * XD3_TOOFARBACK mapped to XD3_INTERNAL). */
+static int
+xd3_decode_output_halfinst (xd3_stream *stream, xd3_hinst *inst)
+{
+  /* To make this reentrant, set take = min (inst->size, available
+     space)... */
+  usize_t take = inst->size;
+
+  XD3_ASSERT (inst->type != XD3_NOOP);
+
+  switch (inst->type)
+    {
+    case XD3_RUN:
+      {
+	/* Only require a single data byte. */
+	if (stream->data_sect.buf == stream->data_sect.buf_max)
+	  {
+	    stream->msg = "data underflow";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	/* Repeat that one byte TAKE times. */
+	memset (stream->next_out + stream->avail_out,
+		stream->data_sect.buf[0],
+		take);
+
+	stream->data_sect.buf += 1;
+	stream->avail_out += take;
+	inst->type = XD3_NOOP;
+	break;
+      }
+    case XD3_ADD:
+      {
+	/* Require at least TAKE data bytes. */
+	if (stream->data_sect.buf + take > stream->data_sect.buf_max)
+	  {
+	    stream->msg = "data underflow";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	memcpy (stream->next_out + stream->avail_out,
+		stream->data_sect.buf,
+		take);
+
+	stream->data_sect.buf += take;
+	stream->avail_out += take;
+	inst->type = XD3_NOOP;
+	break;
+      }
+    default:
+      {
+	/* Every other type is handled as a copy. */
+	usize_t i;
+	const uint8_t *src;
+	uint8_t *dst;
+
+	/* See if it copies from the VCD_TARGET/VCD_SOURCE window or
+	 * the target window.  Out-of-bounds checks for the addresses
+	 * and sizes are performed in xd3_decode_parse_halfinst. */
+	if (inst->addr < stream->dec_cpylen)
+	  {
+	    if (stream->dec_win_ind & VCD_TARGET)
+	      {
+		/* For VCD_TARGET we know the entire range is
+		 * in-memory, as established by
+		 * decode_setup_buffers.
+                 *
+                 * TODO: this is totally bogus, VCD_TARGET won't work.
+                 */
+		src = stream->dec_cpyaddrbase + inst->addr;
+		inst->type = XD3_NOOP;
+		inst->size = 0;
+	      }
+	    else
+	      {
+		/* In this case we have to read a source block, which
+		 * could return control to the caller.  We need to
+		 * know the first block number needed for this
+		 * copy. */
+		xd3_source *source;
+		xoff_t block;
+		usize_t blkoff;
+		usize_t blksize;
+		int ret;
+
+		/* Re-entered (via goto at the end of this case) once
+		 * per source block while the copy is unfinished. */
+	      more:
+
+		source  = stream->src;
+		block   = source->cpyoff_blocks;
+		blkoff  = source->cpyoff_blkoff + inst->addr;
+		blksize = source->blksize;
+
+		/* Normalize (block, blkoff) so that blkoff lies inside
+		 * a single block. */
+ 		while (blkoff >= blksize)
+		  {
+		    block  += 1;
+		    blkoff -= blksize;
+		  }
+
+		if ((ret = xd3_getblk (stream, block)))
+		  {
+		    /* could be a XD3_GETSRCBLK failure. */
+		    if (ret == XD3_TOOFARBACK)
+		      {
+			ret = XD3_INTERNAL;
+		      }
+		    return ret;
+		  }
+
+		src = source->curblk + blkoff;
+
+		/* This block either contains enough data or the source file
+		 * is short. */
+		if ((source->onblk != blksize) &&
+		    (blkoff + take > source->onblk))
+		  {
+		    stream->msg = "source file too short";
+		    return XD3_INVALID_INPUT;
+
+		  }
+
+		XD3_ASSERT (blkoff != blksize);
+
+		if (blkoff + take <= blksize)
+		  {
+		    /* The rest of the copy fits in this block. */
+		    inst->type = XD3_NOOP;
+		    inst->size = 0;
+		  }
+		else
+		  {
+		    /* This block doesn't contain all the data, modify
+		     * the instruction, do not set to XD3_NOOP. */
+		    take = blksize - blkoff;
+		    inst->size -= take;
+		    inst->addr += take;
+		  }
+	      }
+	  }
+	else
+	  {
+	    /* For a target-window copy, we know the entire range is
+	     * in-memory.  The dec_tgtaddrbase is negatively offset by
+	     * dec_cpylen because the addresses start beyond that
+	     * point. */
+	    src = stream->dec_tgtaddrbase + inst->addr;
+	    inst->type = XD3_NOOP;
+	    inst->size = 0;
+	  }
+
+ 	dst = stream->next_out + stream->avail_out;
+
+	stream->avail_out += take;
+
+	/* Can't just memcpy here due to possible overlap. */
+	for (i = take; i != 0; i -= 1)
+	  {
+	    *dst++ = *src++;
+	  }
+
+	/* Whatever is left of the instruction (0 when finished). */
+	take = inst->size;
+
+	/* If there is more to copy, call getblk again. */
+	if (inst->type != XD3_NOOP)
+	  {
+	    XD3_ASSERT (take > 0);
+	    goto more;
+	  }
+	else
+	  {
+	    XD3_ASSERT (take == 0);
+	  }
+      }
+    }
+
+  return 0;
+}
+
+/* Marks the current window as finished: rewinds the position of the
+ * three sections, clears the per-window input byte count and moves
+ * the decoder to DEC_FINISH.  Always returns XD3_OUTPUT. */
+static int
+xd3_decode_finish_window (xd3_stream *stream)
+{
+  /* Section cursors start over for the next window. */
+  stream->inst_sect.pos = 0;
+  stream->addr_sect.pos = 0;
+  stream->data_sect.pos = 0;
+
+  stream->dec_state    = DEC_FINISH;
+  stream->dec_winbytes = 0;
+
+  return XD3_OUTPUT;
+}
+
+/* Runs secondary decompression over the DATA, INST and ADDR sections,
+ * in that order, for each section whose VCD_*COMP bit is set in
+ * dec_del_ind.  Stops at, and returns, the first non-zero result from
+ * xd3_decode_secondary; returns 0 otherwise.  When SECONDARY_ANY is
+ * not enabled the function does nothing and returns 0. */
+static int
+xd3_decode_secondary_sections (xd3_stream *secondary_stream)
+{
+#if SECONDARY_ANY
+  int ret;
+/* True (with RET set) when section UPPER is flagged as compressed and
+ * decoding it fails; short-circuits when the flag is clear. */
+#define DECODE_SECONDARY_SECTION(UPPER,LOWER) \
+  ((secondary_stream->dec_del_ind & VCD_ ## UPPER ## COMP) && \
+   (ret = xd3_decode_secondary (secondary_stream, \
+				& secondary_stream-> LOWER ## _sect,	\
+				& xd3_sec_ ## LOWER (secondary_stream))))
+
+  if (DECODE_SECONDARY_SECTION (DATA, data) ||
+      DECODE_SECONDARY_SECTION (INST, inst) ||
+      DECODE_SECONDARY_SECTION (ADDR, addr))
+    {
+      return ret;
+    }
+#undef DECODE_SECONDARY_SECTION
+#endif
+  return 0;
+}
+
+/* Collects the DATA, INST and ADDR sections of the current window and
+ * prepares the output buffers.
+ *
+ * With XD3_JUST_HDR set the window is finished immediately.  With
+ * XD3_SKIP_WINDOW set the section bytes are consumed without being
+ * decoded.  Otherwise the sections are gathered (resuming at the one
+ * named by dec_state), secondary decompression is applied, and,
+ * unless XD3_SKIP_EMIT is set, the output buffers are set up.
+ *
+ * Returns 0 when the sections are ready for emission, XD3_INPUT when
+ * more input is needed, the result of xd3_decode_finish_window on the
+ * early-finish paths, or an error code. */
+static int
+xd3_decode_sections (xd3_stream *stream)
+{
+  usize_t need, more, take;
+  int copy, ret;
+
+  if ((stream->flags & XD3_JUST_HDR) != 0)
+    {
+      /* Nothing left to do. */
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* To avoid copying, need this much data available */
+  need = (stream->inst_sect.size +
+	  stream->addr_sect.size +
+	  stream->data_sect.size);
+
+  /* The window may be entirely processed. */
+  XD3_ASSERT (stream->dec_winbytes <= need);
+
+  /* Compute how much more input is needed. */
+  more = (need - stream->dec_winbytes);
+
+  /* How much to consume. */
+  take = min (more, stream->avail_in);
+
+  /* See if the input is completely available, to avoid copy. */
+  copy = (take != more);
+
+  /* If the window is skipped... */
+  if ((stream->flags & XD3_SKIP_WINDOW) != 0)
+    {
+      /* Skip the available input. */
+      DECODE_INPUT (take);
+
+      stream->dec_winbytes += take;
+
+      if (copy)
+	{
+	  stream->msg = "further input required";
+	  return XD3_INPUT;
+	}
+
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* Gather the DATA, INST and ADDR sections in that order, entering
+   * at the section named by dec_state.  Each case deliberately falls
+   * through to the next once its section is complete. */
+  switch (stream->dec_state)
+    {
+    default:
+      stream->msg = "internal error";
+      return XD3_INVALID_INPUT;
+
+    case DEC_DATA:
+      if ((ret = xd3_decode_section (stream, & stream->data_sect,
+				     DEC_INST, copy))) { return ret; }
+      /* fallthrough */
+    case DEC_INST:
+      if ((ret = xd3_decode_section (stream, & stream->inst_sect,
+				     DEC_ADDR, copy))) { return ret; }
+      /* fallthrough */
+    case DEC_ADDR:
+      if ((ret = xd3_decode_section (stream, & stream->addr_sect,
+				     DEC_EMIT, copy))) { return ret; }
+    }
+
+  XD3_ASSERT (stream->dec_winbytes == need);
+
+  if ((ret = xd3_decode_secondary_sections (stream))) { return ret; }
+
+  if (stream->flags & XD3_SKIP_EMIT)
+    {
+      return xd3_decode_finish_window (stream);
+    }
+
+  /* OPT: A possible optimization is to avoid allocating memory in
+   * decode_setup_buffers and to avoid a large memcpy when the window
+   * consists of a single VCD_SOURCE copy instruction.  The only
+   * potential problem is if the following window is a VCD_TARGET,
+   * then you need to remember... */
+  if ((ret = xd3_decode_setup_buffers (stream))) { return ret; }
+
+  return 0;
+}
+
+/* Executes the window's instructions to produce the target window,
+ * then validates the result: the number of bytes produced must equal
+ * dec_tgtlen, the data and address sections must be fully consumed,
+ * and, when VCD_ADLER32 is set and XD3_ADLER32_NOVER is not, the
+ * Adler-32 of the output must match dec_adler32.
+ *
+ * Returns the result of xd3_decode_finish_window on success,
+ * XD3_INVALID_INPUT on any validation failure, or whatever non-zero
+ * value instruction decoding/output returned. */
+static int
+xd3_decode_emit (xd3_stream *stream)
+{
+  int ret;
+
+  /* Produce output: originally structured to allow reentrant code
+   * that fills as much of the output buffer as possible, but VCDIFF
+   * semantics allows to copy from anywhere from the target window, so
+   * instead allocate a sufficiently sized buffer after the target
+   * window length is decoded.
+   *
+   * This code still needs to be reentrant to allow XD3_GETSRCBLK to
+   * return control.  This is handled by setting the
+   * stream->dec_currentN instruction types to XD3_NOOP after they
+   * have been processed. */
+  XD3_ASSERT (! (stream->flags & XD3_SKIP_EMIT));
+  XD3_ASSERT (stream->dec_tgtlen <= stream->space_out);
+
+  /* Continue while opcodes remain or a half-instruction is pending. */
+  while (stream->inst_sect.buf != stream->inst_sect.buf_max ||
+	 stream->dec_current1.type != XD3_NOOP ||
+	 stream->dec_current2.type != XD3_NOOP)
+    {
+      /* Decode next instruction pair. */
+      if ((stream->dec_current1.type == XD3_NOOP) &&
+	  (stream->dec_current2.type == XD3_NOOP) &&
+	  (ret = xd3_decode_instruction (stream))) { return ret; }
+
+      /* Output for each instruction. */
+      if ((stream->dec_current1.type != XD3_NOOP) &&
+	  (ret = xd3_decode_output_halfinst (stream, & stream->dec_current1)))
+	{
+	  return ret;
+	}
+
+      if ((stream->dec_current2.type != XD3_NOOP) &&
+	  (ret = xd3_decode_output_halfinst (stream, & stream->dec_current2)))
+	{
+	  return ret;
+	}
+    }
+
+  /* avail_out now holds the number of bytes written to the window. */
+  if (stream->avail_out != stream->dec_tgtlen)
+    {
+      IF_DEBUG1 (DP(RINT "AVAIL_OUT(%d) != DEC_TGTLEN(%d)\n",
+		    stream->avail_out, stream->dec_tgtlen));
+      stream->msg = "wrong window length";
+      return XD3_INVALID_INPUT;
+    }
+
+  if (stream->data_sect.buf != stream->data_sect.buf_max)
+    {
+      stream->msg = "extra data section";
+      return XD3_INVALID_INPUT;
+    }
+
+  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
+    {
+      stream->msg = "extra address section";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* OPT: Should cksum computation be combined with the above loop? */
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0 &&
+      (stream->flags & XD3_ADLER32_NOVER) == 0)
+    {
+      uint32_t a32 = adler32 (1L, stream->next_out, stream->avail_out);
+
+      if (a32 != stream->dec_adler32)
+	{
+	  stream->msg = "target window checksum mismatch";
+	  return XD3_INVALID_INPUT;
+	}
+    }
+
+  /* Finished with a window. */
+  return xd3_decode_finish_window (stream);
+}
+
+/* Advance the VCDIFF decoder by as much as the available input allows.
+ *
+ * The function is a resumable state machine keyed on stream->dec_state.
+ * Each case decodes one header field and then falls through to the next
+ * case, so a single call keeps going until a helper returns non-zero, an
+ * error is detected, or an event must be reported.  Because dec_state is
+ * updated as each field completes, a later call resumes where this one
+ * stopped.
+ *
+ * Returns:
+ *   XD3_GOTHEADER      the first window's header was read (current_window
+ *                      is 0);
+ *   XD3_WINSTART       a later window's header was read;
+ *   XD3_WINFINISH      DEC_FINISH bookkeeping for a window was done;
+ *   XD3_INVALID_INPUT  malformed input (stream->msg is set) or a call made
+ *                      while stream->enc_state is non-zero;
+ *   ENOMEM             allocation of the code table or application header
+ *                      failed;
+ *   otherwise whatever non-zero value a decode helper or xd3_decode_emit
+ *   returned (xd3_decode_emit returns XD3_OUTPUT on success). */
+int
+xd3_decode_input (xd3_stream *stream)
+{
+  int ret;
+
+  if (stream->enc_state != 0)
+    {
+      stream->msg = "encoder/decoder transition";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* The three *_CASE helpers share one shape: when EXPR is true, decode a
+   * field into X and return immediately if the helper reports non-zero;
+   * then advance dec_state to NSTATE.  The state advances even when EXPR
+   * is false, i.e. when the optional field is absent. */
+#define BYTE_CASE(expr,x,nstate) \
+      do { \
+      if ( (expr) && \
+           ((ret = xd3_decode_byte (stream, & (x))) != 0) ) { return ret; } \
+      stream->dec_state = (nstate); \
+      } while (0)
+
+#define OFFSET_CASE(expr,x,nstate) \
+      do { \
+      if ( (expr) && \
+           ((ret = xd3_decode_offset (stream, & (x))) != 0) ) { return ret; } \
+      stream->dec_state = (nstate); \
+      } while (0)
+
+#define SIZE_CASE(expr,x,nstate) \
+      do { \
+      if ( (expr) && \
+           ((ret = xd3_decode_size (stream, & (x))) != 0) ) { return ret; } \
+      stream->dec_state = (nstate); \
+      } while (0)
+
+  switch (stream->dec_state)
+    {
+    case DEC_VCHEAD:
+      {
+	/* Four magic bytes: three identify VCDIFF, the fourth is the
+	 * version, which must be zero. */
+	if ((ret = xd3_decode_bytes (stream, stream->dec_magic,
+				     & stream->dec_magicbytes, 4)))
+	  {
+	    return ret;
+	  }
+
+	if (stream->dec_magic[0] != VCDIFF_MAGIC1 ||
+	    stream->dec_magic[1] != VCDIFF_MAGIC2 ||
+	    stream->dec_magic[2] != VCDIFF_MAGIC3)
+	  {
+	    stream->msg = "not a VCDIFF input";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	if (stream->dec_magic[3] != 0)
+	  {
+	    stream->msg = "VCDIFF input version > 0 is not supported";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	stream->dec_state = DEC_HDRIND;
+      }
+      /* fall through */
+    case DEC_HDRIND:
+      {
+	if ((ret = xd3_decode_byte (stream, & stream->dec_hdr_ind)))
+	  {
+	    return ret;
+	  }
+
+	if ((stream->dec_hdr_ind & VCD_INVHDR) != 0)
+	  {
+	    stream->msg = "unrecognized header indicator bits set";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	stream->dec_state = DEC_SECONDID;
+      }
+      /* fall through */
+
+    case DEC_SECONDID:
+      /* Secondary compressor ID: only if VCD_SECONDARY is set */
+      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
+	{
+	  BYTE_CASE (1, stream->dec_secondid, DEC_TABLEN);
+
+	  /* NOTE(review): FGK_CASE and DJW_CASE are macros defined outside
+	   * this function; presumably each selects its compressor and
+	   * leaves this inner switch -- confirm at their definitions. */
+	  switch (stream->dec_secondid)
+	    {
+	    case VCD_FGK_ID:
+	      FGK_CASE (stream);
+	    case VCD_DJW_ID:
+	      DJW_CASE (stream);
+	    default:
+	      stream->msg = "unknown secondary compressor ID";
+	      return XD3_INVALID_INPUT;
+	    }
+	}
+      /* fall through */
+
+    case DEC_TABLEN:
+      /* Length of code table data: only if VCD_CODETABLE is set */
+      SIZE_CASE ((stream->dec_hdr_ind & VCD_CODETABLE) != 0,
+		 stream->dec_codetblsz, DEC_NEAR);
+
+      /* The codetblsz counts the two NEAR/SAME bytes */
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0) {
+	if (stream->dec_codetblsz <= 2) {
+	  /* A too-small size is a property of the input, not an
+	   * allocation failure, so report it as invalid input like
+	   * every other malformed-header case here. */
+	  stream->msg = "invalid code table size";
+	  return XD3_INVALID_INPUT;
+	}
+	stream->dec_codetblsz -= 2;
+      }
+      /* fall through */
+    case DEC_NEAR:
+      /* Near modes: only if VCD_CODETABLE is set */
+      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0,
+		stream->acache.s_near, DEC_SAME);
+      /* fall through */
+    case DEC_SAME:
+      /* Same modes: only if VCD_CODETABLE is set */
+      BYTE_CASE((stream->dec_hdr_ind & VCD_CODETABLE) != 0,
+		stream->acache.s_same, DEC_TABDAT);
+      /* fall through */
+    case DEC_TABDAT:
+      /* Compressed code table data */
+
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
+	{
+	  /* Get the code table data.  The buffer is allocated only once;
+	   * a resumed call finds dec_codetbl already set. */
+	  if ((stream->dec_codetbl == NULL) &&
+	      (stream->dec_codetbl =
+	       (uint8_t*) xd3_alloc (stream,
+				     stream->dec_codetblsz, 1)) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_codetbl,
+				       & stream->dec_codetblbytes,
+				       stream->dec_codetblsz)))
+	    {
+	      return ret;
+	    }
+
+	  if ((ret = xd3_apply_table_encoding (stream, stream->dec_codetbl,
+					       stream->dec_codetblbytes)))
+	    {
+	      return ret;
+	    }
+	}
+      else
+	{
+	  /* Use the default table. */
+	  stream->acache.s_near = __rfc3284_code_table_desc.near_modes;
+	  stream->acache.s_same = __rfc3284_code_table_desc.same_modes;
+	  stream->code_table    = xd3_rfc3284_code_table ();
+	}
+
+      if ((ret = xd3_alloc_cache (stream))) { return ret; }
+
+      stream->dec_state = DEC_APPLEN;
+      /* fall through */
+
+    case DEC_APPLEN:
+      /* Length of application data */
+      SIZE_CASE((stream->dec_hdr_ind & VCD_APPHEADER) != 0,
+		stream->dec_appheadsz, DEC_APPDAT);
+      /* fall through */
+
+    case DEC_APPDAT:
+      /* Application data */
+      if (stream->dec_hdr_ind & VCD_APPHEADER)
+	{
+	  /* The size comes from the input.  Adding the padding byte below
+	   * must not wrap around to zero, or the terminator would be
+	   * written far outside an undersized allocation. */
+	  if (USIZE_T_OVERFLOW (stream->dec_appheadsz, 1))
+	    {
+	      stream->msg = "application header size overflows a usize_t";
+	      return XD3_INVALID_INPUT;
+	    }
+
+	  /* Note: we add an additional byte for padding, to allow
+	     0-termination. */
+	  if ((stream->dec_appheader == NULL) &&
+	      (stream->dec_appheader =
+	       (uint8_t*) xd3_alloc (stream,
+				     stream->dec_appheadsz+1, 1)) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+
+	  stream->dec_appheader[stream->dec_appheadsz] = 0;
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_appheader,
+				       & stream->dec_appheadbytes,
+				       stream->dec_appheadsz)))
+	    {
+	      return ret;
+	    }
+	}
+
+      /* Everything consumed so far is the file header. */
+      stream->dec_hdrsize = stream->total_in;
+      stream->dec_state = DEC_WININD;
+      /* fall through */
+
+    case DEC_WININD:
+      {
+	/* Start of a window: the window indicator */
+	if ((ret = xd3_decode_byte (stream, & stream->dec_win_ind)))
+	  {
+	    return ret;
+	  }
+
+	stream->current_window = stream->dec_window_count;
+
+	/* dec_tgtlen still holds the previous window's length here, so
+	 * this advances dec_winstart past the previous window. */
+	if (XOFF_T_OVERFLOW (stream->dec_winstart, stream->dec_tgtlen))
+	  {
+	    stream->msg = "decoder file offset overflow";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	stream->dec_winstart += stream->dec_tgtlen;
+
+	if ((stream->dec_win_ind & VCD_INVWIN) != 0)
+	  {
+	    stream->msg = "unrecognized window indicator bits set";
+	    return XD3_INVALID_INPUT;
+	  }
+
+	if ((ret = xd3_decode_init_window (stream))) { return ret; }
+
+	stream->dec_state = DEC_CPYLEN;
+
+	IF_DEBUG1 (DP(RINT "--------- TARGET WINDOW %"Q"u -----------\n",
+		      stream->current_window));
+      }
+      /* fall through */
+
+    case DEC_CPYLEN:
+      /* Copy window length: only if VCD_SOURCE or VCD_TARGET is set */
+      SIZE_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpylen,
+		DEC_CPYOFF);
+
+      /* Set the initial, logical decoder position (HERE address) in
+       * dec_position.  This is set to just after the source/copy
+       * window, as we are just about to output the first byte of
+       * target window. */
+      stream->dec_position = stream->dec_cpylen;
+      /* fall through */
+
+    case DEC_CPYOFF:
+      /* Copy window offset: only if VCD_SOURCE or VCD_TARGET is set */
+      OFFSET_CASE(SRCORTGT (stream->dec_win_ind), stream->dec_cpyoff,
+		  DEC_ENCLEN);
+
+      /* Copy offset and copy length may not overflow. */
+      if (XOFF_T_OVERFLOW (stream->dec_cpyoff, stream->dec_cpylen))
+	{
+	  stream->msg = "decoder copy window overflows a file offset";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Check copy window bounds: VCD_TARGET window may not exceed
+	 current position. */
+      if ((stream->dec_win_ind & VCD_TARGET) &&
+	  (stream->dec_cpyoff + (xoff_t) stream->dec_cpylen >
+	   stream->dec_winstart))
+	{
+	  stream->msg = "VCD_TARGET window out of bounds";
+	  return XD3_INVALID_INPUT;
+	}
+      /* fall through */
+
+    case DEC_ENCLEN:
+      /* Length of the delta encoding */
+      SIZE_CASE(1, stream->dec_enclen, DEC_TGTLEN);
+      /* fall through */
+    case DEC_TGTLEN:
+      /* Length of target window */
+      SIZE_CASE(1, stream->dec_tgtlen, DEC_DELIND);
+
+      /* Set the maximum decoder position, beyond which we should not
+       * decode any data.  This is the maximum value for dec_position.
+       * This may not exceed the size of a usize_t. */
+      if (USIZE_T_OVERFLOW (stream->dec_cpylen, stream->dec_tgtlen))
+	{
+	  stream->msg = "decoder target window overflows a usize_t";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Check for malicious files. */
+      if (stream->dec_tgtlen > XD3_HARDMAXWINSIZE)
+	{
+	  stream->msg = "hard window size exceeded";
+	  return XD3_INVALID_INPUT;
+	}
+
+      stream->dec_maxpos = stream->dec_cpylen + stream->dec_tgtlen;
+      /* fall through */
+
+    case DEC_DELIND:
+      /* Delta indicator */
+      BYTE_CASE(1, stream->dec_del_ind, DEC_DATALEN);
+
+      if ((stream->dec_del_ind & VCD_INVDEL) != 0)
+	{
+	  stream->msg = "unrecognized delta indicator bits set";
+	  return XD3_INVALID_INPUT;
+	}
+
+      /* Delta indicator is only used with secondary compression. */
+      if ((stream->dec_del_ind != 0) && (stream->sec_type == NULL))
+	{
+	  stream->msg = "invalid delta indicator bits set";
+	  return XD3_INVALID_INPUT;
+	}
+      /* fall through */
+
+      /* Section lengths */
+    case DEC_DATALEN:
+      SIZE_CASE(1, stream->data_sect.size, DEC_INSTLEN);
+      /* fall through */
+    case DEC_INSTLEN:
+      SIZE_CASE(1, stream->inst_sect.size, DEC_ADDRLEN);
+      /* fall through */
+    case DEC_ADDRLEN:
+      SIZE_CASE(1, stream->addr_sect.size, DEC_CKSUM);
+      /* fall through */
+
+    case DEC_CKSUM:
+      /* Window checksum. */
+      if ((stream->dec_win_ind & VCD_ADLER32) != 0)
+	{
+	  int i;
+
+	  if ((ret = xd3_decode_bytes (stream, stream->dec_cksum,
+				       & stream->dec_cksumbytes, 4)))
+	    {
+	      return ret;
+	    }
+
+	  /* Assemble the four checksum bytes, first byte most
+	   * significant. */
+	  for (i = 0; i < 4; i += 1)
+	    {
+	      stream->dec_adler32 =
+		(stream->dec_adler32 << 8) | stream->dec_cksum[i];
+	    }
+	}
+
+      stream->dec_state = DEC_DATA;
+
+      /* Check dec_enclen for redundancy, otherwise it is not really used.
+       * The expected value is the delta indicator byte, the encoded
+       * sizes of the four length fields, the three section bodies and
+       * the optional 4-byte checksum. */
+      {
+	usize_t enclen_check =
+	  (1 + (xd3_sizeof_size (stream->dec_tgtlen) +
+		xd3_sizeof_size (stream->data_sect.size) +
+		xd3_sizeof_size (stream->inst_sect.size) +
+		xd3_sizeof_size (stream->addr_sect.size)) +
+	   stream->data_sect.size +
+	   stream->inst_sect.size +
+	   stream->addr_sect.size +
+	   ((stream->dec_win_ind & VCD_ADLER32) ? 4 : 0));
+
+	if (stream->dec_enclen != enclen_check)
+	  {
+	    stream->msg = "incorrect encoding length (redundent)";
+	    return XD3_INVALID_INPUT;
+	  }
+      }
+
+      /* Returning here gives the application a chance to inspect the
+       * header, skip the window, etc. */
+      if (stream->current_window == 0) { return XD3_GOTHEADER; }
+      else                             { return XD3_WINSTART; }
+
+    case DEC_DATA:
+    case DEC_INST:
+    case DEC_ADDR:
+      /* Next read the three sections. */
+     if ((ret = xd3_decode_sections (stream))) { return ret; }
+      /* fall through */
+
+    case DEC_EMIT:
+
+      /* To speed VCD_SOURCE block-address calculations, the source
+       * cpyoff_blocks and cpyoff_blkoff are pre-computed. */
+      if (stream->dec_win_ind & VCD_SOURCE)
+	{
+	  xd3_source *src = stream->src;
+
+	  if (src == NULL)
+	    {
+	      stream->msg = "source input required";
+	      return XD3_INVALID_INPUT;
+	    }
+
+	  xd3_blksize_div(stream->dec_cpyoff, src,
+			  &src->cpyoff_blocks,
+			  &src->cpyoff_blkoff);
+	}
+
+      /* xd3_decode_emit returns XD3_OUTPUT on every success. */
+      if ((ret = xd3_decode_emit (stream)) == XD3_OUTPUT)
+	{
+	  stream->total_out += (xoff_t) stream->avail_out;
+	}
+
+      return ret;
+
+    case DEC_FINISH:
+      {
+	/* For a VCD_TARGET window, keep the output buffer around as the
+	 * "last window": adopt it the first time, swap buffers after
+	 * that. */
+	if (stream->dec_win_ind & VCD_TARGET)
+	  {
+	    if (stream->dec_lastwin == NULL)
+	      {
+		stream->dec_lastwin   = stream->next_out;
+		stream->dec_lastspace = stream->space_out;
+	      }
+	    else
+	      {
+		xd3_swap_uint8p (& stream->dec_lastwin,
+				 & stream->next_out);
+		xd3_swap_usize_t (& stream->dec_lastspace,
+				  & stream->space_out);
+	      }
+	  }
+
+	stream->dec_lastlen   = stream->dec_tgtlen;
+	stream->dec_laststart = stream->dec_winstart;
+	stream->dec_window_count += 1;
+
+	/* Note: the updates to dec_winstart & current_window are
+	 * deferred until after the next DEC_WININD byte is read. */
+	stream->dec_state = DEC_WININD;
+	return XD3_WINFINISH;
+      }
+
+    default:
+      stream->msg = "invalid state";
+      return XD3_INVALID_INPUT;
+    }
+}
+
+#endif // _XDELTA3_DECODE_H_
diff --git a/xdelta3-djw.h b/xdelta3-djw.h
new file mode 100644
index 0000000..24f5b81
--- /dev/null
+++ b/xdelta3-djw.h
@@ -0,0 +1,1828 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* TODO: This code needs a thorough round of commenting.  There is
+ * some slop in the declaration of arrays, which are maybe one element
+ * larger than they need to be and comments would help clear it up. */
+
+#ifndef _XDELTA3_DJW_H_
+#define _XDELTA3_DJW_H_
+
+/* The following people deserve much credit for the algorithms and
+ * techniques contained in this file:
+
+ Julian Seward
+ Bzip2 sources, implementation of the multi-table Huffman technique.
+
+ Jean-loup Gailly and Mark Adler and L. Peter Deutsch
+ Zlib source code, RFC 1951
+
+ Daniel S. Hirschberg and Debra A. LeLewer
+ "Efficient Decoding of Prefix Codes"
+ Communications of the ACM, April 1990 33(4).
+
+ David J. Wheeler
+ Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
+ This contains the idea behind the multi-table Huffman and 1-2 coding
+ techniques.
+ ftp://ftp.cl.cam.ac.uk/users/djw3/
+
+*/
+
+/* OPT: during the multi-table iteration, pick the worst-overall
+ * performing table and replace it with exactly the frequencies of the
+ * worst-overall performing sector or N-worst performing sectors. */
+
+/* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with
+ * the Bzip prefix coding strategy.  xdfs-0.256 contains the last of
+ * the other-format tests, including RFC1950 and the RFC1950+MTF
+ * tests. */
+
+#define DJW_MAX_CODELEN      20 /* Maximum length of an alphabet code. */
+
+/* Code lengths are themselves code-length encoded, so the total number of
+ * codes is: [RUN_0, RUN_1, 1-DJW_MAX_CODELEN], i.e. 22 with the current
+ * DJW_MAX_CODELEN. */
+#define DJW_TOTAL_CODES      (DJW_MAX_CODELEN+2)
+
+#define RUN_0                0 /* Symbols used in MTF+1/2 coding. */
+#define RUN_1                1
+
+/* Number of code lengths always encoded (djw_encode_basic array) */
+#define DJW_BASIC_CODES      5  
+#define DJW_RUN_CODES        2  /* Number of run codes */
+
+/* Offset of extra codes: the extra codes follow the basic codes and the
+ * run codes, so this is 7 with the current values. */
+#define DJW_EXTRA_12OFFSET   (DJW_BASIC_CODES + DJW_RUN_CODES)
+
+/* Number of optionally encoded code lengths (djw_encode_extra array) */
+#define DJW_EXTRA_CODES      15
+
+/* Number of bits to code [0-DJW_EXTRA_CODES] */
+#define DJW_EXTRA_CODE_BITS  4  
+
+#define DJW_MAX_GROUPS       8  /* Max number of group coding tables */
+#define DJW_GROUP_BITS       3  /* Number of bits to code [1-DJW_MAX_GROUPS] */
+
+#define DJW_SECTORSZ_MULT     5  /* Multiplier for encoded sectorsz */
+#define DJW_SECTORSZ_BITS     5  /* Number of bits to code group size */
+/* Largest sector size: (1 << 5) * 5 = 160 with the current values. */
+#define DJW_SECTORSZ_MAX      ((1 << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
+
+/* Maximum number of iterations to find group tables. */
+#define DJW_MAX_ITER         6
+/* Minimum number of bits an iteration must reduce coding by. */
+#define DJW_MIN_IMPROVEMENT  20 
+
+/* Maximum code length of a prefix code length */
+#define DJW_MAX_CLCLEN       15
+
+/* Number of bits to code [0-DJW_MAX_CLCLEN] */
+#define DJW_CLCLEN_BITS      4  
+
+#define DJW_MAX_GBCLEN       7  /* Maximum code length of a group selector */
+
+/* Number of bits to code [0-DJW_MAX_GBCLEN]
+ * TODO: Actually, should never have zero code lengths here, or else a group
+ * went unused.  Write a test for this: if a group goes unused, eliminate
+ * it? */
+#define DJW_GBCLEN_BITS      3  
+
+/* It has to save at least this many bits... */
+#define EFFICIENCY_BITS      16
+
+/* Short names for the structures below; djw_weight is the integer type
+ * used for symbol frequency counts. */
+typedef struct _djw_stream   djw_stream;
+typedef struct _djw_heapen   djw_heapen;
+typedef struct _djw_prefix   djw_prefix;
+typedef uint32_t             djw_weight;
+
+/* One node of the prefix (Huffman) tree built by djw_build_prefix. */
+struct _djw_heapen
+{
+  uint32_t depth;   /* Height of the subtree: 0 for a leaf, otherwise
+		     * 1 + the larger depth of the two children. */
+  uint32_t freq;    /* Weight: symbol frequency, or sum of the children. */
+  uint32_t parent;  /* Index of the parent node, 0 when there is none. */
+};
+
+/* A sequence of code lengths (or selectors) together with its
+ * move-to-front / run-coded form. */
+struct _djw_prefix
+{
+  usize_t   scount;  /* Number of valid entries in symbol[]. */
+  uint8_t *symbol;   /* Input symbols to be MTF coded. */
+  usize_t   mcount;  /* Number of valid entries in mtfsym[]. */
+  uint8_t *mtfsym;   /* Output: RUN_0/RUN_1 codes and MTF ranks + RUN_1. */
+  uint8_t *repcnt;   /* NOTE(review): not referenced in the code visible
+		      * here; purpose to be confirmed. */
+};
+
+/* The secondary-compressor state holds nothing beyond a placeholder
+ * field. */
+struct _djw_stream
+{
+  int unused;
+};
+
+/* Each Huffman table consists of 256 "code length" (CLEN) codes,
+ * which are themselves Huffman coded after eliminating repeats and
+ * move-to-front coding.  The prefix consists of all the CLEN codes in
+ * djw_encode_basic plus a 4-bit value stating how many of the
+ * djw_encode_extra codes are actually coded (the rest are presumed
+ * zero, or unused CLEN codes).
+ *
+ * The values of these two arrays were arrived at by studying the
+ * distribution of min and max clen over a collection of DATA, INST,
+ * and ADDR inputs.  The goal is to specify the order of
+ * djw_extra_codes that is most likely to minimize the number of extra
+ * codes that must be encoded.
+ *
+ * Results: 158896 sections were counted by compressing files (window
+ * size 512K) listed with: `find / -type f ( -user jmacd -o -perm +444
+ * )`
+ *
+ * The distribution of CLEN codes for each efficient invocation of the
+ * secondary compressor (taking the best number of groups/sector size)
+ * was recorded.  Then we look at the distribution of min and max clen
+ * values, counting the number of times the value C_low is less than
+ * the min and C_high is greater than the max.  Values >= C_high and
+ * <= C_low will not have their lengths coded.  The results are sorted
+ * and the least likely 15 are placed into the djw_encode_extra[]
+ * array in order.  These values are used as the initial MTF ordering.
+
+ clow[1] = 155119
+ clow[2] = 140325
+ clow[3] = 84072
+ ---
+ clow[4] = 7225
+ clow[5] = 1093
+ clow[6] = 215
+ ---
+ chigh[4] = 1
+ chigh[5] = 30
+ chigh[6] = 218
+ chigh[7] = 2060
+ chigh[8] = 13271
+ ---
+ chigh[9] = 39463
+ chigh[10] = 77360
+ chigh[11] = 118298
+ chigh[12] = 141360
+ chigh[13] = 154086
+ chigh[14] = 157967
+ chigh[15] = 158603
+ chigh[16] = 158864
+ chigh[17] = 158893
+ chigh[18] = 158895
+ chigh[19] = 158896
+ chigh[20] = 158896
+
+*/
+
+/* Code lengths whose presence in the prefix is optional.  They are placed
+ * after the basic code lengths in the initial MTF ordering. */
+static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
+  {
+    9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20,
+  };
+
+/* Code lengths that are always encoded.  They come first (after the zero
+ * length) in the initial MTF ordering. */
+static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
+  {
+    4, 5, 6, 7, 8,
+  };
+
+/*********************************************************************/
+/*                              DECLS                                */
+/*********************************************************************/
+
+/* Lifetime management of the secondary-compressor state. */
+static djw_stream*     djw_alloc           (xd3_stream *stream);
+static void            djw_init            (djw_stream *h);
+static void            djw_destroy         (xd3_stream *stream,
+					    djw_stream *h);
+
+/* The encoding entry point exists only when the encoder is compiled in. */
+#if XD3_ENCODER
+static int             xd3_encode_huff     (xd3_stream   *stream,
+					    djw_stream  *sec_stream,
+					    xd3_output   *input,
+					    xd3_output   *output,
+					    xd3_sec_cfg  *cfg);
+#endif
+
+/* The decoding entry point is always available. */
+static int             xd3_decode_huff     (xd3_stream     *stream,
+					    djw_stream    *sec_stream,
+					    const uint8_t **input,
+					    const uint8_t  *const input_end,
+					    uint8_t       **output,
+					    const uint8_t  *const output_end);
+
+/*********************************************************************/
+/*                             HUFFMAN                               */
+/*********************************************************************/
+
+/* Obtain storage for one djw_stream from the stream's allocator and hand
+ * back whatever xd3_alloc returned. */
+static djw_stream*
+djw_alloc (xd3_stream *stream)
+{
+  djw_stream *sec = xd3_alloc (stream, sizeof (*sec), 1);
+
+  return sec;
+}
+
+/* Intentionally does nothing: every field is set up right before it is
+ * used, so there is no state to prepare here. */
+static void
+djw_init (djw_stream *h)
+{
+  (void) h;
+}
+
+/* Give the djw_stream storage back through the stream's xd3_free. */
+static void
+djw_destroy (xd3_stream *stream, djw_stream *h)
+{
+  xd3_free (stream, h);
+}
+
+
+/*********************************************************************/
+/*                               HEAP                                */
+/*********************************************************************/
+
+/* Heap ordering: compare by frequency first and, when the frequencies are
+ * equal, by depth.  Returns non-zero when A orders strictly before B. */
+static inline int
+heap_less (const djw_heapen *a, const djw_heapen *b)
+{
+  if (a->freq != b->freq)
+    {
+      return a->freq < b->freq;
+    }
+
+  return a->depth < b->depth;
+}
+
+/* Place entry index E into the heap, starting from the free slot heap[p]
+ * and moving parents down until E's parent no longer compares greater. */
+static inline void
+heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e)
+{
+  for (;;)
+    {
+      const usize_t up = p / 2; /* Slot of the parent of p */
+
+      if (! heap_less (& ents[e], & ents[heap[up]]))
+        {
+          break;
+        }
+
+      /* The parent is larger: pull it down one level and keep climbing. */
+      heap[p] = heap[up];
+      p = up;
+    }
+
+  heap[p] = e;
+}
+
+/* Remove the smallest entry from the heap and return a pointer to it.
+ * HEAP_LAST is the last valid slot after the removal; the caller has
+ * already decremented it, so heap[heap_last+1] is the element that moves
+ * into the root. */
+static inline djw_heapen*
+heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last)
+{
+  const usize_t top = heap[1];
+  usize_t cur = 1;
+
+  heap[1] = heap[heap_last+1];
+
+  /* Sift the new root down until both children are no smaller. */
+  while (1)
+    {
+      usize_t child = cur * 2;
+      usize_t tmp;
+
+      /* No children left: done. */
+      if (child > heap_last)
+        {
+          break;
+        }
+
+      /* Prefer the right child when it exists and is the smaller one. */
+      if (child < heap_last &&
+          heap_less (& ents[heap[child+1]], & ents[heap[child]]))
+        {
+          child += 1;
+        }
+
+      /* Heap property holds again once the child is not smaller. */
+      if (! heap_less (& ents[heap[child]], & ents[heap[cur]]))
+        {
+          break;
+        }
+
+      tmp         = heap[child];
+      heap[child] = heap[cur];
+      heap[cur]   = tmp;
+
+      cur = child;
+    }
+
+  return (djw_heapen*) & ents[top];
+}
+
+#if XD3_DEBUG
+/* Debug-only check: assert that no heap slot in [1, heap_last] compares
+ * less than its parent slot, printing each entry's frequency on the way. */
+static void
+heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last)
+{
+  usize_t i = 1;
+
+  while (i <= heap_last)
+    {
+      const djw_heapen *child  = & ents[heap[i]];
+      const djw_heapen *parent = & ents[heap[i/2]];
+
+      XD3_ASSERT (! heap_less (child, parent));
+
+      IF_DEBUG1 (DP(RINT "heap[%d] = %u\n", i, child->freq));
+
+      i += 1;
+    }
+}
+#endif
+
+/*********************************************************************/
+/*                             MTF, 1/2                              */
+/*********************************************************************/
+
+/* Move the entry at position MTF_I to the front of the list, shifting the
+ * entries before it back by one.  Returns the symbol that was moved. */
+static inline usize_t
+djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
+{
+  const usize_t picked = mtf[mtf_i];
+  usize_t pos = mtf_i;
+
+  while (pos > 0)
+    {
+      mtf[pos] = mtf[pos-1];
+      pos -= 1;
+    }
+
+  mtf[0] = picked;
+
+  return picked;
+}
+
+/* Flush a pending run of front-of-list hits as a sequence of RUN_0/RUN_1
+ * symbols.  Each emitted symbol is appended to MTFSYM at *MTF_I and
+ * counted in FREQ; on return *MTF_RUN is reset to zero. */
+static inline void
+djw_update_1_2 (int *mtf_run, usize_t *mtf_i,
+		uint8_t *mtfsym, djw_weight *freq)
+{
+  int     run = *mtf_run;
+  usize_t out = *mtf_i;
+
+  do
+    {
+      int code;
+
+      /* The run length is biased by one: emitting any RUN_ symbol at all
+       * already implies a run of at least one. */
+      run -= 1;
+
+      if (run & 1) { code = RUN_1; }
+      else         { code = RUN_0; }
+
+      mtfsym[out++] = code;
+      freq[code] += 1;
+
+      run >>= 1;
+    }
+  while (run >= 1);
+
+  *mtf_i   = out;
+  *mtf_run = 0;
+}
+
+/* Fill CLMTF with the initial move-to-front ordering of code lengths:
+ * the zero length first, then the basic lengths, then the extra ones. */
+static void
+djw_init_clen_mtf_1_2 (uint8_t *clmtf)
+{
+  uint8_t *basic = clmtf + 1;
+  uint8_t *extra = basic + DJW_BASIC_CODES;
+  int n;
+
+  clmtf[0] = 0;
+
+  for (n = 0; n < DJW_BASIC_CODES; n += 1)
+    {
+      basic[n] = djw_encode_12basic[n];
+    }
+
+  for (n = 0; n < DJW_EXTRA_CODES; n += 1)
+    {
+      extra[n] = djw_encode_12extra[n];
+    }
+}
+
+/*********************************************************************/
+/*                           PREFIX CODES                            */
+/*********************************************************************/
+#if XD3_ENCODER
+/* Compute prefix-code lengths for an alphabet of ASIZE symbols.
+ *
+ * freq    symbol frequencies, ASIZE entries (read only);
+ * clen    output: one code length per symbol, 0 for symbols that did not
+ *         take part in the tree;
+ * asize   number of symbols (the local arrays are sized by
+ *         ALPHABET_SIZE, so this presumably must not exceed it);
+ * maxlen  longest code length allowed.
+ *
+ * Returns the sum over coded symbols of (code length * freq[]).  When any
+ * length exceeds MAXLEN the working frequencies are flattened and the
+ * tree is rebuilt, repeating until every length fits. */
+static usize_t
+djw_build_prefix (const djw_weight *freq, uint8_t *clen, int asize, int maxlen)
+{
+  /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1
+   * internal nodes, never more than ALPHABET_SIZE entries actually in the
+   * heap (minimum weight subtrees during prefix construction).  First
+   * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are
+   * internal nodes. */
+  djw_heapen ents[ALPHABET_SIZE * 2];
+  usize_t heap[ALPHABET_SIZE + 1];
+
+  usize_t heap_last; /* Index of the last _valid_ heap entry. */
+  usize_t ents_size; /* Number of entries, including 0th fake entry */
+  int  overflow;  /* Number of code lengths that overflow */
+  uint32_t total_bits;
+  int i;
+
+  IF_DEBUG (uint32_t first_bits = 0);
+
+  /* Insert real symbol frequencies.  ents[] is 1-origin, freq[] is
+   * 0-origin. */
+  for (i = 0; i < asize; i += 1)
+    {
+      ents[i+1].freq = freq[i];
+      IF_DEBUG1 (DP(RINT "ents[%d] = freq[%d] = %d\n",
+			i+1, i, freq[i]));
+    }
+
+ again:
+
+  /* The loop is re-entered each time an overflow occurs.  Re-initialize... */
+  heap_last = 0;
+  ents_size = 1;
+  overflow  = 0;
+  total_bits = 0;
+
+  /* 0th entry terminates the while loop in heap_insert (it's the parent of
+   * the smallest element, always less-than) */
+  heap[0] = 0;
+  ents[0].depth = 0;
+  ents[0].freq  = 0;
+
+  /* Initial heap: only symbols with a non-zero working frequency are
+   * inserted; ents_size ends up at asize+1. */
+  for (i = 0; i < asize; i += 1, ents_size += 1)
+    {
+      ents[ents_size].depth  = 0;
+      ents[ents_size].parent = 0;
+
+      if (ents[ents_size].freq != 0)
+	{
+	  heap_insert (heap, ents, ++heap_last, ents_size);
+	}
+    }
+
+  IF_DEBUG (heap_check (heap, ents, heap_last));
+
+  /* Must be at least one symbol, or else we can't get here. */
+  XD3_ASSERT (heap_last != 0);
+
+  /* If there is only one symbol, fake a second to prevent zero-length
+   * codes. */
+  if (heap_last == 1)
+    {
+      /* Pick either the first or last symbol, whichever is not the one
+       * already in use according to freq[0]. */
+      int s = freq[0] ? asize-1 : 0;
+      ents[s+1].freq = 1;
+      goto again;
+    }
+
+  /* Build prefix tree: repeatedly merge the two smallest subtrees into a
+   * new internal node appended at ents[ents_size]. */
+  while (heap_last > 1)
+    {
+      djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
+      djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
+
+      ents[ents_size].freq   = h1->freq + h2->freq;
+      ents[ents_size].depth  = 1 + max (h1->depth, h2->depth);
+      ents[ents_size].parent = 0;
+
+      h1->parent = h2->parent = ents_size;
+
+      heap_insert (heap, ents, ++heap_last, ents_size++);
+    }
+
+  IF_DEBUG (heap_check (heap, ents, heap_last));
+
+  /* Now compute prefix code lengths, counting parents. */
+  for (i = 1; i < asize+1; i += 1)
+    {
+      int b = 0;
+
+      if (ents[i].freq != 0)
+	{
+	  int p = i;
+
+	  /* Walk up to the root; the number of steps is the code length. */
+	  while ((p = ents[p].parent) != 0) { b += 1; }
+
+	  if (b > maxlen) { overflow = 1; }
+
+	  /* The cost is weighted by the caller's original frequency, not
+	   * by the (possibly adjusted) working frequency in ents[]. */
+	  total_bits += b * freq[i-1];
+	}
+
+      /* clen is 0-origin, unlike ents. */
+      IF_DEBUG1 (DP(RINT "clen[%d] = %d\n", i-1, b));
+      clen[i-1] = b;
+    }
+
+  IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
+
+  if (! overflow)
+    {
+      IF_DEBUG1 (if (first_bits != total_bits)
+      {
+	DP(RINT "code length overflow changed %u bits\n",
+	   (usize_t)(total_bits - first_bits));
+      });
+      return total_bits;
+    }
+
+  /* OPT: There is a non-looping way to fix overflow shown in zlib, but this
+   * is easier (for now), as done in bzip2.
+   *
+   * Note that freq/2 + 1 is at least 1 for every symbol, so after this
+   * step symbols whose working frequency was zero also enter the heap on
+   * the next pass. */
+  for (i = 1; i < asize+1; i += 1)
+    {
+      ents[i].freq = ents[i].freq / 2 + 1;
+    }
+
+  goto again;
+}
+
+/* Assign a code value to every symbol from its code length.  Lengths are
+ * visited from the shortest in use to the longest; within one length,
+ * symbols receive consecutive values in symbol order, and the running
+ * value is shifted left by one bit before moving to the next length.
+ * ABS_MAX is only used to assert that no length exceeds it. */
+static void
+djw_build_codes (usize_t *codes, const uint8_t *clen, int asize, int abs_max)
+{
+  int sym;
+  int len;
+  int shortest = DJW_MAX_CODELEN;
+  int longest  = 0;
+  usize_t next_code = 0;
+
+  /* Determine the range of code lengths actually in use. */
+  for (sym = 0; sym < asize; sym += 1)
+    {
+      const int cl = (int) clen[sym];
+
+      if (cl > 0 && cl < shortest) { shortest = cl; }
+      if (cl > longest)            { longest  = cl; }
+    }
+
+  XD3_ASSERT (longest <= abs_max);
+
+  /* Hand out codes length by length. */
+  for (len = shortest; len <= longest; len += 1)
+    {
+      for (sym = 0; sym < asize; sym += 1)
+        {
+          if (clen[sym] != len) { continue; }
+
+          codes[sym] = next_code;
+          next_code += 1;
+        }
+
+      next_code <<= 1;
+    }
+
+  IF_DEBUG1 ({
+      for (sym = 0; sym < asize; sym += 1)
+        {
+          DP(RINT "code[%d] = %u\n", sym, codes[sym]);
+        }
+    });
+}
+
+/*********************************************************************/
+/*			      MOVE-TO-FRONT                          */
+/*********************************************************************/
+/* Move-to-front code prefix->symbol[0 .. scount) against the list MTF and
+ * write the result to prefix->mtfsym, setting prefix->mcount to the number
+ * of symbols written.  Consecutive hits on the front of the list are
+ * collected into a run and emitted as RUN_0/RUN_1 symbols; any other rank
+ * is emitted as rank + RUN_1.  FREQ_OUT receives the frequency of every
+ * emitted symbol; its first NSYM+2 entries are cleared first. */
+static void
+djw_compute_mtf_1_2 (djw_prefix  *prefix,
+		     uint8_t     *mtf,
+		     djw_weight  *freq_out,
+		     usize_t      nsym)
+{
+  const usize_t total = prefix->scount;
+  usize_t in_pos  = 0;
+  usize_t out_pos = 0;
+  int     run     = 0;
+
+  /* Two extra slots account for the RUN_0 and RUN_1 codes. */
+  memset (freq_out, 0, (nsym+2) * sizeof (freq_out[0]));
+
+  while (in_pos < total)
+    {
+      /* OPT: Bzip optimizes this algorithm a little by effectively checking
+       * for a front-of-list hit before the MTF update. */
+      const usize_t want = prefix->symbol[in_pos++];
+      int rank = 0;
+      int k;
+
+      /* Locate the symbol in the list. */
+      while (mtf[rank] != want) { rank += 1; }
+
+      XD3_ASSERT (rank <= nsym);
+
+      /* Bring it to the front. */
+      for (k = rank; k > 0; k -= 1) { mtf[k] = mtf[k-1]; }
+
+      mtf[0] = want;
+
+      if (rank != 0)
+        {
+          /* A different symbol ends any pending run. */
+          if (run > 0)
+            {
+              djw_update_1_2 (& run, & out_pos, prefix->mtfsym, freq_out);
+            }
+
+          /* Non-zero ranks are offset by RUN_1. */
+          prefix->mtfsym[out_pos++] = rank+RUN_1;
+          freq_out[rank+RUN_1] += 1;
+        }
+      else
+        {
+          run += 1;
+        }
+    }
+
+  /* Flush a run that reaches the end of the input. */
+  if (run > 0)
+    {
+      djw_update_1_2 (& run, & out_pos, prefix->mtfsym, freq_out);
+    }
+
+  prefix->mcount = out_pos;
+}
+
+/* Counts character frequencies of the input buffer, returns the size. */
+/* Count how often each byte value occurs across every page of INPUT.
+ *
+ * freq   output: ALPHABET_SIZE counters, cleared first and then
+ *        incremented once per input byte;
+ * input  head of a page list linked through next_page; each page
+ *        contributes its first `next' bytes starting at `base'.
+ *
+ * Returns the total number of bytes counted. */
+static usize_t
+djw_count_freqs (djw_weight *freq, xd3_output *input)
+{
+  xd3_output *in;
+  usize_t size = 0;
+
+  memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
+
+  for (in = input; in; in = in->next_page)
+    {
+      const uint8_t *p     = in->base;
+      const uint8_t *p_max = p + in->next;
+
+      size += in->next;
+
+      /* Test before reading: a page holding no bytes must contribute
+       * nothing.  A do/while here would read and count base[0] even when
+       * the page is empty. */
+      while (p < p_max)
+	{
+	  ++freq[*p++];
+	}
+    }
+
+  IF_DEBUG1 ({int i;
+  DP(RINT "freqs: ");
+  for (i = 0; i < ALPHABET_SIZE; i += 1)
+    {
+      DP(RINT "%u ", freq[i]);
+    }
+  DP(RINT "\n");});
+
+  return size;
+}
+
+/* Gather the code lengths of all GROUPS tables into prefix->symbol.  The
+ * first table is copied whole (ALPHABET_SIZE entries); from each further
+ * table only the non-zero lengths are appended.  prefix->scount is set to
+ * the number of entries stored. */
+static void
+djw_compute_multi_prefix (int         groups,
+			  uint8_t     clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
+			  djw_prefix *prefix)
+{
+  usize_t count = ALPHABET_SIZE;
+  int table;
+
+  memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
+
+  for (table = 1; table < groups; table += 1)
+    {
+      const uint8_t *row = clen[table];
+      int sym;
+
+      for (sym = 0; sym < ALPHABET_SIZE; sym += 1)
+        {
+          if (row[sym] != 0)
+            {
+              prefix->symbol[count++] = row[sym];
+            }
+        }
+    }
+
+  prefix->scount = count;
+}
+
+/* MTF/run code the code lengths held in PREFIX, starting from the standard
+ * initial ordering of code lengths, and collect symbol counts in FREQ. */
+static void
+djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
+{
+  /* One slot per code length 1..DJW_MAX_CODELEN plus one for length 0. */
+  uint8_t initial_order[DJW_MAX_CODELEN+1];
+
+  djw_init_clen_mtf_1_2 (initial_order);
+  djw_compute_mtf_1_2 (prefix, initial_order, freq, DJW_MAX_CODELEN);
+}
+
+/* Write the code lengths held in PREFIX to the output bit stream.
+ *
+ * The lengths are first MTF/run coded, and a prefix code is built for the
+ * resulting symbols.  Three things are then emitted in order: the count
+ * of extra code-length codes in DJW_EXTRA_CODE_BITS bits, that many (plus
+ * the always-present ones) code lengths in DJW_CLCLEN_BITS bits each, and
+ * finally every MTF symbol using its prefix code.
+ *
+ * Returns 0 on success or the first non-zero result of xd3_encode_bits. */
+static int
+djw_encode_prefix (xd3_stream   *stream,
+		   xd3_output  **output,
+		   bit_state    *bstate,
+		   djw_prefix   *prefix)
+{
+  djw_weight cl_freq[DJW_TOTAL_CODES];
+  uint8_t    cl_len[DJW_TOTAL_CODES];
+  usize_t    cl_code[DJW_TOTAL_CODES];
+  usize_t    ncoded = DJW_TOTAL_CODES;
+  usize_t    idx;
+  int        ret;
+
+  /* MTF code the lengths and count the resulting symbols. */
+  djw_compute_prefix_1_2 (prefix, cl_freq);
+
+  /* Build the prefix code for those symbols. */
+  djw_build_prefix (cl_freq, cl_len, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
+  djw_build_codes  (cl_code, cl_len, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
+
+  /* Drop trailing unused codes, but never go below the mandatory ones. */
+  for (; ncoded > DJW_EXTRA_12OFFSET; ncoded -= 1)
+    {
+      if (cl_len[ncoded-1] != 0) { break; }
+    }
+
+  XD3_ASSERT (ncoded - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
+
+  /* Emit: number of extra codes. */
+  ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
+                         ncoded - DJW_EXTRA_12OFFSET);
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  /* Emit: the code lengths of the MTF symbols. */
+  for (idx = 0; idx < ncoded; idx += 1)
+    {
+      ret = xd3_encode_bits (stream, output, bstate,
+                             DJW_CLCLEN_BITS, cl_len[idx]);
+      if (ret != 0)
+        {
+          return ret;
+        }
+    }
+
+  /* Emit: the MTF symbols themselves. */
+  for (idx = 0; idx < prefix->mcount; idx += 1)
+    {
+      const usize_t sym   = prefix->mtfsym[idx];
+      const usize_t nbits = cl_len[sym];
+      const usize_t value = cl_code[sym];
+
+      ret = xd3_encode_bits (stream, output, bstate, nbits, value);
+      if (ret != 0)
+        {
+          return ret;
+        }
+    }
+
+  return 0;
+}
+
+/* MTF/run code the group selectors held in PREFIX, starting from the
+ * identity ordering 0, 1, ..., groups-1, and collect symbol counts in
+ * GBEST_FREQ. */
+static void
+djw_compute_selector_1_2 (djw_prefix *prefix,
+			  usize_t     groups,
+			  djw_weight *gbest_freq)
+{
+  uint8_t order[DJW_MAX_GROUPS];
+  usize_t g = 0;
+
+  while (g < groups)
+    {
+      order[g] = g;
+      g += 1;
+    }
+
+  djw_compute_mtf_1_2 (prefix, order, gbest_freq, groups);
+}
+
+/* Decide how many coding groups and which sector size to use.
+ *
+ * Non-zero values in CFG are validated and used as given.  A value left
+ * at zero is filled in from a table keyed on cfg->data_type and
+ * INPUT_SIZE.  If more than one group ends up selected while the sector
+ * size is still zero, a per-section default sector size is used.
+ *
+ * The results are stored through RET_GROUPS and RET_SECTOR_SIZE.
+ * Returns 0 on success, or XD3_INTERNAL (with stream->msg set) when a
+ * configured value is out of range. */
+static int
+xd3_encode_howmany_groups (xd3_stream *stream,
+			   xd3_sec_cfg *cfg,
+			   usize_t input_size,
+			   usize_t *ret_groups,
+			   usize_t *ret_sector_size)
+{
+  usize_t groups     = 0;
+  usize_t sector     = 0;
+  usize_t def_groups = 0;
+  usize_t def_sector = 0;
+
+  /* Explicitly configured group count. */
+  if (cfg->ngroups != 0)
+    {
+      if (cfg->ngroups < 0 || cfg->ngroups > DJW_MAX_GROUPS)
+        {
+          stream->msg = "invalid secondary encoder group number";
+          return XD3_INTERNAL;
+        }
+
+      groups = cfg->ngroups;
+    }
+
+  /* Explicitly configured sector size. */
+  if (cfg->sector_size != 0)
+    {
+      if (cfg->sector_size < DJW_SECTORSZ_MULT ||
+          cfg->sector_size > DJW_SECTORSZ_MAX ||
+          (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
+        {
+          stream->msg = "invalid secondary encoder sector size";
+          return XD3_INTERNAL;
+        }
+
+      sector = cfg->sector_size;
+    }
+
+  if (groups == 0 || sector == 0)
+    {
+      /* Defaults found empirically using xdelta3-tune around version
+       * xdfs-0.256. */
+      switch (cfg->data_type)
+        {
+        case DATA_SECTION:
+          if      (input_size < 1000)   { def_groups = 1; def_sector = 0; }
+          else if (input_size < 4000)   { def_groups = 2; def_sector = 10; }
+          else if (input_size < 7000)   { def_groups = 3; def_sector = 10; }
+          else if (input_size < 10000)  { def_groups = 4; def_sector = 10; }
+          else if (input_size < 25000)  { def_groups = 5; def_sector = 10; }
+          else if (input_size < 50000)  { def_groups = 7; def_sector = 20; }
+          else if (input_size < 100000) { def_groups = 8; def_sector = 30; }
+          else                          { def_groups = 8; def_sector = 70; }
+          break;
+        case INST_SECTION:
+          if      (input_size < 7000)   { def_groups = 1; def_sector = 0; }
+          else if (input_size < 10000)  { def_groups = 2; def_sector = 50; }
+          else if (input_size < 25000)  { def_groups = 3; def_sector = 50; }
+          else if (input_size < 50000)  { def_groups = 6; def_sector = 40; }
+          else if (input_size < 100000) { def_groups = 8; def_sector = 40; }
+          else                          { def_groups = 8; def_sector = 40; }
+          break;
+        case ADDR_SECTION:
+          if      (input_size < 9000)   { def_groups = 1; def_sector = 0; }
+          else if (input_size < 25000)  { def_groups = 2; def_sector = 130; }
+          else if (input_size < 50000)  { def_groups = 3; def_sector = 130; }
+          else if (input_size < 100000) { def_groups = 5; def_sector = 130; }
+          else                          { def_groups = 7; def_sector = 130; }
+          break;
+        }
+
+      /* Only fill in what the configuration left open. */
+      if (groups == 0) { groups = def_groups; }
+      if (sector == 0) { sector = def_sector; }
+    }
+
+  /* Several groups need a sector size; fall back to a per-section one. */
+  if (groups != 1 && sector == 0)
+    {
+      switch (cfg->data_type)
+        {
+        case DATA_SECTION: sector = 20;  break;
+        case INST_SECTION: sector = 50;  break;
+        case ADDR_SECTION: sector = 130; break;
+        }
+    }
+
+  *ret_groups      = groups;
+  *ret_sector_size = sector;
+
+  XD3_ASSERT (groups > 0 && groups <= DJW_MAX_GROUPS);
+  XD3_ASSERT (groups == 1 ||
+              (sector >= DJW_SECTORSZ_MULT &&
+               sector <= DJW_SECTORSZ_MAX));
+
+  return 0;
+}
+
+static int
+xd3_encode_huff (xd3_stream   *stream,
+		 djw_stream   *h,
+		 xd3_output   *input,
+		 xd3_output   *output,
+		 xd3_sec_cfg  *cfg)
+{ /* Huffman-encode the bytes held in the INPUT page chain into OUTPUT.
+   *
+   * The encoding starts with (groups - 1) written in DJW_GROUP_BITS bits.
+   * With one group the whole input is coded with a single prefix code.
+   * With more groups the input is cut into sector_size-byte sectors, and
+   * each sector is coded with the code table selected for it; the
+   * selectors are transmitted ahead of the data.  The group count and
+   * sector size come from xd3_encode_howmany_groups(); the count is
+   * lowered by one (label `regroup') whenever the per-group frequency goal
+   * computed for the initial tables is zero and cfg->inefficient is not
+   * set.
+   *
+   * H is not referenced in this function.
+   *
+   * Returns the result of xd3_flush_bits() once everything is encoded,
+   * XD3_NOSECOND (with stream->msg set) when an efficiency check fails
+   * and cfg->inefficient is not set, ENOMEM when a selector buffer cannot
+   * be allocated, or the error returned by whichever helper failed. */
+  int         ret;
+  usize_t     groups, sector_size;
+  bit_state   bstate = BIT_STATE_ENCODE_INIT;
+  xd3_output *in;
+  int         output_bits;
+  usize_t     input_bits;
+  usize_t     input_bytes;
+  usize_t     initial_offset = output->next; /* restored on `regroup' */
+  djw_weight  real_freq[ALPHABET_SIZE];
+  uint8_t    *gbest = NULL;     /* group selected for each sector */
+  uint8_t    *gbest_mtf = NULL; /* passed as the mtfsym buffer for selectors */
+
+  input_bytes = djw_count_freqs (real_freq, input);
+  input_bits  = input_bytes * 8;
+
+  XD3_ASSERT (input_bytes > 0);
+
+  if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes,
+					& groups, & sector_size)))
+    {
+      return ret;
+    }
+
+  if (0)
+    {
+    regroup:
+      /* Sometimes we dynamically decide there are too many groups.  Arrive
+       * here (by goto only; the if (0) keeps the normal path out) to put
+       * output->next back to its value on entry and re-initialize the bit
+       * state before encoding again with the smaller group count. */
+      output->next = initial_offset;
+      xd3_bit_state_encode_init (& bstate);
+    }
+
+  /* Encode: # of groups (3 bits) */
+  if ((ret = xd3_encode_bits (stream, & output, & bstate,
+			      DJW_GROUP_BITS, groups-1))) { goto failure; }
+
+  if (groups == 1)
+    {
+      /* Single Huffman group. */
+      usize_t    code[ALPHABET_SIZE]; /* Codes */
+      uint8_t    clen[ALPHABET_SIZE];
+      uint8_t    prefix_mtfsym[ALPHABET_SIZE];
+      djw_prefix prefix;
+
+      output_bits =
+	djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
+      djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
+
+      if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      /* Encode: prefix */
+      prefix.mtfsym = prefix_mtfsym;
+      prefix.symbol = clen;
+      prefix.scount = ALPHABET_SIZE;
+
+      if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
+	{
+	  goto failure;
+	}
+
+      if (output_bits + (8 * output->next) + EFFICIENCY_BITS >=
+	  input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      /* Encode: data.
+       * NOTE(review): the do/while reads one byte from every page before
+       * testing p < p_max, so this presumably relies on no page in the
+       * chain being empty -- confirm. */
+      for (in = input; in; in = in->next_page)
+	{
+	  const uint8_t *p     = in->base;
+	  const uint8_t *p_max = p + in->next;
+
+	  do
+	    {
+	      usize_t sym  = *p++;
+	      usize_t bits = clen[sym];
+
+	      IF_DEBUG (output_bits -= bits);
+
+	      if ((ret = xd3_encode_bits (stream, & output,
+					  & bstate, bits, code[sym])))
+		{
+		  goto failure;
+		}
+	    }
+	  while (p < p_max);
+	}
+
+      XD3_ASSERT (output_bits == 0);
+    }
+  else
+    {
+      /* DJW Huffman */
+      djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
+      uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
+      djw_weight left = input_bytes;
+      int gp;
+      int niter = 0;
+      usize_t select_bits;
+      usize_t sym1 = 0, sym2 = 0, s;
+      usize_t   gcost[DJW_MAX_GROUPS];
+      usize_t  gbest_code[DJW_MAX_GROUPS+2];
+      uint8_t  gbest_clen[DJW_MAX_GROUPS+2];
+      usize_t   gbest_max = 1 + (input_bytes - 1) / sector_size; /* sectors */
+      int      best_bits = 0;
+      usize_t   gbest_no;
+      usize_t   gpcnt;
+      const uint8_t *p;
+      IF_DEBUG1 (usize_t gcount[DJW_MAX_GROUPS]);
+
+      /* Encode: sector size (5 bits) */
+      if ((ret = xd3_encode_bits (stream, & output, & bstate,
+				  DJW_SECTORSZ_BITS,
+				  (sector_size/DJW_SECTORSZ_MULT)-1)))
+	{
+	  goto failure;
+	}
+
+      /* Dynamic allocation.  gbest and gbest_mtf each get gbest_max bytes,
+       * one per sector.  The NULL tests keep the buffers from being
+       * allocated again when this point is reached a second time after
+       * `goto regroup'. */
+      if (gbest == NULL)
+	{
+	  if ((gbest = xd3_alloc (stream, gbest_max, 1)) == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto failure;
+	    }
+	}
+
+      if (gbest_mtf == NULL)
+	{
+	  if ((gbest_mtf = xd3_alloc (stream, gbest_max, 1)) == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto failure;
+	    }
+	}
+
+      /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
+
+      /* Generate initial code length tables.  The symbols are walked in
+       * order and each group takes the next run of symbols whose real
+       * frequencies add up to at least `goal', an equal share of what is
+       * left.  In that group's table the symbols sym1..sym2 get length 1
+       * and every other symbol gets length 16. */
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  djw_weight sum  = 0;
+	  djw_weight goal = left / (groups - gp);
+
+	  IF_DEBUG1 (usize_t nz = 0);
+
+	  /* Due to the single-code granularity of this distribution, it may
+	   * be that we can't generate a distribution for each group.  In that
+	   * case subtract one group and try again.  If (inefficient), we're
+	   * testing group behavior, so don't mess things up. */
+	  if (goal == 0 && !cfg->inefficient)
+	    {
+	      IF_DEBUG1 (DP(RINT "too many groups (%u), dropping one\n",
+			    groups));
+	      groups -= 1;
+	      goto regroup;
+	    }
+
+	  /* Sum == goal is possible when (cfg->inefficient)... */
+	  while (sum < goal)
+	    {
+	      XD3_ASSERT (sym2 < ALPHABET_SIZE);
+	      IF_DEBUG1 (nz += real_freq[sym2] != 0);
+	      sum += real_freq[sym2++];
+	    }
+
+	  IF_DEBUG1(DP(RINT "group %u has symbols %u..%u (%u non-zero) "
+		       "(%u/%u = %.3f)\n",
+		       gp, sym1, sym2, nz, sum,
+		       input_bytes, sum / (double)input_bytes););
+
+	  for (s = 0; s < ALPHABET_SIZE; s += 1)
+	    {
+	      evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
+	    }
+
+	  left -= sum;
+	  sym1  = sym2+1;
+	}
+
+    repeat:
+
+      niter += 1;
+      gbest_no = 0;
+      memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
+      IF_DEBUG1 (memset (gcount, 0, sizeof (gcount[0]) * groups));
+
+      /* For each input page (loop is irregular to allow non-pow2-size group
+       * size. */
+      in = input;
+      p  = in->base;
+
+      /* For each group-size sector. */
+      do
+	{
+	  const uint8_t *p0  = p;
+	  xd3_output    *in0 = in;
+	  usize_t best   = 0;
+	  usize_t winner = 0;
+
+	  /* Select best group for each sector, update evolve_freq. */
+	  memset (gcost, 0, sizeof (gcost[0]) * groups);
+
+	  /* For each byte in sector. */
+	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	    {
+	      /* For each group. */
+	      for (gp = 0; gp < groups; gp += 1)
+		{
+		  gcost[gp] += evolve_clen[gp][*p];
+		}
+
+	      /* Check end-of-input-page.  GP_PAGE advances p by one byte; when
+	       * p reaches the end of the current page it moves to the next
+	       * page, and its `break' leaves the enclosing loop once the page
+	       * chain is exhausted (in == NULL).  The macro is not undefined
+	       * afterwards and is reused by the loops further down. */
+#             define GP_PAGE()                \
+	      if (++p - in->base == in->next) \
+		{                             \
+		  in = in->next_page;         \
+		  if (in == NULL) { break; }  \
+		  p  = in->base;              \
+		}
+
+	      GP_PAGE ();
+	    }
+
+	  /* Find min cost group for this sector */
+	  best = -1U;
+	  for (gp = 0; gp < groups; gp += 1)
+	    {
+	      if (gcost[gp] < best) { best = gcost[gp]; winner = gp; }
+	    }
+
+	  XD3_ASSERT(gbest_no < gbest_max);
+	  gbest[gbest_no++] = winner;
+	  IF_DEBUG1 (gcount[winner] += 1);
+
+	  p  = p0;
+	  in = in0;
+
+	  /* Update group frequencies.  p and in were rewound to the start of
+	   * the sector just above, so this is a second pass over the same
+	   * bytes. */
+	  for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	    {
+	      evolve_freq[winner][*p] += 1;
+
+	      GP_PAGE ();
+	    }
+	}
+      while (in != NULL);
+
+      XD3_ASSERT (gbest_no == gbest_max);
+
+      /* Recompute code lengths. */
+      output_bits = 0;
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  int i;
+	  uint8_t evolve_zero[ALPHABET_SIZE];
+	  int any_zeros = 0;
+
+	  memset (evolve_zero, 0, sizeof (evolve_zero));
+
+	  /* Cannot allow a zero clen when the real frequency is non-zero.
+	   * Note: this means we are going to encode a fairly long code for
+	   * these unused entries.  An improvement would be to implement a
+	   * NOTUSED code for when these are actually zero, but this requires
+	   * another data structure (evolve_zero) since we don't know when
+	   * evolve_freq[i] == 0...  Briefly tested, looked worse. */
+	  for (i = 0; i < ALPHABET_SIZE; i += 1)
+	    {
+	      if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
+		{
+		  evolve_freq[gp][i] = 1;
+		  evolve_zero[i] = 1;
+		  any_zeros = 1;
+		}
+	    }
+
+	  output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp],
+					   ALPHABET_SIZE, DJW_MAX_CODELEN);
+
+	  /* The above faking of frequencies does not matter for the last
+	   * iteration, but we don't know when that is yet.  However, it also
+	   * breaks the output_bits computation.  Necessary for accuracy, and
+	   * for the (output_bits==0) assert after all bits are output. */
+	  if (any_zeros)
+	    {
+	      IF_DEBUG1 (usize_t save_total = output_bits);
+
+	      for (i = 0; i < ALPHABET_SIZE; i += 1)
+		{
+		  if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; }
+		}
+
+	      IF_DEBUG1 (DP(RINT "evolve_zero reduced %u bits in group %u\n",
+			    save_total - output_bits, gp));
+	    }
+	}
+
+      IF_DEBUG1(
+	DP(RINT "pass %u total bits: %u group uses: ", niter, output_bits);
+	for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }
+	DP(RINT "\n");
+	);
+
+      /* End iteration.  Another pass is made after the first one
+       * unconditionally, and after later ones while fewer than DJW_MAX_ITER
+       * passes have run and the latest pass lowered the bit count by at
+       * least DJW_MIN_IMPROVEMENT. */
+
+      IF_DEBUG1 (if (niter > 1 && best_bits < output_bits) {
+	DP(RINT "iteration lost %u bits\n", output_bits - best_bits); });
+
+      if (niter == 1 || (niter < DJW_MAX_ITER &&
+			 (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT))
+	{
+	  best_bits = output_bits;
+	  goto repeat;
+	}
+
+      /* Efficiency check. */
+      if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      IF_DEBUG1 (DP(RINT "djw compression: %u -> %0.3f\n",
+		    input_bytes, output_bits / 8.0));
+
+      /* Encode: prefix */
+      {
+	uint8_t     prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	uint8_t     prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	uint8_t     prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
+	djw_prefix prefix;
+
+	prefix.symbol = prefix_symbol;
+	prefix.mtfsym = prefix_mtfsym;
+	prefix.repcnt = prefix_repcnt;
+
+	djw_compute_multi_prefix (groups, evolve_clen, & prefix);
+	if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
+	  {
+	    goto failure;
+	  }
+      }
+
+      /* Encode: selector frequencies.  First the groups+1 selector code
+       * lengths are written in DJW_GBCLEN_BITS bits each, then the
+       * gbest_prefix.mcount entries of gbest_mtf are written with the
+       * selector code. */
+      {
+	djw_weight gbest_freq[DJW_MAX_GROUPS+1];
+	djw_prefix gbest_prefix;
+	usize_t i;
+
+	gbest_prefix.scount = gbest_no;
+	gbest_prefix.symbol = gbest;
+	gbest_prefix.mtfsym = gbest_mtf;
+
+	djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
+
+	select_bits =
+	  djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
+	djw_build_codes  (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN);
+
+	for (i = 0; i < groups+1; i += 1)
+	  {
+	    if ((ret = xd3_encode_bits (stream, & output, & bstate,
+					DJW_GBCLEN_BITS, gbest_clen[i])))
+	      {
+		goto failure;
+	      }
+	  }
+
+	for (i = 0; i < gbest_prefix.mcount; i += 1)
+	  {
+	    usize_t gp_mtf      = gbest_mtf[i];
+	    usize_t gp_sel_bits = gbest_clen[gp_mtf];
+	    usize_t gp_sel_code = gbest_code[gp_mtf];
+
+	    XD3_ASSERT (gp_mtf < groups+1);
+
+	    if ((ret = xd3_encode_bits (stream, & output, & bstate,
+					gp_sel_bits, gp_sel_code)))
+	      {
+		goto failure;
+	      }
+
+	    IF_DEBUG (select_bits -= gp_sel_bits);
+	  }
+
+	XD3_ASSERT (select_bits == 0);
+      }
+
+      /* Efficiency check.
+       * NOTE(review): select_bits is only decremented inside IF_DEBUG
+       * above.  If that macro expands to nothing in non-debug builds,
+       * select_bits still holds the full selector cost here, on top of
+       * output->next, which presumably already covers the selector bits
+       * written above -- confirm this is intended. */
+      if (output_bits + select_bits + (8 * output->next) +
+	  EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
+	{
+	  goto nosecond;
+	}
+
+      /* Encode: data */
+      {
+	usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
+	usize_t sector = 0;
+
+	/* Build code tables for each group. */
+	for (gp = 0; gp < groups; gp += 1)
+	  {
+	    djw_build_codes (evolve_code[gp], evolve_clen[gp],
+			     ALPHABET_SIZE, DJW_MAX_CODELEN);
+	  }
+
+	/* Now loop over the input. */
+	in = input;
+	p  = in->base;
+
+	do
+	  {
+	    /* For each sector. */
+	    usize_t   gp_best  = gbest[sector];
+	    usize_t *gp_codes = evolve_code[gp_best];
+	    uint8_t *gp_clens = evolve_clen[gp_best];
+
+	    XD3_ASSERT (sector < gbest_no);
+
+	    sector += 1;
+
+	    /* Encode the sector data. */
+	    for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
+	      {
+		usize_t sym  = *p;
+		usize_t bits = gp_clens[sym];
+		usize_t code = gp_codes[sym];
+
+		IF_DEBUG (output_bits -= bits);
+
+		if ((ret = xd3_encode_bits (stream, & output, & bstate,
+					    bits, code)))
+		  {
+		    goto failure;
+		  }
+
+		GP_PAGE ();
+	      }
+	  }
+	while (in != NULL);
+
+	XD3_ASSERT (select_bits == 0);
+	XD3_ASSERT (output_bits == 0);
+      }
+    }
+
+  ret = xd3_flush_bits (stream, & output, & bstate);
+
+  if (0)
+    {
+    nosecond:
+      stream->msg = "secondary compression was inefficient";
+      ret = XD3_NOSECOND;
+    }
+
+ failure: /* Reached on success too: every exit after the group count has
+	   * been chosen passes through here, so both selector buffers
+	   * (possibly still NULL) are handed to xd3_free. */
+
+  xd3_free (stream, gbest);
+  xd3_free (stream, gbest_mtf);
+  return ret;
+}
+#endif /* XD3_ENCODER */
+
+/*********************************************************************/
+/*                              DECODE                               */
+/*********************************************************************/
+
+/* Build the decoding tables for a canonical prefix code from the ASIZE code
+ * lengths in CLEN.  A length of zero means the symbol is unused.
+ *
+ * INORDER receives the used symbols ordered by code length and, within one
+ * length, by symbol value.  BASE and LIMIT are indexed by code length: for
+ * a length l in [min, max], LIMIT[l] is the largest code of that length and
+ * (code - BASE[l]) is the position in INORDER of the symbol for a code of
+ * that length.  *MIN_CLENP and *MAX_CLENP receive the shortest and longest
+ * non-zero length.  If every length is zero they receive ABS_MAX + 1 and 0,
+ * and slot ABS_MAX + 1 of BASE and LIMIT is written.
+ *
+ * ASIZE must be at least 1 and no length may exceed ABS_MAX; the caller is
+ * responsible for both (see the assertion in the counting loop). */
+static void
+djw_build_decoder (xd3_stream    *stream,
+		   usize_t        asize,
+		   usize_t        abs_max,
+		   const uint8_t *clen,
+		   uint8_t       *inorder,
+		   usize_t       *base,
+		   usize_t       *limit,
+		   usize_t       *min_clenp,
+		   usize_t       *max_clenp)
+{
+  int i, l;
+  const uint8_t *ci;
+  usize_t nr_clen [DJW_TOTAL_CODES];
+  usize_t tmp_base[DJW_TOTAL_CODES];
+  int min_clen;
+  int max_clen;
+
+  /* Assumption: the two temporary arrays are large enough to hold abs_max. */
+  XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
+
+  /* This looks something like the start of zlib's inftrees.c.
+   *
+   * Clear the whole array rather than just the first abs_max+1 counters:
+   * when no length is in use, min_clen ends up as abs_max+1 and
+   * nr_clen[min_clen] is read below, which must not be an indeterminate
+   * value.  (Assumes DJW_TOTAL_CODES > abs_max + 1 -- TODO confirm against
+   * the macro definitions.) */
+  memset (nr_clen, 0, sizeof (nr_clen));
+
+  /* Count number of each code length */
+  i  = asize;
+  ci = clen;
+  do
+    {
+      /* Caller _must_ check that values are in-range.  Most of the time the
+       * caller decodes a specific number of bits, which imply the max value,
+       * and the other time the caller decodes a huffman value, which must be
+       * in-range.  Therefore, its an assertion and this function cannot
+       * otherwise fail. */
+      XD3_ASSERT (*ci <= abs_max);
+
+      nr_clen[*ci++]++;
+    }
+  while (--i != 0);
+
+  /* Compute min, max.  With no length in use the first loop runs off the
+   * end (min_clen == abs_max+1) and the second stops at 0. */
+  for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
+  min_clen = i;
+  for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
+  max_clen = i;
+
+  /* Fill the BASE, LIMIT table.  The first code of each length is twice
+   * (last code of the previous length + 1). */
+  tmp_base[min_clen] = 0;
+  base[min_clen]     = 0;
+  limit[min_clen]    = nr_clen[min_clen] - 1;
+  for (i = min_clen + 1; i <= max_clen; i += 1)
+    {
+      usize_t last_limit = ((limit[i-1] + 1) << 1);
+      tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
+      limit[i]    = last_limit + nr_clen[i] - 1;
+      base[i]     = last_limit - tmp_base[i];
+    }
+
+  /* Fill the inorder array, canonically ordered codes.  tmp_base[l] is the
+   * next free INORDER slot for length l. */
+  ci = clen;
+  for (i = 0; i < asize; i += 1)
+    {
+      if ((l = *ci++) != 0)
+	{
+	  inorder[tmp_base[l]++] = i;
+	}
+    }
+
+  *min_clenp = min_clen;
+  *max_clenp = max_clen;
+}
+
+/* Decode one prefix-coded symbol from the bit stream.
+ *
+ * Bits are taken from bstate->cur_byte starting at mask 1 and moving up; a
+ * mask of 0x100 means the byte is used up and the next one is fetched from
+ * *INPUT, which is advanced.  Bits are appended to the code until it has at
+ * least MIN_CLEN bits and does not exceed LIMIT[bits]; the symbol is then
+ * INORDER[code - BASE[bits]].
+ *
+ * MAX_SYM is the number of entries in INORDER, so only positions
+ * 0 .. MAX_SYM-1 are accepted.
+ *
+ * Returns 0 and stores the symbol in *SYM, or XD3_INTERNAL with
+ * stream->msg set when the input runs out or no valid code is found within
+ * MAX_CLEN bits.  *SYM is not written on failure. */
+static inline int
+djw_decode_symbol (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   const uint8_t  *inorder,
+		   const usize_t  *base,
+		   const usize_t  *limit,
+		   usize_t         min_clen,
+		   usize_t         max_clen,
+		   usize_t         *sym,
+		   usize_t          max_sym)
+{
+  usize_t code = 0;
+  usize_t bits = 0;
+
+  /* OPT: Supposedly a small lookup table improves speed here... */
+
+  /* Code outline is similar to xd3_decode_bits... */
+  if (bstate->cur_mask == 0x100) { goto next_byte; }
+
+  for (;;)
+    {
+      do
+	{
+	  /* Already as long as the longest code without a match. */
+	  if (bits == max_clen) { goto corrupt; }
+
+	  bits += 1;
+	  code  = (code << 1);
+
+	  if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
+
+	  bstate->cur_mask <<= 1;
+
+	  if (bits >= min_clen && code <= limit[bits]) { goto done; }
+	}
+      while (bstate->cur_mask != 0x100);
+
+    next_byte:
+
+      if (*input == input_end)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return XD3_INTERNAL;
+	}
+
+      bstate->cur_byte = *(*input)++;
+      bstate->cur_mask = 1;
+    }
+
+ done:
+
+  if (base[bits] <= code)
+    {
+      usize_t offset = code - base[bits];
+
+      /* Strictly less than: offset indexes inorder[], and the callers pass
+       * the entry count of that array as max_sym, so offset == max_sym
+       * would read one element past the end. */
+      if (offset < max_sym)
+	{
+	  IF_DEBUG2 (DP(RINT "(j) %u ", code));
+	  *sym = inorder[offset];
+	  return 0;
+	}
+    }
+
+ corrupt:
+  stream->msg = "secondary decoder invalid code";
+  return XD3_INTERNAL;
+}
+
+/* Read the code that is used to transmit code lengths, and build its
+ * decoder.
+ *
+ * The input holds a count in DJW_EXTRA_CODE_BITS bits, to which
+ * DJW_EXTRA_12OFFSET is added, followed by that many lengths of
+ * DJW_CLCLEN_BITS bits each.  Lengths that are not transmitted are zero.
+ * The decoder tables are written to CL_INORDER, CL_BASE, CL_LIMIT,
+ * *CL_MINLEN and *CL_MAXLEN by djw_build_decoder, and CL_MTF is
+ * initialized with djw_init_clen_mtf_1_2.
+ *
+ * Returns 0 on success, the error from xd3_decode_bits if the input cannot
+ * be read, or XD3_INTERNAL if the transmitted count does not fit the
+ * table. */
+static int
+djw_decode_clclen (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   uint8_t        *cl_inorder,
+		   usize_t        *cl_base,
+		   usize_t        *cl_limit,
+		   usize_t        *cl_minlen,
+		   usize_t        *cl_maxlen,
+		   uint8_t        *cl_mtf)
+{
+  int ret;
+  uint8_t cl_clen[DJW_TOTAL_CODES];
+  usize_t num_codes, value;
+  usize_t i;
+
+  /* How many extra code lengths to encode. */
+  if ((ret = xd3_decode_bits (stream, bstate, input,
+			      input_end, DJW_EXTRA_CODE_BITS, & num_codes)))
+    {
+      return ret;
+    }
+
+  num_codes += DJW_EXTRA_12OFFSET;
+
+  /* num_codes comes from the input and bounds the writes to cl_clen[]
+   * below, so make the limit explicit instead of relying on the field
+   * width and offset adding up to at most DJW_TOTAL_CODES.  (Whether this
+   * can actually trigger depends on those constants, which are defined
+   * elsewhere.) */
+  if (num_codes > DJW_TOTAL_CODES)
+    {
+      stream->msg = "secondary decoder invalid input";
+      return XD3_INTERNAL;
+    }
+
+  /* Read num_codes. */
+  for (i = 0; i < num_codes; i += 1)
+    {
+      if ((ret = xd3_decode_bits (stream, bstate, input,
+				  input_end, DJW_CLCLEN_BITS, & value)))
+	{
+	  return ret;
+	}
+
+      cl_clen[i] = value;
+    }
+
+  /* Set the rest to zero. */
+  for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
+
+  /* No need to check for in-range clen values, because: */
+  XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
+
+  /* Build the code-length decoder. */
+  djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
+		     cl_clen, cl_inorder, cl_base,
+		     cl_limit, cl_minlen, cl_maxlen);
+
+  /* Initialize the MTF state. */
+  djw_init_clen_mtf_1_2 (cl_mtf);
+
+  return 0;
+}
+
+/* Fill VALUES[0 .. ELTS-1] from a stream of run codes and move-to-front
+ * indices, each read with djw_decode_symbol using the given tables.
+ *
+ * A decoded code of RUN_1 or less is a run code: it schedules
+ * (code + 1) << k copies of MTFVALS[0], where k is the number of run codes
+ * that immediately preceded it.  Any larger code, less one, is an index
+ * that djw_update_mtf applies to MTFVALS to produce the next value.
+ *
+ * When SKIP_OFFSET is non-zero, an element whose counterpart SKIP_OFFSET
+ * positions earlier is zero is set to zero without consuming any input or
+ * any scheduled copy.
+ *
+ * Returns 0 on success, the error from djw_decode_symbol, or XD3_INTERNAL
+ * if scheduled copies remain once all ELTS values have been produced. */
+static inline int
+djw_decode_1_2 (xd3_stream     *stream,
+		bit_state      *bstate,
+		const uint8_t **input,
+		const uint8_t  *input_end,
+		const uint8_t  *inorder,
+		const usize_t  *base,
+		const usize_t  *limit,
+		const usize_t  *minlen,
+		const usize_t  *maxlen,
+		uint8_t        *mtfvals,
+		usize_t         elts,
+		usize_t         skip_offset,
+		uint8_t        *values)
+{
+  usize_t filled      = 0;  /* entries of values[] produced so far */
+  usize_t run_left    = 0;  /* copies of mtfvals[0] still to emit */
+  usize_t pending_mtf = 0;  /* MTF index not yet applied, 0 if none */
+  usize_t run_shift   = 0;  /* shift applied to the next run code */
+  int ret;
+
+  while (filled < elts)
+    {
+      if (skip_offset != 0 && filled >= skip_offset &&
+	  values[filled - skip_offset] == 0)
+	{
+	  /* Special case inside generic code: CLEN only: past the first
+	   * group the zero entries are already known. */
+	  values[filled] = 0;
+	  filled += 1;
+	}
+      else if (run_left != 0)
+	{
+	  /* Still inside a run: repeat the front of the MTF list. */
+	  values[filled] = mtfvals[0];
+	  filled += 1;
+	  run_left -= 1;
+	}
+      else if (pending_mtf != 0)
+	{
+	  /* Apply the index decoded on the previous trip round the loop. */
+	  usize_t sym = djw_update_mtf (mtfvals, pending_mtf);
+
+	  values[filled] = sym;
+	  filled += 1;
+	  pending_mtf = 0;
+	}
+      else
+	{
+	  /* Nothing queued: read the next code from the input. */
+	  ret = djw_decode_symbol (stream, bstate, input, input_end,
+				   inorder, base, limit, *minlen, *maxlen,
+				   & pending_mtf, DJW_TOTAL_CODES);
+
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+
+	  if (pending_mtf > RUN_1)
+	    {
+	      /* An MTF index; drop the RUN_1 offset and end the run-code
+	       * sequence. */
+	      pending_mtf -= 1;
+	      run_shift = 0;
+	    }
+	  else
+	    {
+	      /* A run code; each consecutive one counts for twice as much. */
+	      run_left = ((pending_mtf + 1) << run_shift);
+	      pending_mtf = 0;
+	      run_shift += 1;
+	    }
+	}
+    }
+
+  /* Copies left over mean more codes were received than values wanted. */
+  if (run_left != 0)
+    {
+      stream->msg = "secondary decoder invalid repeat code";
+      return XD3_INTERNAL;
+    }
+
+  return 0;
+}
+
+/* Decode the code lengths of GROUPS code tables into CLEN, which receives
+ * ALPHABET_SIZE entries per group, one group after another.  This is
+ * djw_decode_1_2 driven by the code-length decoder tables, with a skip
+ * offset of ALPHABET_SIZE.  The return value is that of djw_decode_1_2. */
+static inline int
+djw_decode_prefix (xd3_stream     *stream,
+		   bit_state      *bstate,
+		   const uint8_t **input,
+		   const uint8_t  *input_end,
+		   const uint8_t  *cl_inorder,
+		   const usize_t  *cl_base,
+		   const usize_t  *cl_limit,
+		   const usize_t  *cl_minlen,
+		   const usize_t  *cl_maxlen,
+		   uint8_t        *cl_mtf,
+		   usize_t         groups,
+		   uint8_t        *clen)
+{
+  const usize_t total_lengths = ALPHABET_SIZE * groups;
+  int ret;
+
+  ret = djw_decode_1_2 (stream, bstate, input, input_end,
+			cl_inorder, cl_base, cl_limit,
+			cl_minlen, cl_maxlen, cl_mtf,
+			total_lengths, ALPHABET_SIZE, clen);
+
+  return ret;
+}
+
+static int
+xd3_decode_huff (xd3_stream     *stream,
+		 djw_stream    *h,
+		 const uint8_t **input_pos,
+		 const uint8_t  *const input_end,
+		 uint8_t       **output_pos,
+		 const uint8_t  *const output_end)
+{ /* Decode one Huffman-coded section.
+   *
+   * Input is read from *INPUT_POS up to INPUT_END and decoded bytes are
+   * written from *OUTPUT_POS up to OUTPUT_END; the number of bytes to
+   * produce is the distance between those two output pointers and must
+   * not be zero.
+   *
+   * The input holds, in order: the group count, the sector size (only
+   * when there is more than one group), the code-length code, the code
+   * lengths of every group, the selector code and one selector per
+   * sector (only when there is more than one group), and the coded data.
+   *
+   * H is not referenced in this function.
+   *
+   * Returns 0 on success, XD3_INTERNAL for an empty output range, ENOMEM
+   * if the selector buffer cannot be allocated, or the error of whichever
+   * helper failed.  On every path the selector buffer is freed and
+   * *INPUT_POS / *OUTPUT_POS are set to the positions reached. */
+  const uint8_t *input = *input_pos;
+  uint8_t  *output = *output_pos;
+  bit_state bstate = BIT_STATE_DECODE_INIT;
+  uint8_t  *sel_group = NULL; /* group used by each sector; groups > 1 only */
+  usize_t    groups, gp;
+  usize_t    output_bytes = (output_end - output);
+  usize_t    sector_size;
+  usize_t    sectors;
+  int ret;
+
+  /* Invalid input. */
+  if (output_bytes == 0)
+    {
+      stream->msg = "secondary decoder invalid input";
+      return XD3_INTERNAL;
+    }
+
+  /* Decode: number of groups */
+  if ((ret = xd3_decode_bits (stream, & bstate, & input,
+			      input_end, DJW_GROUP_BITS, & groups)))
+    {
+      goto fail;
+    }
+
+  groups += 1;
+
+  if (groups > 1)
+    {
+      /* Decode: group size */
+      if ((ret = xd3_decode_bits (stream, & bstate, & input,
+				  input_end, DJW_SECTORSZ_BITS,
+				  & sector_size))) { goto fail; }
+      
+      sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
+    }
+  else
+    {
+      /* Default for groups == 1 */
+      sector_size = output_bytes;
+    }
+
+  sectors = 1 + (output_bytes - 1) / sector_size; /* rounded up */
+
+  /* TODO: In the case of groups==1, lots of extra stack space gets used here.
+   * Could dynamically allocate this memory, which would help with excess
+   * parameter passing, too.  Passing too many parameters in this file,
+   * simplify it! */
+
+  /* Outer scope: per-group symbol decoder tables. */
+  {
+    uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
+    usize_t base   [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
+    usize_t limit  [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
+    usize_t minlen [DJW_MAX_GROUPS];
+    usize_t maxlen [DJW_MAX_GROUPS];
+
+    /* Nested scope: code length decoder tables.  These, and the raw code
+     * lengths in clen, are only needed until the per-group decoders above
+     * have been built. */
+    {
+      uint8_t clen      [DJW_MAX_GROUPS][ALPHABET_SIZE];
+      uint8_t cl_inorder[DJW_TOTAL_CODES];
+      usize_t cl_base   [DJW_MAX_CLCLEN+2];
+      usize_t cl_limit  [DJW_MAX_CLCLEN+2];
+      uint8_t cl_mtf    [DJW_TOTAL_CODES];
+      usize_t cl_minlen;
+      usize_t cl_maxlen;
+
+      /* Compute the code length decoder. */
+      if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
+				    cl_inorder, cl_base, cl_limit, & cl_minlen,
+				    & cl_maxlen, cl_mtf))) { goto fail; }
+
+      /* Now decode each group decoder.  clen[0] is passed as the start of
+       * the whole two-dimensional array, so the lengths of all groups are
+       * filled in by this one call. */
+      if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
+				    cl_inorder, cl_base, cl_limit,
+				    & cl_minlen, & cl_maxlen, cl_mtf,
+				    groups, clen[0]))) { goto fail; }
+
+      /* Prepare the actual decoding tables. */
+      for (gp = 0; gp < groups; gp += 1)
+	{
+	  djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
+			     clen[gp], inorder[gp], base[gp], limit[gp],
+			     & minlen[gp], & maxlen[gp]);
+	}
+    }
+
+    /* Decode: selector clens. */
+    {
+      uint8_t sel_inorder[DJW_MAX_GROUPS+2];
+      usize_t sel_base   [DJW_MAX_GBCLEN+2];
+      usize_t sel_limit  [DJW_MAX_GBCLEN+2];
+      uint8_t sel_mtf    [DJW_MAX_GROUPS+2];
+      usize_t sel_minlen;
+      usize_t sel_maxlen;
+
+      /* Setup group selection.  The selector code has groups+1 symbols,
+       * each with a length of DJW_GBCLEN_BITS bits in the input; sel_mtf
+       * starts out as the identity ordering.  One selector per sector is
+       * then decoded into sel_group. */
+      if (groups > 1)
+	{
+	  uint8_t sel_clen[DJW_MAX_GROUPS+1];
+
+	  for (gp = 0; gp < groups+1; gp += 1)
+	    {
+	      usize_t value;
+
+	      if ((ret = xd3_decode_bits (stream, & bstate, & input,
+					  input_end, DJW_GBCLEN_BITS,
+					  & value))) { goto fail; }
+
+	      sel_clen[gp] = value;
+	      sel_mtf[gp]  = gp;
+	    }
+
+	  if ((sel_group = xd3_alloc (stream, sectors, 1)) == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto fail;
+	    }
+
+	  djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
+			     sel_inorder, sel_base, sel_limit,
+			     & sel_minlen, & sel_maxlen);
+
+	  if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
+				     sel_inorder, sel_base,
+				     sel_limit, & sel_minlen,
+				     & sel_maxlen, sel_mtf,
+				     sectors, 0, sel_group))) { goto fail; }
+	}
+
+      /* Now decode each sector.
+       * NOTE(review): with several groups the table index comes from
+       * sel_group[c], which was decoded from the input; the only range
+       * check on it in this function is the XD3_ASSERT below. */
+      {
+	/* Initialize for (groups==1) case. */
+	uint8_t *gp_inorder = inorder[0]; 
+	usize_t *gp_base    = base[0];
+	usize_t *gp_limit   = limit[0];
+	usize_t  gp_minlen  = minlen[0];
+	usize_t  gp_maxlen  = maxlen[0];
+	usize_t c;
+
+	for (c = 0; c < sectors; c += 1)
+	  {
+	    usize_t n;
+
+	    if (groups >= 2)
+	      {
+		gp = sel_group[c];
+
+		XD3_ASSERT (gp < groups);
+
+		gp_inorder = inorder[gp];
+		gp_base    = base[gp];
+		gp_limit   = limit[gp];
+		gp_minlen  = minlen[gp];
+		gp_maxlen  = maxlen[gp];
+	      }
+
+	    XD3_ASSERT (output_end - output > 0);
+	    
+	    /* Decode next sector.  n is the sector size, cut down to the
+	     * number of output bytes still missing if that is smaller. */
+	    n = min (sector_size, (usize_t) (output_end - output));
+
+	    do
+	      {
+		usize_t sym;
+
+		if ((ret = djw_decode_symbol (stream, & bstate,
+					      & input, input_end,
+					      gp_inorder, gp_base,
+					      gp_limit, gp_minlen, gp_maxlen,
+					      & sym, ALPHABET_SIZE)))
+		  {
+		    goto fail;
+		  }
+
+		*output++ = sym;
+	      }
+	    while (--n);
+	  }
+      }
+    }
+  }
+
+  IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate)))
+		   { goto fail; });
+  XD3_ASSERT (ret == 0);
+
+ fail: /* Also reached on success, with ret == 0. */
+  xd3_free (stream, sel_group);
+
+  (*input_pos) = input;
+  (*output_pos) = output;
+  return ret;
+}
+
+#endif
diff --git a/xdelta3-fgk.h b/xdelta3-fgk.h
new file mode 100644
index 0000000..d1f9b24
--- /dev/null
+++ b/xdelta3-fgk.h
@@ -0,0 +1,852 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* For demonstration purposes only.
+ */
+
+#ifndef _XDELTA3_FGK_h_
+#define _XDELTA3_FGK_h_
+
+/* An implementation of the FGK algorithm described by D.E. Knuth in
+ * "Dynamic Huffman Coding" in Journal of Algorithms 6. */
+
+/* A 32bit counter (fgk_weight) is used as the frequency counter for
+ * nodes in the huffman tree.  TODO: Need to test for overflow and/or
+ * reset stats. */
+
+typedef struct _fgk_stream fgk_stream;
+typedef struct _fgk_node   fgk_node;
+typedef struct _fgk_block  fgk_block;
+typedef unsigned int       fgk_bit;
+typedef uint32_t           fgk_weight;
+
+struct _fgk_block {
+  union {
+    fgk_node  *un_leader;   /* block in use: set by fgk_make_block() */
+    fgk_block *un_freeptr;  /* block unused: next entry on the free list */
+  } un;
+};
+
+#define block_leader  un.un_leader
+#define block_freeptr un.un_freeptr
+
+/* The code can also support fixed huffman encoding/decoding. */
+#define IS_ADAPTIVE 1
+
+/* weight is a count of the number of times this element has been seen
+ * in the current encoding/decoding.  parent, right_child, and
+ * left_child are pointers defining the tree structure.  right and
+ * left point to neighbors in an ordered sequence of weights.  The
+ * left child of a node is always guaranteed to have weight not
+ * greater than its sibling.  fgk_blockLeader points to the element
+ * with the same weight as itself which is closest to the next
+ * increasing weight block.  */
+struct _fgk_node
+{
+  fgk_weight  weight;       /* occurrence count; 0 while still unseen */
+  fgk_node   *parent;       /* tree parent; fgk_init_node() starts it NULL */
+  fgk_node   *left_child;   /* tree child, or previous node on the zero list */
+  fgk_node   *right_child;  /* tree child, or next node on the zero list */
+  fgk_node   *left;         /* left neighbor in the weight-ordered sequence */
+  fgk_node   *right;        /* right neighbor in the weight-ordered sequence */
+  fgk_block  *my_block;     /* block record whose block_leader is consulted */
+};
+
+/* alphabet_size is a count of the number of possible leaves in
+ * the huffman tree.  The number of total nodes counting internal
+ * nodes is ((2 * alphabet_size) - 1).  zero_freq_count is the number
+ * of elements remaining which have zero frequency.  zero_freq_exp and
+ * zero_freq_rem satisfy the equation zero_freq_count =
+ * 2^zero_freq_exp + zero_freq_rem.  root_node is the root of the
+ * tree, which is initialized to a node with zero frequency and
+ * contains the 0th such element.  free_node contains a pointer to the
+ * next available fgk_node space.  alphabet contains all the elements
+ * and is indexed by N.  remaining_zeros points to the head of the
+ * list of zeros.  */
+struct _fgk_stream
+{
+  int alphabet_size;     /* number of leaves; fgk_alloc() stores ALPHABET_SIZE */
+  int zero_freq_count;   /* decremented by fgk_factor_remaining() */
+  int zero_freq_exp;     /* floor(log2(zero_freq_count)) */
+  int zero_freq_rem;     /* zero_freq_count - 2^zero_freq_exp */
+  int coded_depth;       /* number of bits currently held in coded_bits */
+
+  int total_nodes;       /* (2 * alphabet_size) - 1 entries in alphabet[] */
+  int total_blocks;      /* 2 * total_nodes entries in block_array[] */
+
+  fgk_bit *coded_bits;   /* bit buffer for the code word in progress */
+
+  fgk_block *block_array;  /* backing store for every fgk_block */
+  fgk_block *free_block;   /* head of the free-block list */
+
+  fgk_node *decode_ptr;       /* decoder's current position in the tree */
+  fgk_node *remaining_zeros;  /* head of the zero-weight list */
+  fgk_node *alphabet;         /* node array; leaf for symbol N is alphabet[N] */
+  fgk_node *root_node;        /* current root of the tree */
+  fgk_node *free_node;        /* next unused internal-node slot */
+};
+
+/*********************************************************************/
+/*                             Encoder                               */
+/*********************************************************************/
+
+static fgk_stream*     fgk_alloc           (xd3_stream *stream /*, int alphabet_size */);
+static void            fgk_init            (fgk_stream *h);
+static int             fgk_encode_data     (fgk_stream *h,
+					    int         n);
+static inline fgk_bit  fgk_get_encoded_bit (fgk_stream *h);
+
+static int             xd3_encode_fgk      (xd3_stream  *stream,
+					    fgk_stream  *sec_stream,
+					    xd3_output  *input,
+					    xd3_output  *output,
+					    xd3_sec_cfg *cfg);
+
+/*********************************************************************/
+/* 			       Decoder                               */
+/*********************************************************************/
+
+static inline int      fgk_decode_bit      (fgk_stream *h,
+					    fgk_bit     b);
+static int             fgk_decode_data     (fgk_stream *h);
+static void            fgk_destroy         (xd3_stream *stream,
+					    fgk_stream *h);
+
+static int             xd3_decode_fgk      (xd3_stream     *stream,
+					    fgk_stream     *sec_stream,
+					    const uint8_t **input,
+					    const uint8_t  *const input_end,
+					    uint8_t       **output,
+					    const uint8_t  *const output_end);
+
+/*********************************************************************/
+/* 			       Private                               */
+/*********************************************************************/
+
+static unsigned int fgk_find_nth_zero        (fgk_stream *h, int n);
+static int          fgk_nth_zero             (fgk_stream *h, int n);
+static void         fgk_update_tree          (fgk_stream *h, int n);
+static fgk_node*    fgk_increase_zero_weight (fgk_stream *h, int n);
+static void         fgk_eliminate_zero       (fgk_stream* h, fgk_node *node);
+static void         fgk_move_right           (fgk_stream *h, fgk_node *node);
+static void         fgk_promote              (fgk_stream *h, fgk_node *node);
+static void         fgk_init_node            (fgk_node *node, int i, int size);
+static fgk_block*   fgk_make_block           (fgk_stream *h, fgk_node *l);
+static void         fgk_free_block           (fgk_stream *h, fgk_block *b);
+static void         fgk_factor_remaining     (fgk_stream *h);
+static inline void  fgk_swap_ptrs            (fgk_node **one, fgk_node **two);
+
+/*********************************************************************/
+/* 			    Basic Routines                           */
+/*********************************************************************/
+
+/* Allocates a huffman coder for an alphabet of ALPHABET_SIZE symbols; the
+ * tree is set up separately by fgk_init.  Returns NULL on allocation failure. */
+static fgk_stream* fgk_alloc (xd3_stream *stream /*, int alphabet_size0 */)
+{
+  int alphabet_size0 = ALPHABET_SIZE;  /* fixed; the parameter is commented out */
+  fgk_stream *h;
+
+  if ((h = (fgk_stream*) xd3_alloc (stream, 1, sizeof (fgk_stream))) == NULL)
+    {
+      return NULL;
+    }
+  /* All three arrays are requested before any result is checked. */
+  h->total_nodes  = (2 * alphabet_size0) - 1;
+  h->total_blocks = (2 * h->total_nodes);
+  h->alphabet     = (fgk_node*)  xd3_alloc (stream, h->total_nodes,    sizeof (fgk_node));
+  h->block_array  = (fgk_block*) xd3_alloc (stream, h->total_blocks,   sizeof (fgk_block));
+  h->coded_bits   = (fgk_bit*)   xd3_alloc (stream, alphabet_size0, sizeof (fgk_bit));
+
+  if (h->coded_bits  == NULL ||
+      h->alphabet    == NULL ||
+      h->block_array == NULL)
+    {
+      fgk_destroy (stream, h);  /* passes all three arrays and h to xd3_free */
+      return NULL;
+    }
+
+  h->alphabet_size   = alphabet_size0;
+
+  return h;
+}
+
+static void fgk_init (fgk_stream *h)
+{
+  int i;
+  /* Resets h to the initial state: a single zero-weight root, all blocks free. */
+  h->root_node       = h->alphabet;
+  h->decode_ptr      = h->root_node;
+  h->free_node       = h->alphabet + h->alphabet_size;  /* first slot after the leaves */
+  h->remaining_zeros = h->alphabet;
+  h->coded_depth     = 0;
+  h->zero_freq_count = h->alphabet_size + 2;  /* decremented twice just below */
+
+  /* after two calls to factor_remaining, zero_freq_count == alphabet_size */
+  fgk_factor_remaining(h); /* sets zero_freq_exp and zero_freq_rem */
+  fgk_factor_remaining(h); /* recomputes both for the final count */
+
+  IF_DEBUG (memset (h->alphabet, 0, sizeof (h->alphabet[0]) * h->total_nodes));
+  /* Chain every block into the free list, in array order. */
+  for (i = 0; i < h->total_blocks-1; i += 1)
+    {
+      h->block_array[i].block_freeptr = &h->block_array[i + 1];
+    }
+
+  h->block_array[h->total_blocks - 1].block_freeptr = NULL;  /* end of list */
+  h->free_block = h->block_array;
+
+  /* Zero frequency nodes are inserted in the first alphabet_size
+   * positions, with Value, weight, and a pointer to the next zero
+   * frequency node.  */
+  for (i = h->alphabet_size - 1; i >= 0; i -= 1)
+    {
+      fgk_init_node (h->alphabet + i, i, h->alphabet_size);
+    }
+}
+
+static void fgk_swap_ptrs(fgk_node **one, fgk_node **two)
+{
+  fgk_node *const saved_two = *two;  /* exchange the two node pointers */
+  *two = *one;
+  *one = saved_two;
+}
+
+/* Takes huffman transmitter h and n, the nth elt in the alphabet, and
+ * returns the number of bits required to encode n. */
+static int fgk_encode_data (fgk_stream* h, int n)
+{
+  fgk_node *target_ptr = h->alphabet + n;
+
+  XD3_ASSERT (n < h->alphabet_size);
+
+  h->coded_depth = 0;  /* start a fresh code word */
+
+  /* First encode the binary representation of the nth remaining
+   * zero frequency element in reverse such that the bits, which will be
+   * emitted from h->coded_depth down to 0, will arrive in increasing
+   * order following the tree path.  If there is only one left, it
+   * is not necessary to encode these bits. */
+  if (IS_ADAPTIVE && target_ptr->weight == 0)
+    {
+      unsigned int where, shift;
+      int bits;
+
+      where = fgk_find_nth_zero(h, n);  /* position of n on the zero list */
+      shift = 1;
+
+      if (h->zero_freq_rem == 0)
+	{
+	  bits = h->zero_freq_exp;
+	}
+      else
+	{
+	  bits = h->zero_freq_exp + 1;
+	}
+
+      while (bits > 0)
+	{
+	  h->coded_bits[h->coded_depth++] = (shift & where) && 1;  /* low bit first */
+
+	  bits   -= 1;
+	  shift <<= 1;
+	};
+
+      target_ptr = h->remaining_zeros;  /* the path below leads to the zero list head */
+    }
+
+  /* The path from root to node is filled into coded_bits in reverse so
+   * that it is encoded in the right order */
+  while (target_ptr != h->root_node)
+    {
+      h->coded_bits[h->coded_depth++] = (target_ptr->parent->right_child == target_ptr);
+
+      target_ptr = target_ptr->parent;
+    }
+
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree(h, n);  /* the code word above was built before this update */
+    }
+
+  return h->coded_depth;
+}
+
+/* Pops one bit of the code word built by fgk_encode_data; call it exactly
+ * as many times as that function's return value. */
+static inline fgk_bit fgk_get_encoded_bit (fgk_stream *h)
+{
+  XD3_ASSERT (h->coded_depth > 0);
+  h->coded_depth -= 1;  /* the most recently stored bit is returned first */
+  return h->coded_bits[h->coded_depth];
+}
+
+/* This procedure updates the tree after alphabet[n] has been encoded
+ * or decoded.
+ */
+static void fgk_update_tree (fgk_stream *h, int n)
+{
+  fgk_node *incr_node;
+
+  if (h->alphabet[n].weight == 0)
+    {
+      incr_node = fgk_increase_zero_weight (h, n);  /* first occurrence of n */
+    }
+  else
+    {
+      incr_node = h->alphabet + n;
+    }
+  /* Walk from the leaf up to the root, adding one to every weight on the way. */
+  while (incr_node != h->root_node)
+    {
+      fgk_move_right (h, incr_node);
+      fgk_promote    (h, incr_node);
+      incr_node->weight += 1;   /* count this node ... */
+      incr_node = incr_node->parent; /* ... then continue with its parent */
+    }
+
+  h->root_node->weight += 1;
+}
+
+static void fgk_move_right (fgk_stream *h, fgk_node *move_fwd)
+{
+  fgk_node **fwd_par_ptr, **back_par_ptr;
+  fgk_node *move_back, *tmp;
+  /* Exchanges move_fwd with the leader of its block, in the list and the tree. */
+  move_back = move_fwd->my_block->block_leader;
+
+  if (move_fwd         == move_back ||
+      move_fwd->parent == move_back ||
+      move_fwd->weight == 0)
+    {
+      return;  /* already the leader, leader is the parent, or weight 0: no swap */
+    }
+  /* Step 1: exchange the two nodes' places in the left/right list. */
+  move_back->right->left = move_fwd;
+
+  if (move_fwd->left)
+    {
+      move_fwd->left->right = move_back;
+    }
+
+  tmp = move_fwd->right;
+  move_fwd->right = move_back->right;
+
+  if (tmp == move_back)  /* the two nodes were adjacent */
+    {
+      move_back->right = move_fwd;
+    }
+  else
+    {
+      tmp->left = move_back;
+      move_back->right = tmp;
+    }
+
+  tmp = move_back->left;
+  move_back->left = move_fwd->left;
+
+  if (tmp == move_fwd)  /* the two nodes were adjacent */
+    {
+      move_fwd->left = move_back;
+    }
+  else
+    {
+      tmp->right = move_fwd;
+      move_fwd->left = tmp;
+    }
+  /* Step 2: find the child slot in each parent that points at each node. */
+  if (move_fwd->parent->right_child == move_fwd)
+    {
+      fwd_par_ptr = &move_fwd->parent->right_child;
+    }
+  else
+    {
+      fwd_par_ptr = &move_fwd->parent->left_child;
+    }
+
+  if (move_back->parent->right_child == move_back)
+    {
+      back_par_ptr = &move_back->parent->right_child;
+    }
+  else
+    {
+      back_par_ptr = &move_back->parent->left_child;
+    }
+  /* Step 3: swap the parent pointers, then the parents' child slots. */
+  fgk_swap_ptrs (&move_fwd->parent, &move_back->parent);
+  fgk_swap_ptrs (fwd_par_ptr, back_par_ptr);
+  /* move_fwd is now recorded as the leader of the block. */
+  move_fwd->my_block->block_leader = move_fwd;
+}
+
+/* Shifts node, the leader of its block, into the next block. */
+static void fgk_promote (fgk_stream *h, fgk_node *node)
+{
+  fgk_node *my_left, *my_right;
+  fgk_block *cur_block;
+
+  my_right  = node->right;
+  my_left   = node->left;
+  cur_block = node->my_block;
+
+  if (node->weight == 0)
+    {
+      return;  /* zero-weight nodes are left where they are */
+    }
+
+  /* if left is right child, parent of remaining zeros case (?), means parent
+   * has same weight as right child. */
+  if (my_left == node->right_child &&
+      node->left_child &&
+      node->left_child->weight == 0)
+    {
+      XD3_ASSERT (node->left_child == h->remaining_zeros);
+      XD3_ASSERT (node->right_child->weight == (node->weight+1)); /* child weight was already incremented */
+      
+      if (node->weight == (my_right->weight - 1) && my_right != h->root_node)
+	{
+	  fgk_free_block (h, cur_block);
+	  node->my_block    = my_right->my_block;  /* node and its right child ... */
+	  my_left->my_block = my_right->my_block;  /* ... both join my_right's block */
+	}
+
+      return;
+    }
+
+  if (my_left == h->remaining_zeros)
+    {
+      return;  /* left neighbor is the zero list head: block left unchanged */
+    }
+
+  /* true if not the leftmost node */
+  if (my_left->my_block == cur_block)
+    {
+      my_left->my_block->block_leader = my_left;  /* left neighbor becomes leader */
+    }
+  else
+    {
+      fgk_free_block (h, cur_block);  /* presumably node was alone in the block */
+    }
+
+  /* node->parent != my_right */
+  if ((node->weight == (my_right->weight - 1)) && (my_right != h->root_node))
+    {
+      node->my_block = my_right->my_block;  /* right neighbor is one heavier: join it */
+    }
+  else
+    {
+      node->my_block = fgk_make_block (h, node);  /* otherwise lead a new block */
+    }
+}
+
+/* When an element is seen the first time this is called to remove it from the list of
+ * zero weight elements and introduce a new internal node to the tree.  */
+static fgk_node* fgk_increase_zero_weight (fgk_stream *h, int n)
+{
+  fgk_node *this_zero, *new_internal, *zero_ptr;
+
+  this_zero = h->alphabet + n;
+
+  if (h->zero_freq_count == 1)
+    {
+      /* this is the last one */
+      this_zero->right_child = NULL;
+
+      if (this_zero->right->weight == 1)
+	{
+	  this_zero->my_block = this_zero->right->my_block;
+	}
+      else
+	{
+	  this_zero->my_block = fgk_make_block (h, this_zero);
+	}
+
+      h->remaining_zeros = NULL;  /* the zero list is now empty */
+
+      return this_zero;
+    }
+
+  zero_ptr = h->remaining_zeros;
+
+  new_internal = h->free_node++;  /* take the next unused internal-node slot */
+  /* The new node takes over the zero list head's parent and right neighbor. */
+  new_internal->parent      = zero_ptr->parent;
+  new_internal->right       = zero_ptr->right;
+  new_internal->weight      = 0;
+  new_internal->right_child = this_zero;
+  new_internal->left        = this_zero;
+
+  if (h->remaining_zeros == h->root_node)
+    {
+      /* This is the first element to be coded */
+      h->root_node           = new_internal;
+      this_zero->my_block    = fgk_make_block (h, this_zero);
+      new_internal->my_block = fgk_make_block (h, new_internal);
+    }
+  else
+    {
+      new_internal->right->left = new_internal;
+
+      if (zero_ptr->parent->right_child == zero_ptr)
+	{
+	  zero_ptr->parent->right_child = new_internal;
+	}
+      else
+	{
+	  zero_ptr->parent->left_child = new_internal;
+	}
+
+      if (new_internal->right->weight == 1)
+	{
+	  new_internal->my_block = new_internal->right->my_block;
+	}
+      else
+	{
+	  new_internal->my_block = fgk_make_block (h, new_internal);
+	}
+
+      this_zero->my_block = new_internal->my_block;
+    }
+
+  fgk_eliminate_zero (h, this_zero);  /* unlink; may advance h->remaining_zeros */
+
+  new_internal->left_child = h->remaining_zeros;
+  /* this_zero becomes a leaf between the zero list head and new_internal. */
+  this_zero->right       = new_internal;
+  this_zero->left        = h->remaining_zeros;
+  this_zero->parent      = new_internal;
+  this_zero->left_child  = NULL;
+  this_zero->right_child = NULL;
+
+  h->remaining_zeros->parent = new_internal;
+  h->remaining_zeros->right  = this_zero;
+
+  return this_zero;
+}
+
+/* When a zero frequency element is encoded, it is followed by the
+ * binary representation of its index among the remaining zero
+ * elements.  This walks the zero list from its head and returns how
+ * many links precede alphabet[n].  */
+static unsigned int fgk_find_nth_zero (fgk_stream* h, int n)
+{
+  fgk_node *const wanted = h->alphabet + n;
+  fgk_node *cursor;
+  unsigned int steps = 0;
+
+  /* Zero-weight nodes are chained through right_child. */
+  for (cursor = h->remaining_zeros; cursor != wanted; cursor = cursor->right_child)
+    {
+      steps += 1;
+    }
+
+  return steps;
+}
+
+/* Splices node out of the list of zeros. */
+static void fgk_eliminate_zero (fgk_stream* h, fgk_node *node)
+{
+  if (h->zero_freq_count == 1)
+    {
+      return;
+    }
+
+  fgk_factor_remaining(h);  /* decrements zero_freq_count and refactors it */
+
+  if (node->left_child == NULL)  /* no predecessor: treated as the list head */
+    {
+      h->remaining_zeros = h->remaining_zeros->right_child;
+      h->remaining_zeros->left_child = NULL;
+    }
+  else if (node->right_child == NULL)  /* no successor: node is the tail */
+    {
+      node->left_child->right_child = NULL;
+    }
+  else  /* interior node: link its two neighbors to each other */
+    {
+      node->right_child->left_child = node->left_child;
+      node->left_child->right_child = node->right_child;
+    }
+}
+
+static void fgk_init_node (fgk_node *node, int i, int size)
+{
+  node->weight   = 0;
+  node->parent   = NULL;
+  node->right    = NULL;
+  node->left     = NULL;
+  node->my_block = NULL;
+  /* Previous entry of the zero list: the array neighbor, NULL for entry 0. */
+  if (i >= 1)
+    {
+      node->left_child = node - 1;
+    }
+  else
+    {
+      node->left_child = NULL;
+    }
+  /* Next entry of the zero list: the array neighbor, NULL for the last one. */
+  if (i < size - 1)
+    {
+      node->right_child = node + 1;
+    }
+  else
+    {
+      node->right_child = NULL;
+    }
+}
+
+/* Blocks live in one array.  A block is either on the free list
+ * (linked through block_freeptr, headed by h->free_block) or in use,
+ * in which case it holds a pointer to its leader node.  This takes
+ * the first free block and makes lead its leader.  */
+static fgk_block* fgk_make_block (fgk_stream *h, fgk_node* lead)
+{
+  fgk_block *const taken = h->free_block;
+
+  XD3_ASSERT (taken != NULL);
+
+  /* Read the free-list link before the union is reused for the leader. */
+  h->free_block = taken->block_freeptr;
+  taken->block_leader = lead;
+
+  return taken;
+}
+
+/* Pushes block b onto the front of the free list. */
+static void fgk_free_block (fgk_stream *h, fgk_block *b)
+{
+  b->block_freeptr = h->free_block;  /* overwrites the leader pointer (same union) */
+  h->free_block = b;
+}
+
+/* Decrements zero_freq_count, then sets zero_freq_exp and zero_freq_rem
+ * so that zero_freq_count = 2^zero_freq_exp + zero_freq_rem.  */
+static void fgk_factor_remaining (fgk_stream *h)
+{
+  unsigned int i;
+
+  i = (--h->zero_freq_count);
+  h->zero_freq_exp = 0;
+  /* Count how many times i can be halved before it reaches 1. */
+  while (i > 1)
+    {
+      h->zero_freq_exp += 1;
+      i >>= 1;
+    }
+
+  i = 1 << h->zero_freq_exp;  /* largest power of two found above */
+
+  h->zero_freq_rem = h->zero_freq_count - i;
+}
+
+/* Receives one bit b (0 or 1) and returns true when a complete code has
+ * been received; the caller then fetches the symbol with fgk_decode_data.
+ */
+static inline int fgk_decode_bit (fgk_stream* h, fgk_bit b)
+{
+  XD3_ASSERT (b == 1 || b == 0);
+
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0)
+    {
+      int bitsreq;  /* width of the zero-list index that follows the path */
+
+      if (h->zero_freq_rem == 0)
+	{
+	  bitsreq = h->zero_freq_exp;
+	}
+      else
+	{
+	  bitsreq = h->zero_freq_exp + 1;
+	}
+
+      h->coded_bits[h->coded_depth] = b;  /* collect the index bits in arrival order */
+      h->coded_depth += 1;
+
+      return h->coded_depth >= bitsreq;
+    }
+  else
+    {
+      if (b)  /* still walking the tree: 1 goes right, 0 goes left */
+	{
+	  h->decode_ptr = h->decode_ptr->right_child;
+	}
+      else
+	{
+	  h->decode_ptr = h->decode_ptr->left_child;
+	}
+
+      if (h->decode_ptr->left_child == NULL)
+	{
+	  /* If the weight is non-zero, finished. */
+	  if (h->decode_ptr->weight != 0)
+	    {
+	      return 1;
+	    }
+
+	  /* zero_freq_count is dropping to 0, finished. */
+	  return h->zero_freq_count == 1;
+	}
+      else
+	{
+	  return 0;
+	}
+    }
+}
+
+static int fgk_nth_zero (fgk_stream* h, int n)
+{
+  fgk_node *ret = h->remaining_zeros;
+  /* Returns the alphabet index of the node n links along the zero list. */
+  /* ERROR: if during this loop (ret->right_child == NULL) then the
+   * encoder's zero count is too high.  Could return an error code
+   * now, but is probably unnecessary overhead, since the caller
+   * should check integrity anyway. */
+  for (; n != 0 && ret->right_child != NULL; n -= 1)
+    {
+      ret = ret->right_child;  /* stops early at the last node of the list */
+    }
+
+  return ret - h->alphabet;
+}
+
+/* Call only after fgk_decode_bit has returned 1.  Returns the decoded
+ * alphabet index, updates the tree, and resets the decoder for the next
+ * code word.
+ */
+static int fgk_decode_data (fgk_stream* h)
+{
+  unsigned int elt = h->decode_ptr - h->alphabet;
+
+  if (IS_ADAPTIVE && h->decode_ptr->weight == 0) {
+    int i;
+    unsigned int n = 0;
+
+    /* Rebuild the zero-list index from the stored bits, first bit most
+     * significant.  With coded_depth == 0 no bit is read and n stays 0. */
+    for (i = 0; i < h->coded_depth; i += 1)
+      {
+	n = (n << 1) | h->coded_bits[i];
+      }
+
+    elt = fgk_nth_zero(h, n);
+  }
+
+  h->coded_depth = 0;
+
+  if (IS_ADAPTIVE)
+    {
+      fgk_update_tree(h, elt);
+    }
+
+  h->decode_ptr = h->root_node;  /* the next code word starts at the root */
+
+  return elt;
+}
+
+static void fgk_destroy (xd3_stream *stream,
+			 fgk_stream *h)
+{
+  if (h == NULL) { return; }
+
+  /* Release the three arrays first, then the stream record itself. */
+  xd3_free (stream, h->alphabet);
+  xd3_free (stream, h->coded_bits);
+  xd3_free (stream, h->block_array);
+  xd3_free (stream, h);
+}
+
+/*********************************************************************/
+/* 			       Xdelta                                */
+/*********************************************************************/
+
+static int
+xd3_encode_fgk (xd3_stream *stream, fgk_stream *sec_stream, xd3_output *input, xd3_output *output, xd3_sec_cfg *cfg)
+{
+  bit_state   bstate = BIT_STATE_ENCODE_INIT;
+  xd3_output *cur_page;
+  int ret;
+  /* Encodes every byte of every input page; cfg is not used in this function. */
+  /* OPT: quit compression early if it looks bad */
+  for (cur_page = input; cur_page; cur_page = cur_page->next_page)
+    {
+      const uint8_t *inp     = cur_page->base;
+      const uint8_t *inp_max = inp + cur_page->next;  /* presumably bytes used in the page */
+
+      while (inp < inp_max)
+	{
+	  usize_t bits = fgk_encode_data (sec_stream, *inp++);  /* code length in bits */
+
+	  while (bits--)
+	    {
+	      if ((ret = xd3_encode_bit (stream, & output, & bstate, fgk_get_encoded_bit (sec_stream)))) { return ret; }
+	    }
+	}
+    }
+
+  return xd3_flush_bits (stream, & output, & bstate);
+}
+
+static int
+xd3_decode_fgk (xd3_stream     *stream,
+		fgk_stream     *sec_stream,
+		const uint8_t **input_pos,
+		const uint8_t  *const input_max,
+		uint8_t       **output_pos,
+		const uint8_t  *const output_max)
+{
+  bit_state bstate;
+  uint8_t *output = *output_pos;
+  const uint8_t *input = *input_pos;
+  /* Decodes bytes until output reaches output_max; fails if input runs out first. */
+  for (;;)
+    {
+      if (input == input_max)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return XD3_INTERNAL;  /* *input_pos and *output_pos are not updated here */
+	}
+
+      bstate.cur_byte = *input++;
+      /* Feed the byte to the decoder one bit at a time, low bit first. */
+      for (bstate.cur_mask = 1; bstate.cur_mask != 0x100; bstate.cur_mask <<= 1)
+	{
+	  int done = fgk_decode_bit (sec_stream, (bstate.cur_byte & bstate.cur_mask) && 1);
+
+	  if (! done) { continue; }
+	  /* NOTE(review): stored before the bound check; assumes output < output_max on entry. */
+	  *output++ = fgk_decode_data (sec_stream);
+
+	  if (output == output_max)
+	    {
+	      /* During regression testing: */
+	      IF_REGRESSION ({
+		int ret;
+		bstate.cur_mask <<= 1;
+		if ((ret = xd3_test_clean_bits (stream, & bstate))) { return ret; }
+	      });
+
+	      (*output_pos) = output;
+	      (*input_pos) = input;
+	      return 0;
+	    }
+	}
+    }
+}
+
+#endif /* _XDELTA3_FGK_h_ */
diff --git a/xdelta3-hash.h b/xdelta3-hash.h
new file mode 100644
index 0000000..b098d24
--- /dev/null
+++ b/xdelta3-hash.h
@@ -0,0 +1,223 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XDELTA3_HASH_H_
+#define _XDELTA3_HASH_H_
+
+#if XD3_DEBUG
+#define SMALL_HASH_DEBUG1(s,inp)                                  \
+  usize_t debug_state;                                            \
+  usize_t debug_hval = xd3_checksum_hash (& (s)->small_hash,      \
+              xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look))
+#define SMALL_HASH_DEBUG2(s,inp)                                  \
+  XD3_ASSERT (debug_hval == xd3_checksum_hash (& (s)->small_hash, \
+              xd3_scksum (&debug_state, (inp), (s)->smatcher.small_look)))
+#else
+#define SMALL_HASH_DEBUG1(s,inp)
+#define SMALL_HASH_DEBUG2(s,inp)
+#endif /* XD3_DEBUG */
+
+/* This is a good hash multiplier for 32-bit LCGs: see "linear
+ * congruential generators of different sizes and good lattice
+ * structure" */
+static const uint32_t hash_multiplier = 1597334677U;
+
+/***********************************************************************
+ Permute stuff
+ ***********************************************************************/
+
+#if HASH_PERMUTE == 0
+#define PERMUTE(x) (x)
+#else
+#define PERMUTE(x) (__single_hash[(uint32_t)(x)])  /* x must be 0..255: the table has 256 entries */
+
+static const uint16_t __single_hash[256] =
+{
+  /* Random numbers generated using SLIB's pseudo-random number generator.
+   * This hashes the input alphabet. */
+  0xbcd1, 0xbb65, 0x42c2, 0xdffe, 0x9666, 0x431b, 0x8504, 0xeb46,
+  0x6379, 0xd460, 0xcf14, 0x53cf, 0xdb51, 0xdb08, 0x12c8, 0xf602,
+  0xe766, 0x2394, 0x250d, 0xdcbb, 0xa678, 0x02af, 0xa5c6, 0x7ea6,
+  0xb645, 0xcb4d, 0xc44b, 0xe5dc, 0x9fe6, 0x5b5c, 0x35f5, 0x701a,
+  0x220f, 0x6c38, 0x1a56, 0x4ca3, 0xffc6, 0xb152, 0x8d61, 0x7a58,
+  0x9025, 0x8b3d, 0xbf0f, 0x95a3, 0xe5f4, 0xc127, 0x3bed, 0x320b,
+  0xb7f3, 0x6054, 0x333c, 0xd383, 0x8154, 0x5242, 0x4e0d, 0x0a94,
+  0x7028, 0x8689, 0x3a22, 0x0980, 0x1847, 0xb0f1, 0x9b5c, 0x4176,
+  0xb858, 0xd542, 0x1f6c, 0x2497, 0x6a5a, 0x9fa9, 0x8c5a, 0x7743,
+  0xa8a9, 0x9a02, 0x4918, 0x438c, 0xc388, 0x9e2b, 0x4cad, 0x01b6,
+  0xab19, 0xf777, 0x365f, 0x1eb2, 0x091e, 0x7bf8, 0x7a8e, 0x5227,
+  0xeab1, 0x2074, 0x4523, 0xe781, 0x01a3, 0x163d, 0x3b2e, 0x287d,
+  0x5e7f, 0xa063, 0xb134, 0x8fae, 0x5e8e, 0xb7b7, 0x4548, 0x1f5a,
+  0xfa56, 0x7a24, 0x900f, 0x42dc, 0xcc69, 0x02a0, 0x0b22, 0xdb31,
+  0x71fe, 0x0c7d, 0x1732, 0x1159, 0xcb09, 0xe1d2, 0x1351, 0x52e9,
+  0xf536, 0x5a4f, 0xc316, 0x6bf9, 0x8994, 0xb774, 0x5f3e, 0xf6d6,
+  0x3a61, 0xf82c, 0xcc22, 0x9d06, 0x299c, 0x09e5, 0x1eec, 0x514f,
+  0x8d53, 0xa650, 0x5c6e, 0xc577, 0x7958, 0x71ac, 0x8916, 0x9b4f,
+  0x2c09, 0x5211, 0xf6d8, 0xcaaa, 0xf7ef, 0x287f, 0x7a94, 0xab49,
+  0xfa2c, 0x7222, 0xe457, 0xd71a, 0x00c3, 0x1a76, 0xe98c, 0xc037,
+  0x8208, 0x5c2d, 0xdfda, 0xe5f5, 0x0b45, 0x15ce, 0x8a7e, 0xfcad,
+  0xaa2d, 0x4b5c, 0xd42e, 0xb251, 0x907e, 0x9a47, 0xc9a6, 0xd93f,
+  0x085e, 0x35ce, 0xa153, 0x7e7b, 0x9f0b, 0x25aa, 0x5d9f, 0xc04d,
+  0x8a0e, 0x2875, 0x4a1c, 0x295f, 0x1393, 0xf760, 0x9178, 0x0f5b,
+  0xfa7d, 0x83b4, 0x2082, 0x721d, 0x6462, 0x0368, 0x67e2, 0x8624,
+  0x194d, 0x22f6, 0x78fb, 0x6791, 0xb238, 0xb332, 0x7276, 0xf272,
+  0x47ec, 0x4504, 0xa961, 0x9fc8, 0x3fdc, 0xb413, 0x007a, 0x0806,
+  0x7458, 0x95c6, 0xccaa, 0x18d6, 0xe2ae, 0x1b06, 0xf3f6, 0x5050,
+  0xc8e8, 0xf4ac, 0xc04c, 0xf41c, 0x992f, 0xae44, 0x5f1b, 0x1113,
+  0x1738, 0xd9a8, 0x19ea, 0x2d33, 0x9698, 0x2fe9, 0x323f, 0xcde2,
+  0x6d71, 0xe37d, 0xb697, 0x2c4f, 0x4373, 0x9102, 0x075d, 0x8e25,
+  0x1672, 0xec28, 0x6acb, 0x86cc, 0x186e, 0x9414, 0xd674, 0xd1a5
+};
+#endif
+
+/* Update the checksum state. */
+#if ADLER_LARGE_CKSUM
+static inline uint32_t
+xd3_large_cksum_update (uint32_t cksum,
+			const uint8_t *base,
+			int look) {
+  uint32_t old_c = PERMUTE(base[0]);      /* term removed from the checksum */
+  uint32_t new_c = PERMUTE(base[look]);   /* term added to the checksum */
+  uint32_t low   = ((cksum & 0xffff) - old_c + new_c) & 0xffff;  /* low half: plain sum */
+  uint32_t high  = ((cksum >> 16) - (old_c * look) + low) & 0xffff;  /* high half: sum of sums */
+  return (high << 16) | low;
+}
+#else
+// TODO: revisit this topic
+#endif
+
+/* Note: small cksum is hard-coded for 4 bytes */
+#if UNALIGNED_OK
+static inline uint32_t
+xd3_scksum (uint32_t *state,
+            const uint8_t *base,
+            const int look)              /* look is not used in this variant */
+{
+  (*state) = *(uint32_t*)base;           /* direct 32-bit load of base[0..3]; drops const */
+  return (*state) * hash_multiplier;
+}
+static inline uint32_t
+xd3_small_cksum_update (uint32_t *state,
+			const uint8_t *base,
+			int look)                /* look is not used in this variant */
+{
+  (*state) = *(uint32_t*)(base+1);       /* reload the window one byte further on */
+  return (*state) * hash_multiplier;
+}
+#else
+static inline uint32_t
+xd3_scksum (uint32_t *state,
+            const uint8_t *base,
+            const int look)              /* look is not used: always 4 bytes */
+{
+  (*state) = ((uint32_t) base[0] << 24 | /* widen first: no shift into an int sign bit */
+              (uint32_t) base[1] << 16 |
+              (uint32_t) base[2] << 8 |
+              (uint32_t) base[3]);       /* base[0] ends up in the top byte */
+  return (*state) * hash_multiplier;
+}
+static inline uint32_t
+xd3_small_cksum_update (uint32_t *state,
+			const uint8_t *base,
+			const int look)          /* look is not used in this variant */
+{
+  (*state) <<= 8;                        /* shifts the top byte out of the state */
+  (*state) |= base[4];                   /* appends base[4] as the new low byte */
+  return (*state) * hash_multiplier;
+}
+#endif
+
+/***********************************************************************
+ Ctable stuff
+ ***********************************************************************/
+
+static inline usize_t
+xd3_checksum_hash (const xd3_hash_cfg *cfg, const usize_t cksum)
+{
+  return (cksum >> cfg->shift) ^ (cksum & cfg->mask);  /* XOR of shifted-down and masked cksum */
+}
+
+/***********************************************************************
+ Cksum function
+ ***********************************************************************/
+
+#if ADLER_LARGE_CKSUM
+static inline uint32_t
+xd3_lcksum (const uint8_t *seg, const int ln)
+{
+  uint32_t sum_bytes = 0;   /* running sum of the permuted bytes */
+  uint32_t sum_sums  = 0;   /* running sum of sum_bytes after each byte */
+  int pos;
+
+  for (pos = 0; pos < ln; pos++)
+    {
+      sum_bytes += PERMUTE(seg[pos]);
+      sum_sums  += sum_bytes;
+    }
+
+  return ((sum_sums & 0xffff) << 16) | (sum_bytes & 0xffff);
+}
+#else
+static inline uint32_t
+xd3_lcksum (const uint8_t *seg, const int ln)
+{
+  int i, j;  /* i walks seg forwards while j walks the multiplier table backwards */
+  uint32_t h = 0;
+  for (i = 0, j = ln - 1; i < ln; ++i, --j) {
+    h += PERMUTE(seg[i]) * hash_multiplier_powers[j];  /* table is defined outside this chunk */
+  }
+  return h;
+}
+#endif
+
+#if XD3_ENCODER
+static usize_t
+xd3_size_log2 (usize_t slots)
+{
+  int bits = 28; /* This should not be an unreasonable limit. */
+  int i;
+  /* Returns floor(log2(slots)), clamped to the range [2, 28]. */
+  for (i = 3; i <= bits; i += 1)
+    {
+      if (slots < (1U << i))
+	{
+	  /* TODO: this is compaction=1 in checksum_test.cc and maybe should
+	   * not be fixed at -1. */
+	  bits = i - 1; /* first power of two above slots, minus one bit */
+	  break;
+	}
+    }
+
+  return bits;
+}
+
+static void
+xd3_size_hashtable (xd3_stream    *stream,
+		    usize_t        slots,
+		    xd3_hash_cfg  *cfg)
+{
+  int bits = xd3_size_log2 (slots);  /* stream is not used in this function */
+
+  /* TODO: there's a 32-bit assumption here */
+  cfg->size  = (1 << bits);        /* table size is a power of two */
+  cfg->mask  = (cfg->size - 1);    /* keeps the low `bits` bits */
+  cfg->shift = 32 - bits;          /* brings the top `bits` bits of a 32-bit value down */
+}
+#endif
+
+#endif
diff --git a/xdelta3-list.h b/xdelta3-list.h
new file mode 100644
index 0000000..3c0df5e
--- /dev/null
+++ b/xdelta3-list.h
@@ -0,0 +1,130 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __XDELTA3_LIST__
+#define __XDELTA3_LIST__
+
+#define XD3_MAKELIST(LTYPE,ETYPE,LNAME)                                 \
+  /* Generates LTYPE_* helpers for a circular list of ETYPE linked via member LNAME. */ \
+static inline ETYPE*                                                    \
+LTYPE ## _entry (LTYPE* l)                                              \
+{ /* Element that embeds link L: subtract LNAME's offset within ETYPE. */ \
+  return (ETYPE*) ((char*) l - (unsigned long) &((ETYPE*) 0)->LNAME);   \
+}                                                                       \
+  /* Make L an empty list: both of its pointers refer to L itself. */   \
+static inline void                                                      \
+LTYPE ## _init (LTYPE *l)                                               \
+{                                                                       \
+  l->next = l;                                                          \
+  l->prev = l;                                                          \
+}                                                                       \
+  /* Splice INS in between the adjacent links PREV and NEXT. */         \
+static inline void                                                      \
+LTYPE ## _add (LTYPE *prev, LTYPE *next, LTYPE *ins)                    \
+{                                                                       \
+  next->prev = ins;                                                     \
+  prev->next = ins;                                                     \
+  ins->next  = next;                                                    \
+  ins->prev  = prev;                                                    \
+}                                                                       \
+  /* Append element I: its link goes between L's last link and L. */    \
+static inline void                                                      \
+LTYPE ## _push_back (LTYPE *l, ETYPE *i)                                \
+{                                                                       \
+  LTYPE ## _add (l->prev, l, & i->LNAME);                               \
+}                                                                       \
+  /* Join NEXT and PREV to each other, dropping whatever lay between. */ \
+static inline void                                                      \
+LTYPE ## _del (LTYPE *next,                                             \
+	       LTYPE *prev)                                             \
+{                                                                       \
+  next->prev = prev;                                                    \
+  prev->next = next;                                                    \
+}                                                                       \
+  /* Unlink F; returns the entry for F's old next link (the head's, if F was last). */ \
+static inline ETYPE*                                                    \
+LTYPE ## _remove (ETYPE *f)                                             \
+{                                                                       \
+  LTYPE *i = f->LNAME.next;                                             \
+  LTYPE ## _del (f->LNAME.next, f->LNAME.prev);                         \
+  return LTYPE ## _entry (i);                                           \
+}                                                                       \
+  /* Unlink and return the last element; L is not checked for emptiness. */ \
+static inline ETYPE*                                                    \
+LTYPE ## _pop_back (LTYPE *l)                                           \
+{                                                                       \
+  LTYPE *i = l->prev;                                                   \
+  LTYPE ## _del (i->next, i->prev);                                     \
+  return LTYPE ## _entry (i);                                           \
+}                                                                       \
+  /* Unlink and return the first element; L is not checked for emptiness. */ \
+static inline ETYPE*                                                    \
+LTYPE ## _pop_front (LTYPE *l)                                          \
+{                                                                       \
+  LTYPE *i = l->next;                                                   \
+  LTYPE ## _del (i->next, i->prev);                                     \
+  return LTYPE ## _entry (i);                                           \
+}                                                                       \
+  /* Nonzero when L's next pointer is L itself, i.e. the list is empty. */ \
+static inline int                                                       \
+LTYPE ## _empty (LTYPE *l)                                              \
+{                                                                       \
+  return l == l->next;                                                  \
+}                                                                       \
+  /* Entry for the first link after F; nothing is unlinked. */          \
+static inline ETYPE*                                                    \
+LTYPE ## _front (LTYPE *f)                                              \
+{                                                                       \
+  return LTYPE ## _entry (f->next);                                     \
+}                                                                       \
+  /* Entry for the last link before F; nothing is unlinked. */          \
+static inline ETYPE*                                                    \
+LTYPE ## _back (LTYPE *f)                                               \
+{                                                                       \
+  return LTYPE ## _entry (f->prev);                                     \
+}                                                                       \
+  /* Nonzero when I's link is the head F itself (a walk has wrapped around). */ \
+static inline int                                                       \
+LTYPE ## _end (LTYPE *f, ETYPE *i)                                      \
+{                                                                       \
+  return f == & i->LNAME;                                               \
+}                                                                       \
+  /* Entry for the link that follows F's link. */                       \
+static inline ETYPE*                                                    \
+LTYPE ## _next (ETYPE *f)                                               \
+{                                                                       \
+  return LTYPE ## _entry (f->LNAME.next);                               \
+}                                                                       \
+  /* Number of links other than L, counted by walking the whole list. */ \
+static inline usize_t                                                   \
+LTYPE ## _length (LTYPE *l)                                             \
+{                                                                       \
+  LTYPE *p;                                                             \
+  int c = 0;                                                            \
+                                                                        \
+  for (p = l->next; p != l; p = p->next)                                \
+    {                                                                   \
+      c += 1;                                                           \
+    }                                                                   \
+                                                                        \
+  return c;                                                             \
+}                                                                       \
+  /* Dummy typedef so that a use of the macro can end with a semicolon. */ \
+typedef int unused_ ## LTYPE
+
+#endif
diff --git a/xdelta3-main.h b/xdelta3-main.h
new file mode 100644
index 0000000..55200bd
--- /dev/null
+++ b/xdelta3-main.h
@@ -0,0 +1,4242 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+ * Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* This is all the extra stuff you need for convenience to users in a
+ * command line application.  It contains these major components:
+ *
+ * 1. VCDIFF tools 2. external compression support (this is
+ * POSIX-specific).  3. a general read/write loop that handles all of
+ * the Xdelta decode/encode/VCDIFF-print functions 4. command-line
+ * interpreter 5. an Xdelta application header which stores default
+ * filename, external compression settings 6. output/error printing
+ * 7. basic file support and OS interface
+ */
+
+/* TODO list: 1. do exact gzip-like filename, stdout handling.  make a
+ * .vcdiff extension, refuse to encode to stdout without -cf, etc.
+ * 2. Allow the user to add a comment string to the app header without
+ * disturbing the default behavior.  3. "Source file must be seekable"
+ * is not actually true for encoding, given current behavior.  Allow
+ * non-seekable sources?  It would in theory let you use a fifo for
+ * the source.
+ */
+
+/* On error handling and printing:
+ *
+ * The xdelta library sets stream->msg to indicate what condition
+ * caused an internal failure, but many failures originate here and
+ * are printed here.  The return convention is 0 for success, as
+ * throughout Xdelta code, but special attention is required here for
+ * the operating system calls with different error handling.  See the
+ * main_file_* routines.  All errors in this file have a message
+ * printed at the time of occurrence.  Since some of these calls occur
+ * within calls to the library, the error may end up being printed
+ * again with a more general error message.
+ */
+
+/*********************************************************************/
+
+#ifndef XD3_POSIX
+#define XD3_POSIX 0
+#endif
+#ifndef XD3_STDIO
+#define XD3_STDIO 0
+#endif
+#ifndef XD3_WIN32
+#define XD3_WIN32 0
+#endif
+#ifndef NOT_MAIN
+#define NOT_MAIN 0
+#endif
+
+/* Combines xd3_strerror() and strerror() */
+const char* xd3_mainerror(int err_num);
+
+/* XPRINTX (used by main) prefixes an "xdelta3: " to the output. */
+#define XPR fprintf
+#define NT stderr, "xdelta3: "
+
+/* If none are set, default to posix. */
+#if (XD3_POSIX + XD3_STDIO + XD3_WIN32) == 0
+#undef XD3_POSIX
+#define XD3_POSIX 1
+#endif
+
+/* Handle externally-compressed inputs. */
+#ifndef EXTERNAL_COMPRESSION
+#define EXTERNAL_COMPRESSION 1
+#endif
+
+#define PRINTHDR_SPECIAL -4378291
+
+/* The number of soft-config variables.  */
+#define XD3_SOFTCFG_VARCNT 7
+
+/* this is used as in XPR(NT XD3_LIB_ERRMSG (stream, ret)) to print an
+ * error message from the library. */
+#define XD3_LIB_ERRMSG(stream, ret) "%s: %s\n", \
+    xd3_errstring (stream), xd3_mainerror (ret)
+
+#include <stdio.h>  /* fprintf */
+
+#if XD3_POSIX
+#include <unistd.h> /* close, read, write... */
+#include <sys/types.h>
+#include <fcntl.h>
+#endif
+
+#ifndef _WIN32
+#include <unistd.h> /* lots */
+#include <sys/time.h> /* gettimeofday() */
+#include <sys/stat.h> /* stat() and fstat() */
+#else
+#if defined(_MSC_VER)
+#define strtoll _strtoi64
+#endif
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifndef WIFEXITED
+#   define WIFEXITED(stat)  (((*((int *) &(stat))) & 0xff) == 0)
+#endif
+#ifndef WEXITSTATUS
+#   define WEXITSTATUS(stat) (((*((int *) &(stat))) >> 8) & 0xff)
+#endif
+#ifndef S_ISREG
+//#   ifdef S_IFREG
+//#       define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+//#   else
+#       define S_ISREG(m) 1
+//#   endif
+#endif /* !S_ISREG */
+
+// For standard input/output handles
+static STARTUPINFO winStartupInfo;
+#endif
+
+/**********************************************************************
+ ENUMS and TYPES
+ *********************************************************************/
+
+/* These flags (mainly pertaining to main_read() operations) are set
+ * in the main_file->flags variable.  All are related to external
+ * decompression support.
+ *
+ * RD_FIRST causes the external decompression check when the input is
+ * first read.
+ *
+ * RD_NONEXTERNAL disables external decompression for reading a
+ * compressed input, in the case of Xdelta inputs.  Note: Xdelta is
+ * itself supported as an external compression type, which is the
+ * reason for this flag.  An example to justify this is: to create a
+ * delta between two files that are VCDIFF-compressed.  Two external
+ * Xdelta decoders are run to supply decompressed source and target
+ * inputs to the Xdelta encoder. */
+typedef enum
+{
+  RD_FIRST        = (1 << 0),
+  RD_NONEXTERNAL  = (1 << 1),
+  RD_EXTERNAL_V1  = (1 << 2),  /* NOTE(review): not described above; meaning not visible here */
+} xd3_read_flags;
+
+/* main_file->mode values; the XOPEN_* macros test for XO_READ and treat anything else as write. */
+typedef enum
+{
+  XO_READ  = 0,  /* "rb" / O_RDONLY, creation mode 0, labelled "read" */
+  XO_WRITE = 1,  /* "wb" / O_WRONLY | O_CREAT | O_TRUNC, creation mode 0666, labelled "write" */
+} main_file_modes;
+
+/* Main commands.  For example, CMD_PRINTHDR is the "xdelta printhdr"
+ * command. */
+typedef enum
+{
+  CMD_NONE = 0,
+  CMD_PRINTHDR,
+  CMD_PRINTHDRS,
+  CMD_PRINTDELTA,
+  CMD_RECODE,
+  CMD_MERGE_ARG,
+  CMD_MERGE,
+#if XD3_ENCODER
+  CMD_ENCODE,      /* exists only when XD3_ENCODER is set; see IS_ENCODE() and CMD_DEFAULT */
+#endif
+  CMD_DECODE,      /* the numeric value of this and later entries depends on XD3_ENCODER */
+  CMD_TEST,
+  CMD_CONFIG,
+} xd3_cmd;
+
+#if XD3_ENCODER
+#define CMD_DEFAULT CMD_ENCODE /* default command when the encoder is compiled in */
+#define IS_ENCODE(cmd) ((cmd) == CMD_ENCODE) /* argument parenthesized: safe for any expression */
+#else
+#define CMD_DEFAULT CMD_DECODE /* decoder-only build: CMD_ENCODE does not exist */
+#define IS_ENCODE(cmd) (0)
+#endif
+
+typedef struct _main_file        main_file;
+typedef struct _main_extcomp     main_extcomp;
+typedef struct _main_blklru      main_blklru;
+typedef struct _main_blklru_list main_blklru_list;
+typedef struct _main_merge       main_merge;
+typedef struct _main_merge_list  main_merge_list;
+
+/* The main_file object supports abstract system calls like open,
+ * close, read, write, seek, stat.  The program uses these to
+ * represent both seekable files and non-seekable files.  Source files
+ * must be seekable, but the target input and any output file do not
+ * require seekability.
+ */
+struct _main_file
+{
+#if XD3_STDIO
+  FILE               *file;          /* NULL while closed */
+#elif XD3_POSIX
+  int                 file;          /* -1 while closed */
+#elif XD3_WIN32
+  HANDLE              file;          /* INVALID_HANDLE_VALUE while closed */
+#endif
+
+  int                 mode;          /* XO_READ or XO_WRITE; stored by main_file_open() */
+  const char         *filename;      /* File name or /dev/stdin,
+				      * /dev/stdout, /dev/stderr. */
+  char               *filename_copy; /* When non-NULL, released with
+				      * main_free() by main_file_cleanup(). */
+  const char         *realname;      /* Name given to the last successful
+				      * main_file_open() call. */
+  const main_extcomp *compressor;    /* External compression struct. */
+  int                 flags;         /* RD_FIRST, RD_NONEXTERNAL, ... */
+  xoff_t              nread;         /* for input position; zeroed by main_file_open() */
+  xoff_t              nwrite;        /* for output position */
+  uint8_t            *snprintf_buf;  /* internal snprintf() use; freed by main_file_cleanup() */
+};
+
+/* Various strings and magic values used to detect and call external
+ * compression.  See below for examples. */
+struct _main_extcomp
+{
+  const char    *recomp_cmdname;  /* e.g. "gzip" in extcomp_types[] */
+  const char    *recomp_options;  /* e.g. "-cf" */
+
+  const char    *decomp_cmdname;  /* e.g. "gzip"; "uncompress" for the compress entry */
+  const char    *decomp_options;  /* e.g. "-dcf" */
+
+  const char    *ident;           /* identifier string, e.g. "G" */
+  const char    *magic;           /* magic bytes, e.g. "\037\213" */
+  usize_t        magic_size;      /* number of bytes in MAGIC */
+  int            flags;           /* 0 or RD_NONEXTERNAL in extcomp_types[] */
+};
+
+/* This file implements a small LRU of source blocks.  For encoding purposes,
+ * we prevent paging in blocks we've already scanned in the source (return
+ * XD3_NOTAVAIL). */
+struct _main_blklru_list
+{
+  main_blklru_list  *next;  /* link type handed to XD3_MAKELIST below; */
+  main_blklru_list  *prev;  /* embedded in main_blklru as member "link" */
+};
+
+struct _main_blklru
+{
+  uint8_t         *blk;    /* presumably the cached block's data -- TODO confirm */
+  xoff_t           blkno;  /* presumably the source block number held in BLK -- TODO confirm */
+  main_blklru_list  link;  /* linkage used by the main_blklru_list_* helpers */
+};
+
+#define LRU_SIZE 32U
+#define XD3_MINSRCWINSZ XD3_ALLOCSIZE
+
+/* ... represented as a list (no cache index). */
+XD3_MAKELIST(main_blklru_list,main_blklru,link);
+
+/* Merge state: */
+
+struct _main_merge_list
+{
+  main_merge_list  *next;  /* link type handed to XD3_MAKELIST below; */
+  main_merge_list  *prev;  /* embedded in main_merge as member "link" */
+};
+
+struct _main_merge
+{
+  const char *filename;  /* presumably one merge input's file name -- TODO confirm */
+
+  main_merge_list  link; /* linkage used by the main_merge_list_* helpers */
+};
+
+XD3_MAKELIST(main_merge_list,main_merge,link);
+
+// TODO: really need to put options in a struct so that internal
+// callers can easily reset state.
+
+/* Program options: various command line flags and options. */
+static int         option_stdout             = 0;
+static int         option_force              = 0; /* Win32 open for write: CREATE_ALWAYS instead of CREATE_NEW */
+static int         option_verbose            = 0; /* values > 3 trace malloc/free calls */
+static int         option_quiet              = 0; /* nonzero silences XF_ERROR messages */
+static int         option_use_appheader      = 1;
+static uint8_t*    option_appheader          = NULL;
+static int         option_use_secondary      = 0;
+static char*       option_secondary          = NULL;
+static int         option_use_checksum       = 1;
+static int         option_use_altcodetable   = 0;
+static char*       option_smatch_config      = NULL;
+static int         option_no_compress        = 0;
+static int         option_no_output          = 0; /* do not write output */
+static const char *option_source_filename    = NULL;
+
+static int         option_level              = XD3_DEFAULT_LEVEL;
+static usize_t     option_iopt_size          = XD3_DEFAULT_IOPT_SIZE;
+static usize_t     option_winsize            = XD3_DEFAULT_WINSIZE;
+static usize_t     option_srcwinsz           = XD3_DEFAULT_SRCWINSZ;
+static usize_t     option_sprevsz            = XD3_DEFAULT_SPREVSZ;
+
+/* These variables are suppressed to avoid their use w/o support.  main() warns
+ * appropriately. */
+#if EXTERNAL_COMPRESSION
+static int         option_decompress_inputs  = 1;
+static int         option_recompress_outputs = 1;
+#endif
+
+/* This is for comparing "printdelta" output without attention to
+ * copy-instruction modes. */
+#if VCDIFF_TOOLS
+static int option_print_cpymode = 1; /* Note: see reset_defaults(). */ 
+#endif
+
+/* Static variables */
+IF_DEBUG(static int main_mallocs = 0;)
+
+static char*           program_name = NULL;
+static uint8_t*        appheader_used = NULL;
+static uint8_t*        main_bdata = NULL;
+static usize_t         main_bsize = 0;
+
+/* The LRU: obviously this is shared by all callers. */
+static usize_t           lru_size = 0;
+static main_blklru      *lru = NULL;  /* array of lru_size elts */
+static main_blklru_list  lru_list;
+static main_blklru_list  lru_free;
+static int               do_not_lru = 0;  /* set to avoid lru */
+
+static int lru_hits   = 0;
+static int lru_misses = 0;
+static int lru_filled = 0;
+
+/* Hacks for VCDIFF tools */
+static int allow_fake_source = 0;
+
+/* recode_stream is used by both recode/merge for reading vcdiff inputs */
+static xd3_stream *recode_stream = NULL;
+
+/* merge_stream is used by merge commands for storing the source encoding */
+static xd3_stream *merge_stream = NULL;
+
+/* This array of compressor types is compiled even if EXTERNAL_COMPRESSION is
+ * false just so the program knows the mapping of IDENT->NAME. */
+static main_extcomp extcomp_types[] =
+{
+  /* The entry for xdelta3 must be 0 because the program_name is set there. */
+  { "xdelta3",  "-cfq",  "xdelta3",    "-dcfq",  "X", "\xd6\xc3\xc4", 3,
+    RD_NONEXTERNAL },
+  { "bzip2",    "-cf",   "bzip2",      "-dcf",   "B", "BZh",          3, 0 },
+  { "gzip",     "-cf",   "gzip",       "-dcf",   "G", "\037\213",     2, 0 },
+  { "compress", "-cf",   "uncompress", "-cf",    "Z", "\037\235",     2, 0 },
+
+  /* TODO: add commandline support for magic-less formats */
+  /*{ "lzma", "-cf",   "lzma", "-dcf",   "M", "]\000", 2, 0 },*/
+};
+
+// };
+
+static int main_input (xd3_cmd cmd, main_file *ifile,
+                       main_file *ofile, main_file *sfile);
+static void main_get_appheader (xd3_stream *stream, main_file *ifile,
+				main_file *output, main_file *sfile);
+
+static int main_help (void);
+
+static int
+main_version (void)
+{ /* Prints the version and licence banner through DP(RINT ...); always returns EXIT_SUCCESS. */
+  /* $Format: "  DP(RINT \"Xdelta version $Xdelta3Version$, Copyright (C) 2007, 2008, Joshua MacDonald\n\");" $ */
+  DP(RINT "Xdelta version 3.0u, Copyright (C) 2007, 2008, Joshua MacDonald\n");
+  DP(RINT "Xdelta comes with ABSOLUTELY NO WARRANTY.\n");
+  DP(RINT "This is free software, and you are welcome to redistribute it\n");
+  DP(RINT "under certain conditions; see \"COPYING\" for details.\n");
+  return EXIT_SUCCESS;
+}
+
+static int
+main_config (void)
+{ /* Prints the version banner followed by the compile-time configuration; returns EXIT_SUCCESS. */
+  main_version ();
+  /* One NAME=value line per build setting. */
+  DP(RINT "EXTERNAL_COMPRESSION=%d\n", EXTERNAL_COMPRESSION);
+  DP(RINT "GENERIC_ENCODE_TABLES=%d\n", GENERIC_ENCODE_TABLES);
+  DP(RINT "GENERIC_ENCODE_TABLES_COMPUTE=%d\n", GENERIC_ENCODE_TABLES_COMPUTE);
+  DP(RINT "REGRESSION_TEST=%d\n", REGRESSION_TEST);
+  DP(RINT "SECONDARY_DJW=%d\n", SECONDARY_DJW);
+  DP(RINT "SECONDARY_FGK=%d\n", SECONDARY_FGK);
+  DP(RINT "UNALIGNED_OK=%d\n", UNALIGNED_OK);
+  DP(RINT "VCDIFF_TOOLS=%d\n", VCDIFF_TOOLS);
+  DP(RINT "XD3_ALLOCSIZE=%d\n", XD3_ALLOCSIZE);
+  DP(RINT "XD3_DEBUG=%d\n", XD3_DEBUG);
+  DP(RINT "XD3_ENCODER=%d\n", XD3_ENCODER);
+  DP(RINT "XD3_POSIX=%d\n", XD3_POSIX);
+  DP(RINT "XD3_STDIO=%d\n", XD3_STDIO);
+  DP(RINT "XD3_WIN32=%d\n", XD3_WIN32);
+  DP(RINT "XD3_USE_LARGEFILE64=%d\n", XD3_USE_LARGEFILE64);
+  DP(RINT "XD3_DEFAULT_LEVEL=%d\n", XD3_DEFAULT_LEVEL);
+  DP(RINT "XD3_DEFAULT_IOPT_SIZE=%d\n", XD3_DEFAULT_IOPT_SIZE);
+  DP(RINT "XD3_DEFAULT_SPREVSZ=%d\n", XD3_DEFAULT_SPREVSZ);
+  DP(RINT "XD3_DEFAULT_SRCWINSZ=%d\n", XD3_DEFAULT_SRCWINSZ);
+  DP(RINT "XD3_DEFAULT_WINSIZE=%d\n", XD3_DEFAULT_WINSIZE);
+  DP(RINT "XD3_HARDMAXWINSIZE=%d\n", XD3_HARDMAXWINSIZE);
+  DP(RINT "sizeof(void*)=%ld\n", (long) sizeof(void*));
+  DP(RINT "sizeof(int)=%ld\n", (long) sizeof(int));
+  DP(RINT "sizeof(uint32_t)=%ld\n", (long) sizeof(uint32_t));
+  DP(RINT "sizeof(uint64_t)=%ld\n", (long) sizeof(uint64_t));
+  DP(RINT "sizeof(usize_t)=%ld\n", (long) sizeof(usize_t));
+  DP(RINT "sizeof(xoff_t)=%ld\n", (long) sizeof(xoff_t));
+  /* sizeof yields size_t, which need not be as wide as long: cast to match %ld. */
+  return EXIT_SUCCESS;
+}
+
+static void
+reset_defaults(void)
+{ /* Restores the option and bookkeeping globals assigned below to their initial values. */
+  option_stdout = 0;
+  option_force = 0;
+  option_verbose = 0;
+  option_quiet = 0;
+  option_appheader = NULL;
+  option_use_secondary = 0;
+  option_secondary = NULL;
+  option_use_altcodetable = 0;
+  option_smatch_config = NULL;
+  option_no_compress = 0;
+  option_no_output = 0;
+  option_source_filename = NULL;
+  program_name = NULL;
+  appheader_used = NULL;
+  main_bdata = NULL;
+  main_bsize = 0;
+  lru_size = 0;
+  lru = NULL;
+  do_not_lru = 0;
+  lru_hits   = 0;
+  lru_misses = 0;
+  lru_filled = 0;
+  allow_fake_source = 0;
+  /* NOTE(review): lru_list, lru_free, recode_stream and merge_stream are not reset here. */
+  /* Settings whose defaults are not 0/NULL: */
+  option_use_appheader = 1;
+  option_use_checksum = 1;
+#if EXTERNAL_COMPRESSION
+  option_decompress_inputs  = 1;
+  option_recompress_outputs = 1;
+#endif
+#if VCDIFF_TOOLS
+  option_print_cpymode = 1;
+#endif
+  option_level = XD3_DEFAULT_LEVEL;
+  option_iopt_size = XD3_DEFAULT_IOPT_SIZE;
+  option_winsize = XD3_DEFAULT_WINSIZE;
+  option_srcwinsz = XD3_DEFAULT_SRCWINSZ;
+  option_sprevsz = XD3_DEFAULT_SPREVSZ;
+}
+
+static void*
+main_malloc1 (usize_t size)
+{ /* malloc() wrapper: reports a failure, and traces a success when option_verbose > 3. */
+  void *const mem = malloc (size);
+  if (mem != NULL && option_verbose > 3) { XPR(NT "malloc: %u: %p\n", size, mem); }
+  if (mem == NULL) { XPR(NT "malloc: %s\n", xd3_mainerror (ENOMEM)); }
+  return mem;
+}
+
+static void*
+main_malloc (usize_t size)
+{ /* Like main_malloc1(); each success also bumps the IF_DEBUG-guarded main_mallocs count. */
+  void *const mem = main_malloc1 (size);
+  if (mem != NULL) { IF_DEBUG (main_mallocs += 1); }
+  return mem;
+}
+
+static void*
+main_alloc (void *opaque, usize_t items, usize_t size)
+{ /* Allocates ITEMS * SIZE bytes (OPAQUE is unused); NULL if the product wraps or malloc fails. */
+  if (size != 0 && items > USIZE_T_MAX / size)
+    { XPR(NT "malloc: %s\n", xd3_mainerror (ENOMEM)); return NULL; }
+  return main_malloc1 (items * size);
+}
+
+static void
+main_free1 (void *opaque, void *ptr)
+{ /* free() wrapper (OPAQUE is unused); traces the pointer first when option_verbose > 3. */
+  if (option_verbose >= 4) { XPR(NT "free: %p\n", ptr); }
+  free (ptr);
+}
+
+static void
+main_free (void *ptr)
+{
+  /* Counterpart of main_malloc(): NULL is ignored; otherwise the
+   * IF_DEBUG-guarded main_mallocs count is decremented and checked. */
+  if (ptr == NULL) { return; }
+  IF_DEBUG (main_mallocs -= 1);
+  main_free1 (NULL, ptr);
+  IF_DEBUG (XD3_ASSERT(main_mallocs >= 0));
+}
+
+/* This ensures that (ret = errno) always indicates failure, in case errno was
+ * accidentally not set.  If this prints there's a bug somewhere. */
+static int
+get_errno (void)
+{
+#ifndef _WIN32
+  if (errno == 0)
+    {
+      XPR(NT "you found a bug: expected errno != 0\n");
+      errno = XD3_INTERNAL;
+    }
+  return errno;
+#else
+  DWORD errNum = GetLastError();
+  if (errNum == NO_ERROR) {
+	  errNum = XD3_INTERNAL;
+  }
+  return errNum;
+#endif
+}
+
+const char*
+xd3_mainerror(int err_num) {
+	/* Prefer xd3_strerror()'s message; use the system's text when it returns NULL. */
+	const char* msg = xd3_strerror (err_num);
+#ifndef _WIN32
+	if (msg == NULL) {
+		msg = strerror(err_num);
+	}
+	return msg;
+#else
+	static char err_buf[256];
+	if (msg != NULL) {
+		return msg;
+	}
+	memset (err_buf, 0, 256);
+	FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM |
+		       FORMAT_MESSAGE_IGNORE_INSERTS,
+		NULL, err_num,
+		MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
+		err_buf, 256, NULL);
+	return err_buf;
+#endif
+}
+
+static long
+get_millisecs_now (void)
+{
+#ifndef _WIN32
+  /* Seconds and microseconds from gettimeofday(), combined into milliseconds. */
+  struct timeval now;
+  gettimeofday (& now, NULL);
+  return (now.tv_sec) * 1000L + (now.tv_usec) / 1000;
+#else
+  /* Local time as a FILETIME, divided by 10000 (100 ns ticks -> ms per the Win32 docs). */
+  SYSTEMTIME st;
+  FILETIME ft;
+  __int64 *pi = (__int64*)&ft;
+  GetLocalTime(&st);
+  SystemTimeToFileTime(&st, &ft);
+  return (long)((*pi) / 10000);
+#endif
+}
+
+/* Milliseconds since the previous call (first call: since 0); may be 0. */
+static long
+get_millisecs_since (void)
+{
+  static long prev_ms = 0;
+  const long cur_ms = get_millisecs_now();
+  const long elapsed = cur_ms - prev_ms;
+  prev_ms = cur_ms;
+  return elapsed;
+}
+
+static char*
+main_format_bcnt (xoff_t r, char *buf)
+{
+  /* Scale R down by 1024 per unit until it is at most 10 * 1024 or the
+   * largest unit is reached, then print "<value> <unit>" into BUF. */
+  static const char* fmts[] = { "B", "KB", "MB", "GB" };
+  const usize_t last = SIZEOF_ARRAY(fmts) - 1;
+  usize_t unit = 0;
+
+  while (unit != last && r > (10 * 1024))
+    {
+      r /= 1024;
+      unit += 1;
+    }
+  sprintf (buf, "%"Q"u %s", r, fmts[unit]);
+  return buf;
+}
+
+static char*
+main_format_rate (xoff_t bytes, long millis, char *buf)
+{ /* Writes "<count> <unit>/sec" for BYTES moved in MILLIS ms into BUF; returns BUF. */
+  static char lbuf[32];
+  /* Clamp: a zero or negative interval would divide by zero (inf -> integer is UB). */
+  double secs = (millis < 1 ? 1 : millis) / 1000.0;
+  main_format_bcnt ((xoff_t)(1.0 * bytes / secs), lbuf);
+  sprintf (buf, "%s/sec", lbuf);
+  return buf;
+}
+
+static char*
+main_format_millis (long millis, char *buf)
+{ /* Renders MILLIS into BUF as "N ms", "N.N sec" or "N sec"; signed long needs %ld, not %lu. */
+  if (millis < 1000)       { sprintf (buf, "%ld ms", millis); }
+  else if (millis < 10000) { sprintf (buf, "%.1f sec", millis / 1000.0); }
+  else                     { sprintf (buf, "%ld sec", millis / 1000L); }
+  return buf;
+}
+
+/* A safe version of strtol for xoff_t: 0 on success, else a "-WHICH: ..." message. */
+static int
+main_strtoxoff (const char* s, xoff_t *xo, char which)
+{
+  char *e;
+  XD3_ASSERT(s && *s != 0);
+
+  /* Cleared so that ERANGE below can only have come from this conversion. */
+  errno = 0;
+  {
+#if SIZEOF_XOFF_T == 4
+    long xx = strtol (s, &e, 0);
+#else
+    long long xx = strtoll (s, &e, 0);
+#endif
+    if (xx < 0)
+      {
+	XPR(NT "-%c: negative integer: %s\n", which, s);
+	return EXIT_FAILURE;
+      }
+    if (errno == ERANGE)
+      {
+	XPR(NT "-%c: integer out of range: %s\n", which, s);
+	return EXIT_FAILURE;
+      }
+    /* E == S: no digits were consumed (an empty S when asserts are off). */
+    if (e == s || *e != 0)
+      {
+	XPR(NT "-%c: invalid integer: %s\n", which, s);
+	return EXIT_FAILURE;
+      }
+    (*xo) = xx;
+  }
+  return 0;
+}
+
+static int
+main_atou (const char* arg, usize_t *xo, usize_t low,
+	   usize_t high, char which)
+{
+  /* Parse ARG and range-check it against [LOW, HIGH]; HIGH == 0 means
+   * "no explicit maximum" and is replaced by USIZE_T_MAX.  On success the
+   * value is stored in *XO and 0 is returned. */
+  const usize_t limit = (high == 0) ? USIZE_T_MAX : high;
+  xoff_t parsed;
+  int err = main_strtoxoff (arg, & parsed, which);
+
+  if (err != 0) { return err; }
+  if (parsed < low)
+    {
+      XPR(NT "-%c: minimum value: %u\n", which, low);
+      return EXIT_FAILURE;
+    }
+  if (parsed > limit)
+    {
+      XPR(NT "-%c: maximum value: %u\n", which, limit);
+      return EXIT_FAILURE;
+    }
+
+  (*xo) = (usize_t)parsed;
+  return 0;
+}
+
+/******************************************************************
+ FILE BASICS
+ ******************************************************************/
+
+/* With all the variation in file system-call semantics, arguments,
+ * return values and error-handling for the POSIX and STDIO file APIs,
+ * the insides of these functions make me sick, which is why these
+ * wrappers exist. */
+
+#define XOPEN_OPNAME (xfile->mode == XO_READ ? "read" : "write")
+#define XOPEN_STDIO  (xfile->mode == XO_READ ? "rb" : "wb")
+#define XOPEN_POSIX  (xfile->mode == XO_READ ? \
+		      O_RDONLY : O_WRONLY | O_CREAT | O_TRUNC)
+#define XOPEN_MODE   (xfile->mode == XO_READ ? 0 : 0666)
+
+#define XF_ERROR(op, name, ret) /* needs a local `xfile`; RET is evaluated only when !option_quiet */ \
+  do { if (!option_quiet) { XPR(NT "file %s failed: %s: %s: %s\n", (op), \
+       XOPEN_OPNAME, (name), xd3_mainerror (ret)); } } while (0)
+
+#if XD3_STDIO
+#define XFNO(f) fileno(f->file)
+#define XSTDOUT_XF(f) { (f)->file = stdout; (f)->filename = "/dev/stdout"; }
+#define XSTDIN_XF(f)  { (f)->file = stdin;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_POSIX
+#define XFNO(f) f->file
+#define XSTDOUT_XF(f) \
+  { (f)->file = STDOUT_FILENO; (f)->filename = "/dev/stdout"; }
+#define XSTDIN_XF(f) \
+  { (f)->file = STDIN_FILENO;  (f)->filename = "/dev/stdin"; }
+
+#elif XD3_WIN32
+#define XFNO(f) -1
+#define XSTDOUT_XF(f) { \
+  (f)->file = GetStdHandle(STD_OUTPUT_HANDLE); \
+  (f)->filename = "(stdout)"; \
+  }
+#define XSTDIN_XF(f) { \
+  (f)->file = GetStdHandle(STD_INPUT_HANDLE); \
+  (f)->filename = "(stdin)"; \
+  }
+#endif
+
+static void
+main_file_init (main_file *xfile)
+{ /* Zero every field of XFILE, then store the backend's "not open" handle value. */
+  memset (xfile, 0, sizeof (*xfile));
+  /* No extra store in the XD3_STDIO case: the memset already left file == NULL. */
+#if XD3_POSIX
+  xfile->file = -1;
+#endif
+#if XD3_WIN32
+  xfile->file = INVALID_HANDLE_VALUE;
+#endif
+}
+
+static int
+main_file_isopen (main_file *xfile)
+{ /* Nonzero when XFILE's handle differs from the backend's "not open" value. */
+#if XD3_STDIO
+  return xfile->file != NULL;
+
+#elif XD3_POSIX
+  return xfile->file != -1;
+
+#elif XD3_WIN32
+  return xfile->file != INVALID_HANDLE_VALUE;
+#endif
+}
+
+static int
+main_file_close (main_file *xfile)
+{ /* Close XFILE if it is open and mark it closed; returns 0 or the OS error code. */
+  int ret = 0;
+
+  if (! main_file_isopen (xfile))
+    {
+      return 0;
+    }
+
+#if XD3_STDIO
+  ret = fclose (xfile->file);
+  xfile->file = NULL;
+
+#elif XD3_POSIX
+  ret = close (xfile->file);
+  xfile->file = -1;
+
+#elif XD3_WIN32
+  if (!CloseHandle(xfile->file)) {
+    ret = get_errno ();
+  }
+  xfile->file = INVALID_HANDLE_VALUE;
+#endif
+  /* Fetch the code before XF_ERROR: the macro skips its arguments when option_quiet is set. */
+  if (ret != 0) { ret = get_errno (); XF_ERROR ("close", xfile->filename, ret); }
+  return ret;
+}
+
+static void
+main_file_cleanup (main_file *xfile)
+{
+  /* Release what XFILE holds: its open handle, if any, and its two
+   * heap buffers.  The main_file object itself is left allocated and
+   * its freed pointers are reset to NULL. */
+  XD3_ASSERT (xfile != NULL);
+
+  if (main_file_isopen (xfile))
+    {
+      main_file_close (xfile);
+    }
+
+  /* main_free() ignores NULL, so the pointers need no guard of their
+   * own before being released. */
+  main_free(xfile->snprintf_buf);
+  xfile->snprintf_buf = NULL;
+
+  main_free(xfile->filename_copy);
+  xfile->filename_copy = NULL;
+  /* The other fields are left untouched. */
+}
+
+static int
+main_file_open (main_file *xfile, const char* name, int mode)
+{ /* Open NAME on XFILE for MODE (XO_READ or XO_WRITE); returns 0 or an error code. */
+  int ret = 0;
+  /* MODE is recorded first because the XOPEN_* macros below read xfile->mode. */
+  xfile->mode = mode;
+
+  XD3_ASSERT (name != NULL);
+  XD3_ASSERT (! main_file_isopen (xfile));
+  if (name[0] == 0)
+    {
+      XPR(NT "invalid file name: empty string\n");
+      return XD3_INVALID;
+    }
+
+#if XD3_STDIO
+  xfile->file = fopen (name, XOPEN_STDIO);
+
+  ret = (xfile->file == NULL) ? get_errno () : 0;
+
+#elif XD3_POSIX
+  if ((ret = open (name, XOPEN_POSIX, XOPEN_MODE)) < 0)
+    {
+      ret = get_errno ();
+    }
+  else
+    {
+      xfile->file = ret; /* RET briefly holds the new descriptor */
+      ret = 0;
+    }
+
+#elif XD3_WIN32
+  xfile->file = CreateFile(name,
+	  (mode == XO_READ) ? GENERIC_READ : GENERIC_WRITE,
+	  FILE_SHARE_READ,
+	  NULL,
+	  (mode == XO_READ) ? OPEN_EXISTING :
+			   (option_force ? CREATE_ALWAYS : CREATE_NEW),
+	  FILE_ATTRIBUTE_NORMAL,
+	  NULL);
+  if (xfile->file == INVALID_HANDLE_VALUE) {
+	  ret = get_errno ();
+  }
+#endif
+  if (ret) { XF_ERROR ("open", name, ret); }
+  else     { xfile->realname = name; xfile->nread = 0; } /* filename and nwrite are not set here */
+  return ret;
+}
+
+/* Store the size of the open file XFILE in *SIZE.
+ *
+ * Win32: the size is queried from the handle; ERR_IFNOSEEK is not used.
+ * Otherwise: the descriptor is fstat()ed and anything that is not a
+ * regular file is rejected with ESPIPE.  In this branch ERR_IFNOSEEK
+ * only controls whether a failure is printed; the error code is
+ * returned either way.
+ *
+ * Returns 0 on success; *SIZE is left untouched on failure. */
+static int
+main_file_stat (main_file *xfile, xoff_t *size, int err_ifnoseek)
+{
+  int ret = 0;
+#if XD3_WIN32
+# if (_WIN32_WINNT >= 0x0500)
+  LARGE_INTEGER wide;
+  if (GetFileSizeEx(xfile->file, &wide) != 0)
+    {
+      *size = wide.QuadPart;
+    }
+  else
+    {
+      ret = get_errno ();
+    }
+# else
+  DWORD low_size = GetFileSize(xfile->file, NULL);
+  if (low_size == INVALID_FILE_SIZE)
+    {
+      /* INVALID_FILE_SIZE is only an error when GetLastError() says so. */
+      ret = GetLastError();
+      if (ret != NO_ERROR)
+        {
+          return ret;
+        }
+    }
+  *size = low_size;
+# endif
+#else
+  struct stat info;
+  if (fstat (XFNO (xfile), & info) < 0)
+    {
+      ret = get_errno ();
+      if (err_ifnoseek)
+        {
+          XF_ERROR ("stat", xfile->filename, ret);
+        }
+      return ret;
+    }
+
+  if (! S_ISREG (info.st_mode))
+    {
+      if (err_ifnoseek)
+        {
+          XPR(NT "source file must be seekable: %s\n", xfile->filename);
+        }
+      return ESPIPE;
+    }
+
+  (*size) = info.st_size;
+#endif
+  return ret;
+}
+
+/* Return 1 when xfile->filename names an existing regular file, and 0
+ * when stat() fails or the path is some other kind of file. */
+static int
+main_file_exists (main_file *xfile)
+{
+  struct stat info;
+
+  if (stat (xfile->filename, & info) != 0)
+    {
+      return 0;
+    }
+
+  return S_ISREG (info.st_mode) ? 1 : 0;
+}
+
+#if (XD3_POSIX || EXTERNAL_COMPRESSION)
+/* POSIX-generic code takes a function pointer to read() or write().
+ * The pointed-to function returns a signed result: < 0 indicates an
+ * error, otherwise it is a byte count. */
+typedef int (xd3_posix_func) (int fd, uint8_t *buf, usize_t size);
+
+/* Call FUNC on FD repeatedly until SIZE bytes of BUF have been
+ * processed or, for reads, until FUNC reports end-of-file (returns 0).
+ *
+ * NREAD selects the mode: for reads it is non-NULL and receives the
+ * number of bytes obtained; for writes NULL is passed.
+ *
+ * Returns 0 on success, otherwise the code from get_errno().  EINTR
+ * and EAGAIN are treated as transient and the call is retried; in
+ * particular an interrupted read is never mistaken for end-of-file.
+ * On a descriptor in non-blocking mode this means EAGAIN is retried in
+ * a loop rather than reported. */
+static int
+xd3_posix_io (int fd, uint8_t *buf, usize_t size,
+              xd3_posix_func *func, usize_t *nread)
+{
+  usize_t nproc = 0;
+
+  while (nproc < size)
+    {
+      int result = (*func) (fd, buf + nproc, size - nproc);
+
+      if (result < 0)
+        {
+          int err = get_errno ();
+
+          if (err != EAGAIN && err != EINTR)
+            {
+              return err;
+            }
+
+          /* Transient failure: nothing was transferred, try again. */
+          continue;
+        }
+
+      /* A zero-byte read is end-of-file. */
+      if (nread != NULL && result == 0) { break; }
+
+      nproc += result;
+    }
+
+  if (nread != NULL) { (*nread) = nproc; }
+  return 0;
+}
+#endif
+
+/* POSIX is unbuffered, while STDIO is buffered.  main_file_read()
+ * should always be called on blocks.
+ *
+ * Reads up to SIZE bytes from IFILE into BUF and stores the number of
+ * bytes obtained in *NREAD.  On failure the error is printed, prefixed
+ * with MSG, and its code is returned.  On success the count is added
+ * to ifile->nread and 0 is returned. */
+static int
+main_file_read (main_file *ifile, uint8_t *buf, usize_t size,
+                usize_t *nread, const char *msg)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  usize_t got;
+
+  got = fread (buf, 1, size, ifile->file);
+
+  /* A short count is only an error when the stream says so. */
+  if (got < size && ferror (ifile->file))
+    {
+      ret = get_errno ();
+    }
+  else
+    {
+      *nread = got;
+    }
+
+#elif XD3_POSIX
+  ret = xd3_posix_io (ifile->file, buf, size, (xd3_posix_func*) &read, nread);
+
+#elif XD3_WIN32
+  DWORD win_got;
+  if (ReadFile (ifile->file, buf, size, &win_got, NULL) != 0)
+    {
+      *nread = (usize_t)win_got;
+    }
+  else
+    {
+      ret = get_errno();
+    }
+#endif
+
+  if (ret != 0)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ifile->filename, xd3_mainerror (ret));
+      return ret;
+    }
+
+  if (option_verbose > 3)
+    {
+      XPR(NT "main read: %s: %u\n", ifile->filename, (*nread));
+    }
+
+  ifile->nread += (*nread);
+  return 0;
+}
+
+static int
+main_file_write (main_file *ofile, uint8_t *buf, usize_t size, const char *msg)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  usize_t result;
+
+  result = fwrite (buf, 1, size, ofile->file);
+
+  if (result != size) { ret = get_errno (); }
+
+#elif XD3_POSIX
+  ret = xd3_posix_io (ofile->file, buf, size, (xd3_posix_func*) &write, NULL);
+
+#elif XD3_WIN32
+  DWORD nwrite;
+  if (WriteFile(ofile->file, buf, size, &nwrite, NULL) == 0) {
+	  ret = get_errno ();
+  } else {
+	  if (size != nwrite) {
+		  XPR(NT "Incorrect write count");
+		  ret = XD3_INTERNAL;
+	  }
+  }
+#endif
+
+  if (ret)
+    {
+      XPR(NT "%s: %s: %s\n", msg, ofile->filename, xd3_mainerror (ret));
+    }
+  else
+    {
+      if (option_verbose > 3) { XPR(NT "main write: %s: %u\n",
+				    ofile->filename, size); }
+      ofile->nwrite += size;
+    }
+
+  return ret;
+}
+
+/* Position XFILE at absolute offset POS, measured from the start of
+ * the file.  Returns 0 on success; otherwise prints "seek failed" and
+ * returns the code from get_errno(). */
+static int
+main_file_seek (main_file *xfile, xoff_t pos)
+{
+  int ret = 0;
+
+#if XD3_STDIO
+  if (fseek (xfile->file, pos, SEEK_SET) != 0)
+    {
+      ret = get_errno ();
+    }
+
+#elif XD3_POSIX
+  {
+    /* lseek() returns the resulting offset, which must equal POS. */
+    xoff_t landed = (xoff_t) lseek (xfile->file, pos, SEEK_SET);
+
+    if (landed != pos)
+      {
+        ret = get_errno ();
+      }
+  }
+
+#elif XD3_WIN32
+# if (_WIN32_WINNT >= 0x0500)
+  LARGE_INTEGER move, out;
+  move.QuadPart = pos;
+  if (SetFilePointerEx(xfile->file, move, &out, FILE_BEGIN) == 0)
+    {
+      ret = get_errno ();
+    }
+# else
+  if (SetFilePointer(xfile->file, (LONG)pos, NULL, FILE_BEGIN) ==
+      INVALID_SET_FILE_POINTER)
+    {
+      ret = get_errno ();
+    }
+# endif
+#endif
+
+  if (ret != 0)
+    {
+      XPR(NT "seek failed: %s: %s\n", xfile->filename, xd3_mainerror (ret));
+    }
+
+  return ret;
+}
+
+/* This function simply writes the stream output buffer, if there is
+ * any, for encode, decode and recode commands.  (The VCDIFF tools use
+ * main_print_func()).
+ *
+ * Nothing is written when option_no_output is set or when the stream
+ * has no pending output.  Returns 0 in those cases, otherwise the
+ * result of main_file_write(). */
+static int
+main_write_output (xd3_stream* stream, main_file *ofile)
+{
+  if (option_no_output)
+    {
+      return 0;
+    }
+
+  if (! (stream->avail_out > 0))
+    {
+      return 0;
+    }
+
+  return main_file_write (ofile, stream->next_out,
+                          stream->avail_out, "write failed");
+}
+
+/* Translate the secondary-compression options (option_use_secondary
+ * and option_secondary) into CONFIG's flags and per-section ngroups.
+ *
+ * Nothing is changed when secondary compression is not in use.  With
+ * no explicit type the default is DJW, if it is compiled in.  The
+ * recognized types are "fgk", "none", and "djw" optionally followed by
+ * a level 0-9 (e.g. "djw7").
+ *
+ * Returns 0 on success and XD3_INVALID for an unrecognized type or an
+ * unparsable level.  The error status does not depend on option_quiet;
+ * that flag only suppresses the message printed here, so an invalid
+ * argument is never silently accepted in quiet mode. */
+static int
+main_set_secondary_flags (xd3_config *config)
+{
+  if (! option_use_secondary)
+    {
+      return 0;
+    }
+
+  /* The default secondary compressor is DJW, if it's compiled. */
+  if (option_secondary == NULL)
+    {
+      if (SECONDARY_DJW)
+        {
+          config->flags |= XD3_SEC_DJW;
+        }
+      return 0;
+    }
+
+  if (strcmp (option_secondary, "fgk") == 0 && SECONDARY_FGK)
+    {
+      config->flags |= XD3_SEC_FGK;
+    }
+  else if (strncmp (option_secondary, "djw", 3) == 0 && SECONDARY_DJW)
+    {
+      usize_t level = XD3_DEFAULT_SECONDARY_LEVEL;
+
+      config->flags |= XD3_SEC_DJW;
+
+      /* Anything after "djw" must parse as a level in [0, 9]. */
+      if (strlen (option_secondary) > 3 &&
+          main_atou (option_secondary + 3, &level, 0, 9, 'S') != 0)
+        {
+          return XD3_INVALID;
+        }
+
+      /* XD3_SEC_NOXXXX flags disable secondary compression on
+       * a per-section basis.  For djw, ngroups=1 indicates
+       * minimum work, ngroups=0 uses default settings, which
+       * is > 1 groups by default. */
+      if (level < 1) { config->flags |= XD3_SEC_NODATA; }
+      if (level < 7) { config->sec_data.ngroups = 1; }
+      else { config->sec_data.ngroups = 0; }
+
+      if (level < 3) { config->flags |= XD3_SEC_NOINST; }
+      if (level < 8) { config->sec_inst.ngroups = 1; }
+      else { config->sec_inst.ngroups = 0; }
+
+      if (level < 5) { config->flags |= XD3_SEC_NOADDR; }
+      if (level < 9) { config->sec_addr.ngroups = 1; }
+      else { config->sec_addr.ngroups = 0; }
+    }
+  else if (strcmp (option_secondary, "none") == 0 && SECONDARY_DJW)
+    {
+      /* No secondary */
+    }
+  else
+    {
+      if (!option_quiet)
+        {
+          XPR(NT "unrecognized secondary compressor type: %s\n",
+              option_secondary);
+        }
+      return XD3_INVALID;
+    }
+
+  return 0;
+}
+
+/******************************************************************
+ VCDIFF TOOLS
+ *****************************************************************/
+
+#if VCDIFF_TOOLS
+#include "xdelta3-merge.h"
+
+#if defined(_WIN32) || defined(__DJGPP__)
+/* According to the internet, Windows vsnprintf() differs from most
+ * Unix implementations regarding the terminating 0 when the boundary
+ * condition is met. It doesn't matter here, we don't rely on the
+ * trailing 0.  Besides, both Windows and DJGPP vsnprintf return -1
+ * upon truncation, which isn't C99 compliant. To overcome this,
+ * recent MinGW runtimes provided their own vsnprintf (notice the
+ * absence of the '_' prefix) but they were initially buggy.  So,
+ * always use the native '_'-prefixed version with Win32. */
+#include <stdarg.h>
+#ifdef _WIN32
+#define vsnprintf_func _vsnprintf
+#else
+#define vsnprintf_func  vsnprintf
+#endif
+
+/* snprintf() replacement built on the platform's vsnprintf variant.
+ * A negative result from that variant is mapped to N, so callers that
+ * test "result >= buffer size" also catch that case. */
+int
+snprintf_func (char *str, int n, char *fmt, ...)
+{
+  va_list args;
+  int written;
+
+  va_start (args, fmt);
+  written = vsnprintf_func (str, n, fmt, args);
+  va_end (args);
+
+  return (written < 0) ? n : written;
+}
+#else
+#define snprintf_func snprintf
+#endif
+
+/* The following macros let the VCDIFF tools print something
+ * printf-like with main_file_write(), e.g.,:
+ *
+ *   VC(UT "trying to be portable: %d\n", x)VE;
+ *
+ * VC opens a do/while(0) statement and the snprintf_func() call, UT
+ * supplies the destination buffer and its size, and VE closes the
+ * call, checks the result and writes the formatted bytes.
+ *
+ * The expansion requires an "int ret" and a "main_file *xfile" whose
+ * snprintf_buf has been allocated to be in scope.  It executes
+ * "return ret" from the enclosing function when the formatted output
+ * does not fit in SNPRINTF_BUFSIZE bytes (after reporting through
+ * main_print_overflow()) or when main_file_write() fails.
+ */
+#define SNPRINTF_BUFSIZE 1024
+#define VC do { if (((ret = snprintf_func
+#define UT (char*)xfile->snprintf_buf, SNPRINTF_BUFSIZE,
+#define VE ) >= SNPRINTF_BUFSIZE			       \
+  && (ret = main_print_overflow(ret)) != 0)		       \
+  || (ret = main_file_write(xfile, xfile->snprintf_buf,        \
+			    ret, "print")) != 0)	       \
+  { return ret; } } while (0)
+
+/* Report that a VC/VE print did not fit in the print buffer; X is the
+ * value returned by snprintf_func().  Always returns XD3_INTERNAL. */
+static int
+main_print_overflow (int x)
+{
+  const int status = XD3_INTERNAL;
+
+  XPR(NT "internal print buffer overflow: %d bytes\n", x);
+
+  return status;
+}
+
+/* This function prints a single VCDIFF window.
+ *
+ * The instruction section of STREAM is decoded one code at a time with
+ * xd3_decode_instruction() and one line per code is written to XFILE
+ * through the VC/UT/VE macros, which return from this function when
+ * printing fails.  SIZE accumulates the sizes of the decoded
+ * instructions; after the loop it is checked against the window's
+ * target length, and the decoder position and address section are
+ * checked for having been fully consumed.
+ *
+ * Returns 0 on success, the error from xd3_decode_instruction() or
+ * from printing, or XD3_INTERNAL when one of the final consistency
+ * checks fails. */
+static int
+main_print_window (xd3_stream* stream, main_file *xfile)
+{
+  int ret;
+  usize_t size = 0;
+
+  VC(UT "  Offset Code Type1 Size1  @Addr1 + Type2 Size2 @Addr2\n")VE;
+
+  while (stream->inst_sect.buf < stream->inst_sect.buf_max)
+    {
+      /* Remember the code byte and both section cursors so the number
+       * of bytes this instruction consumed can be measured below. */
+      usize_t code = stream->inst_sect.buf[0];
+      const uint8_t *addr_before = stream->addr_sect.buf;
+      const uint8_t *inst_before = stream->inst_sect.buf;
+      usize_t addr_bytes;
+      usize_t inst_bytes;
+      usize_t size_before = size;
+
+      if ((ret = xd3_decode_instruction (stream)))
+	{
+	  XPR(NT "instruction decode error at %"Q"u: %s\n",
+	      stream->dec_winstart + size, stream->msg);
+	  return ret;
+	}
+
+      addr_bytes = stream->addr_sect.buf - addr_before;
+      inst_bytes = stream->inst_sect.buf - inst_before;
+
+      /* The code column shows 0 unless option_print_cpymode is set. */
+      VC(UT "  %06"Q"u %03u  %s %6u", stream->dec_winstart + size, 
+	 option_print_cpymode ? code : 0,
+	 xd3_rtype_to_string ((xd3_rtype) stream->dec_current1.type, option_print_cpymode),
+	 (usize_t) stream->dec_current1.size)VE;
+
+      if (stream->dec_current1.type != XD3_NOOP)
+	{
+	  if (stream->dec_current1.type >= XD3_CPY)
+	    {
+	      /* Addresses at or past dec_cpylen are printed as "T@"
+	       * relative to dec_cpylen; lower ones as "S@" with
+	       * dec_cpyoff added. */
+	      if (stream->dec_current1.addr >= stream->dec_cpylen) 
+		{
+		  VC(UT " T@%-6u", 
+		     stream->dec_current1.addr - stream->dec_cpylen)VE;
+		} 
+	      else
+		{
+		  VC(UT " S@%-6"Q"u", 
+		     stream->dec_cpyoff + stream->dec_current1.addr)VE;
+		}
+	    }
+	  else
+	    {
+	      /* Not a copy: pad the address column. */
+	      VC(UT "        ")VE;
+	    }
+
+	  size += stream->dec_current1.size;
+	}
+
+      /* The second half of the code is printed only when present. */
+      if (stream->dec_current2.type != XD3_NOOP)
+	{
+	  VC(UT "  %s %6u",
+	     xd3_rtype_to_string ((xd3_rtype) stream->dec_current2.type,
+				  option_print_cpymode),
+	     (usize_t)stream->dec_current2.size)VE;
+
+	  if (stream->dec_current2.type >= XD3_CPY)
+	    {
+	      if (stream->dec_current2.addr >= stream->dec_cpylen) 
+		{
+		  VC(UT " T@%-6u", 
+		     stream->dec_current2.addr - stream->dec_cpylen)VE;
+		} 
+	      else
+		{
+		  VC(UT " S@%-6"Q"u", 
+		     stream->dec_cpyoff + stream->dec_current2.addr)VE;
+		}
+	    }
+
+	  size += stream->dec_current2.size;
+	}
+
+      VC(UT "\n")VE;
+
+      /* In verbose mode, flag a code containing a copy whose encoding
+       * (address plus instruction bytes) is at least as long as the
+       * data it produces. */
+      if (option_verbose &&
+	  addr_bytes + inst_bytes >= (size - size_before) &&
+	  (stream->dec_current1.type >= XD3_CPY ||
+	   stream->dec_current2.type >= XD3_CPY))
+	{
+	  VC(UT "  %06"Q"u (inefficiency) %u encoded as %u bytes\n",
+	     stream->dec_winstart + size_before,
+	     size - size_before,
+	     addr_bytes + inst_bytes)VE;
+	}
+    }
+
+  /* The size check is waived when the window is being skipped. */
+  if (stream->dec_tgtlen != size && (stream->flags & XD3_SKIP_WINDOW) == 0)
+    {
+      XPR(NT "target window size inconsistency");
+      return XD3_INTERNAL;
+    }
+
+  if (stream->dec_position != stream->dec_maxpos)
+    {
+      XPR(NT "target window position inconsistency");
+      return XD3_INTERNAL;
+    }
+
+  if (stream->addr_sect.buf != stream->addr_sect.buf_max)
+    {
+      XPR(NT "address section inconsistency");
+      return XD3_INTERNAL;
+    }
+
+  return 0;
+}
+
+/* Print to XFILE the attributes recorded for FILE: its filename and
+ * the command name of its external compressor, each only when set.
+ * TYPE is a label included in the output.  Returns 0 on success or
+ * the print error raised inside the VC/VE macros. */
+static int
+main_print_vcdiff_file (main_file *xfile, main_file *file, const char *type)
+{
+  int ret;  /* Assigned and returned by the VC/VE macros. */
+
+  if (file->filename != NULL)
+    {
+      VC(UT "XDELTA filename (%s):     %s\n", type, file->filename)VE;
+    }
+
+  if (file->compressor != NULL)
+    {
+      VC(UT "XDELTA ext comp (%s):     %s\n", type,
+         file->compressor->recomp_cmdname)VE;
+    }
+
+  return 0;
+}
+
+/* This function prints a VCDIFF input, mainly for debugging purposes.
+ *
+ * For the window at offset 0 the file header is printed first (version,
+ * header size and indicator, secondary compressor, application header);
+ * later windows are preceded by a blank line.  The per-window fields
+ * follow, and then the instructions via main_print_window() unless the
+ * stream is in XD3_JUST_HDR or XD3_SKIP_WINDOW mode.
+ *
+ * All output goes to XFILE through the VC/UT/VE macros, which return
+ * from this function on a print error.  xfile->snprintf_buf is
+ * allocated here on first use.
+ *
+ * Returns 0 on success (or when option_no_output is set), ENOMEM if the
+ * print buffer cannot be allocated, PRINTHDR_SPECIAL once the header
+ * has been printed in XD3_JUST_HDR mode, or the error from printing or
+ * from main_print_window(). */
+static int
+main_print_func (xd3_stream* stream, main_file *xfile)
+{
+  int ret;
+
+  if (option_no_output)
+    {
+      return 0;
+    }
+
+  if (xfile->snprintf_buf == NULL)
+    {
+      if ((xfile->snprintf_buf = (uint8_t*)main_malloc(SNPRINTF_BUFSIZE)) == NULL)
+        {
+          return ENOMEM;
+        }
+    }
+
+  if (stream->dec_winstart == 0)
+    {
+      VC(UT "VCDIFF version:               0\n")VE;
+      VC(UT "VCDIFF header size:           %d\n",
+         stream->dec_hdrsize)VE;
+      VC(UT "VCDIFF header indicator:      ")VE;
+      if ((stream->dec_hdr_ind & VCD_SECONDARY) != 0)
+        VC(UT "VCD_SECONDARY ")VE;
+      if ((stream->dec_hdr_ind & VCD_CODETABLE) != 0)
+        VC(UT "VCD_CODETABLE ")VE;
+      if ((stream->dec_hdr_ind & VCD_APPHEADER) != 0)
+        VC(UT "VCD_APPHEADER ")VE;
+      if (stream->dec_hdr_ind == 0)
+        VC(UT "none")VE;
+      VC(UT "\n")VE;
+
+      IF_SEC(VC(UT "VCDIFF secondary compressor:  %s\n",
+                stream->sec_type ? stream->sec_type->name : "none")VE);
+      IF_NSEC(VC(UT "VCDIFF secondary compressor: unsupported\n")VE);
+
+      if (stream->dec_hdr_ind & VCD_APPHEADER)
+        {
+          uint8_t *apphead;
+          usize_t appheadsz;
+          ret = xd3_get_appheader (stream, & apphead, & appheadsz);
+
+          if (ret == 0 && appheadsz > 0)
+            {
+              int sq = option_quiet;
+              main_file i, o, s;
+              XD3_ASSERT (apphead != NULL);
+              VC(UT "VCDIFF application header:    ")VE;
+              if ((ret = main_file_write (xfile, apphead,
+                                          appheadsz, "print")) != 0)
+                { return ret; }
+              VC(UT "\n")VE;
+
+              main_file_init (& i);
+              main_file_init (& o);
+              main_file_init (& s);
+
+              /* Parse the application header into temporary file
+               * records with messages suppressed, then restore the
+               * caller's quiet setting. */
+              option_quiet = 1;
+              main_get_appheader (stream, &i, & o, & s);
+              option_quiet = sq;
+
+              if ((ret = main_print_vcdiff_file (xfile, & o, "output")) == 0)
+                {
+                  ret = main_print_vcdiff_file (xfile, & s, "source");
+                }
+
+              /* Release the temporaries on every path, including a
+               * failed print, before the status is acted upon. */
+              main_file_cleanup (& i);
+              main_file_cleanup (& o);
+              main_file_cleanup (& s);
+
+              if (ret != 0)
+                { return ret; }
+            }
+        }
+    }
+  else
+    {
+      VC(UT "\n")VE;
+    }
+
+  VC(UT "VCDIFF window number:         %"Q"u\n", stream->current_window)VE;
+  VC(UT "VCDIFF window indicator:      ")VE;
+  if ((stream->dec_win_ind & VCD_SOURCE) != 0) VC(UT "VCD_SOURCE ")VE;
+  if ((stream->dec_win_ind & VCD_TARGET) != 0) VC(UT "VCD_TARGET ")VE;
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0) VC(UT "VCD_ADLER32 ")VE;
+  if (stream->dec_win_ind == 0) VC(UT "none")VE;
+  VC(UT "\n")VE;
+
+  if ((stream->dec_win_ind & VCD_ADLER32) != 0)
+    {
+      VC(UT "VCDIFF adler32 checksum:      %08X\n",
+         (usize_t)stream->dec_adler32)VE;
+    }
+
+  if (stream->dec_del_ind != 0)
+    {
+      VC(UT "VCDIFF delta indicator:       ")VE;
+      if ((stream->dec_del_ind & VCD_DATACOMP) != 0) VC(UT "VCD_DATACOMP ")VE;
+      if ((stream->dec_del_ind & VCD_INSTCOMP) != 0) VC(UT "VCD_INSTCOMP ")VE;
+      if ((stream->dec_del_ind & VCD_ADDRCOMP) != 0) VC(UT "VCD_ADDRCOMP ")VE;
+      /* NOTE(review): this test cannot succeed inside the enclosing
+       * "dec_del_ind != 0" block; kept so the output is unchanged. */
+      if (stream->dec_del_ind == 0) VC(UT "none")VE;
+      VC(UT "\n")VE;
+    }
+
+  if (stream->dec_winstart != 0)
+    {
+      VC(UT "VCDIFF window at offset:      %"Q"u\n", stream->dec_winstart)VE;
+    }
+
+  if (SRCORTGT (stream->dec_win_ind))
+    {
+      VC(UT "VCDIFF copy window length:    %u\n",
+         (usize_t)stream->dec_cpylen)VE;
+      VC(UT "VCDIFF copy window offset:    %"Q"u\n",
+         stream->dec_cpyoff)VE;
+    }
+
+  VC(UT "VCDIFF delta encoding length: %u\n",
+     (usize_t)stream->dec_enclen)VE;
+  VC(UT "VCDIFF target window length:  %u\n",
+     (usize_t)stream->dec_tgtlen)VE;
+
+  VC(UT "VCDIFF data section length:   %u\n",
+     (usize_t)stream->data_sect.size)VE;
+  VC(UT "VCDIFF inst section length:   %u\n",
+     (usize_t)stream->inst_sect.size)VE;
+  VC(UT "VCDIFF addr section length:   %u\n",
+     (usize_t)stream->addr_sect.size)VE;
+
+  ret = 0;
+  if ((stream->flags & XD3_JUST_HDR) != 0)
+    {
+      /* Print a header -- finished! */
+      ret = PRINTHDR_SPECIAL;
+    }
+  else if ((stream->flags & XD3_SKIP_WINDOW) == 0)
+    {
+      ret = main_print_window (stream, xfile);
+    }
+
+  return ret;
+}
+
+/* Copy one decoded section, INPUT, verbatim into the output page
+ * OUTPUT, which must be a single empty page (no next_page).
+ *
+ * The page buffer is obtained with xd3_decode_allocate() on STREAM,
+ * the same stream that is named in the error report.  Every caller in
+ * this file passes recode_stream as STREAM.
+ *
+ * Returns 0 on success or the allocation error after printing it. */
+static int
+main_recode_copy (xd3_stream* stream,
+                  xd3_output* output,
+                  xd3_desect* input)
+{
+  int ret;
+
+  XD3_ASSERT(output != NULL);
+  XD3_ASSERT(output->next_page == NULL);
+
+  if ((ret = xd3_decode_allocate (stream,
+                                  input->size,
+                                  &output->base,
+                                  &output->avail)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      return ret;
+    }
+
+  memcpy (output->base,
+          /* Note: decoder advances buf, so get base of buffer with
+           * buf_max - size */
+          input->buf_max - input->size,
+          input->size);
+  output->next = input->size;
+  return 0;
+}
+
+/* Re-encode one window.
+ *
+ * STREAM has just finished decoding a window.  Its data, instruction
+ * and address sections are copied unchanged into the global
+ * recode_stream, whose encoder is then driven from the ENC_FLUSH state
+ * so that it emits the window again; everything it produces is written
+ * to OFILE.  The adler32 checksum and application header are carried
+ * over according to option_use_checksum / option_use_appheader.
+ *
+ * Returns 0 once the window has been fully emitted, XD3_INTERNAL if
+ * the encoder returns XD3_GETSRCBLK or 0, or the error from the
+ * section copy, the encoder or the write. */
+static int
+main_recode_func (xd3_stream* stream, main_file *ofile)
+{
+  int ret;
+  xd3_source decode_source;
+
+  XD3_ASSERT(stream->dec_state == DEC_FINISH);
+  XD3_ASSERT(recode_stream->enc_state == ENC_INIT ||
+             recode_stream->enc_state == ENC_INPUT);
+
+  // Copy partial decoder output to partial encoder inputs
+  if ((ret = main_recode_copy (recode_stream,
+                               DATA_HEAD(recode_stream),
+                               &stream->data_sect)) ||
+      (ret = main_recode_copy (recode_stream,
+                               INST_HEAD(recode_stream),
+                               &stream->inst_sect)) ||
+      (ret = main_recode_copy (recode_stream,
+                               ADDR_HEAD(recode_stream),
+                               &stream->addr_sect)))
+    {
+      return ret;
+    }
+
+  // This jumps to xd3_emit_hdr()
+  recode_stream->enc_state = ENC_FLUSH;
+  recode_stream->avail_in = stream->dec_tgtlen;
+
+  if (SRCORTGT (stream->dec_win_ind))
+    {
+      recode_stream->src = & decode_source;
+      decode_source.srclen = stream->dec_cpylen;
+      decode_source.srcbase = stream->dec_cpyoff;
+    }
+  else
+    {
+      /* decode_source lives on this function's stack, so a pointer
+       * left behind by an earlier window is dangling by now.  Clear it
+       * for windows that have no copy window, as main_merge_output()
+       * does. */
+      recode_stream->src = NULL;
+    }
+
+  if (option_use_checksum &&
+      (stream->dec_win_ind & VCD_ADLER32) != 0)
+    {
+      recode_stream->flags |= XD3_ADLER32_RECODE;
+      recode_stream->recode_adler32 = stream->dec_adler32;
+    }
+
+  /* An explicit application header takes precedence; otherwise the
+   * decoded one, if any, is passed through. */
+  if (option_use_appheader != 0 &&
+      option_appheader != NULL)
+    {
+      xd3_set_appheader (recode_stream, option_appheader,
+                         strlen ((char*) option_appheader));
+    }
+  else if (option_use_appheader != 0 &&
+           option_appheader == NULL)
+    {
+      if (stream->dec_appheader != NULL)
+        {
+          xd3_set_appheader (recode_stream,
+                             stream->dec_appheader, stream->dec_appheadsz);
+        }
+    }
+
+  // Output loop
+  for (;;)
+    {
+      switch((ret = xd3_encode_input (recode_stream)))
+        {
+        case XD3_INPUT: {
+          /* finished recoding one window */
+          stream->total_out = recode_stream->total_out;
+          return 0;
+        }
+        case XD3_OUTPUT: {
+          /* main_file_write below */
+          break;
+        }
+        case XD3_GOTHEADER:
+        case XD3_WINSTART:
+        case XD3_WINFINISH: {
+          /* ignore */
+          continue;
+        }
+        case XD3_GETSRCBLK:
+        case 0: {
+            return XD3_INTERNAL;
+          }
+        default:
+          return ret;
+        }
+
+      if ((ret = main_write_output (recode_stream, ofile)))
+        {
+          return ret;
+        }
+
+      xd3_consume_output (recode_stream);
+    }
+}
+#endif /* VCDIFF_TOOLS */
+
+/*******************************************************************
+ VCDIFF merging
+ ******************************************************************/
+
+#if VCDIFF_TOOLS
+/* Allocate and configure the global recode_stream as a partial encoder
+ * with whole-state buffering.  Modifies static state.
+ *
+ * recode_stream must be NULL on entry.  Returns 0 on success with
+ * recode_stream set.  On failure it returns ENOMEM or the error from
+ * the configuration step that failed, and recode_stream is NULL again
+ * with its memory released. */
+static int
+main_init_recode_stream (void)
+{
+  int ret;
+  int stream_flags = XD3_ADLER32_NOVER | XD3_SKIP_EMIT;
+  int recode_flags;
+  xd3_config recode_config;
+
+  XD3_ASSERT (recode_stream == NULL);
+
+  if ((recode_stream = (xd3_stream*) main_malloc(sizeof(xd3_stream))) == NULL)
+    {
+      return ENOMEM;
+    }
+
+  recode_flags = (stream_flags & XD3_SEC_TYPE);
+
+  /* NOTE(review): alloc/freef are assigned before xd3_init_config();
+   * if that call resets the whole structure these two assignments have
+   * no effect.  Order left unchanged -- confirm the intent. */
+  recode_config.alloc = main_alloc;
+  recode_config.freef = main_free1;
+
+  xd3_init_config(&recode_config, recode_flags);
+
+  if ((ret = main_set_secondary_flags (&recode_config)))
+    {
+      /* The stream has not been configured yet, so it is only raw
+       * memory: release it without xd3_free_stream() and without
+       * reading a message from it. */
+      main_free (recode_stream);
+      recode_stream = NULL;
+      return ret;
+    }
+
+  if ((ret = xd3_config_stream (recode_stream, &recode_config)) ||
+      (ret = xd3_encode_init_partial (recode_stream)) ||
+      (ret = xd3_whole_state_init (recode_stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (recode_stream, ret));
+      xd3_free_stream (recode_stream);
+      /* Also release the structure itself, as is done wherever else
+       * recode_stream is torn down. */
+      main_free (recode_stream);
+      recode_stream = NULL;
+      return ret;
+    }
+
+  return 0;
+}
+
+/* This processes the sequence of -m arguments.  The final input
+ * is processed as part of the ordinary main_input() loop.
+ *
+ * Each file in MERGES is opened and run through main_input() with
+ * CMD_MERGE_ARG.  The whole-target state of the first file is kept as
+ * is; each later file is merged with what has been accumulated so far.
+ * The accumulated state is carried between iterations in the local
+ * MERGE_INPUT stream and finally moved into the global merge_stream.
+ * recode_stream and main_bdata are released after every argument.
+ *
+ * Returns 0 on success, or immediately when MERGES is empty; otherwise
+ * the first error encountered. */
+static int
+main_merge_arguments (main_merge_list* merges)
+{
+  int ret = 0;
+  int count = 0;
+  main_merge *merge = NULL;
+  xd3_stream merge_input;
+
+  if (main_merge_list_empty (merges))
+    {
+      return 0;
+    }
+
+  if ((ret = xd3_config_stream (& merge_input, NULL)) ||
+      (ret = xd3_whole_state_init (& merge_input)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& merge_input, ret));
+      return ret;
+    }
+
+  merge = main_merge_list_front (merges);
+  while (!main_merge_list_end (merges, merge))
+    {
+      main_file mfile;
+      main_file_init (& mfile);
+      mfile.filename = merge->filename;
+      mfile.flags = RD_NONEXTERNAL;
+
+      if ((ret = main_file_open (& mfile, merge->filename, XO_READ)))
+        {
+          goto error;
+        }
+
+      ret = main_input (CMD_MERGE_ARG, & mfile, NULL, NULL);
+
+      if (ret == 0)
+        {
+          if (count++ == 0)
+            {
+              /* The first merge source is the next merge input. */
+              xd3_swap_whole_state (& recode_stream->whole_target,
+                                    & merge_input.whole_target);
+            }
+          else
+            {
+              /* Merge the recode_stream with merge_input. */
+              ret = xd3_merge_input_output (recode_stream,
+                                            & merge_input.whole_target);
+
+              /* Save the next merge source in merge_input. */
+              xd3_swap_whole_state (& recode_stream->whole_target,
+                                    & merge_input.whole_target);
+            }
+        }
+
+      /* Per-argument teardown happens on success and failure alike;
+       * RET is examined only afterwards. */
+      main_file_cleanup (& mfile);
+
+      if (recode_stream != NULL)
+        {
+          xd3_free_stream (recode_stream);
+          main_free (recode_stream);
+          recode_stream = NULL;
+        }
+
+      if (main_bdata != NULL)
+        {
+          main_free (main_bdata);
+          main_bdata = NULL;
+          main_bsize = 0;
+        }
+
+      if (ret != 0)
+        {
+          goto error;
+        }
+
+      merge = main_merge_list_next (merge);
+    }
+
+  XD3_ASSERT (merge_stream == NULL);
+
+  if ((merge_stream = (xd3_stream*) main_malloc (sizeof(xd3_stream))) == NULL)
+    {
+      ret = ENOMEM;
+      goto error;
+    }
+
+  if ((ret = xd3_config_stream (merge_stream, NULL)) ||
+      (ret = xd3_whole_state_init (merge_stream)))
+    {
+      /* Report against the stream that actually failed. */
+      XPR(NT XD3_LIB_ERRMSG (merge_stream, ret));
+      goto error;
+    }
+
+  xd3_swap_whole_state (& merge_stream->whole_target,
+                        & merge_input.whole_target);
+  ret = 0;
+ error:
+  xd3_free_stream (& merge_input);
+  return ret;
+}
+
+/* This processes each window of the final merge input.  This routine
+ * does not output, it buffers the entire delta into memory: the window
+ * just decoded by STREAM is appended to its whole-target state.
+ * NO_WRITE is not used.  Returns the xd3_whole_append_window() status,
+ * or 0 when that status is zero. */
+static int
+main_merge_func (xd3_stream* stream, main_file *no_write)
+{
+  int status = xd3_whole_append_window (stream);
+
+  return status ? status : 0;
+}
+
+
+/* This is called after all windows have been read, as a final step in
+ * main_input().  This is only called for the final merge step.
+ *
+ * STREAM holds the buffered whole-target state of the final input.  If
+ * merge_stream is set, that state is first merged with it.  The result
+ * is then replayed window by window into the global recode_stream:
+ * each buffered instruction is handed to the encoder (runs and copies
+ * directly, adds through the main_bdata input buffer), and the encoder
+ * output is written to OFILE.  Window sizes and offsets are taken from
+ * stream->whole_target.wininfo.
+ *
+ * Returns 0 on success, XD3_INVALID on an unexpected encoder state or
+ * offset mismatch, ENOMEM if the input buffer cannot be grown,
+ * XD3_INTERNAL if the encoder returns XD3_GETSRCBLK or 0 while
+ * flushing, or the error from the merge, the encoder or the write. */
+static int
+main_merge_output (xd3_stream *stream, main_file *ofile)
+{
+  int ret;
+  usize_t inst_pos = 0;
+  xoff_t output_pos = 0;
+  xd3_source recode_source;
+  usize_t window_num = 0;
+  int at_least_once = 0;
+
+  /* merge_stream is set if there were arguments.  this stream's input
+   * needs to be applied to the merge_stream source. */
+  if ((merge_stream != NULL) &&
+      (ret = xd3_merge_input_output (stream, 
+				     & merge_stream->whole_target)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      return ret;
+    }
+
+  if (option_use_appheader != 0 &&
+      option_appheader != NULL)
+    {
+      xd3_set_appheader (recode_stream, option_appheader,
+			 strlen ((char*) option_appheader));
+    }
+
+  /* Enter the ENC_INPUT state and bypass the next_in == NULL test
+   * and (leftover) input buffering logic. */
+  XD3_ASSERT(recode_stream->enc_state == ENC_INIT);
+  recode_stream->enc_state = ENC_INPUT;
+  recode_stream->next_in = main_bdata;
+  recode_stream->flags |= XD3_FLUSH;
+
+  /* This encodes the entire target. */
+  while (inst_pos < stream->whole_target.instlen || !at_least_once)
+    {
+      xoff_t window_start = output_pos;
+      int window_srcset = 0;
+      xoff_t window_srcmin = 0;
+      xoff_t window_srcmax = 0;
+      usize_t window_pos = 0;
+      usize_t window_size;
+
+      /* at_least_once ensures that we encode at least one window,
+       * which handles the 0-byte case. */
+      at_least_once = 1;
+
+      XD3_ASSERT (recode_stream->enc_state == ENC_INPUT);
+
+      if ((ret = xd3_encode_input (recode_stream)) != XD3_WINSTART)
+	{
+	  XPR(NT "invalid merge state: %s\n", xd3_mainerror (ret));
+	  return XD3_INVALID;
+	}
+
+      /* Window sizes must match from the input to the output, so that
+       * target copies are in-range (and so that checksums carry
+       * over). */
+      XD3_ASSERT (window_num < stream->whole_target.wininfolen);
+      window_size = stream->whole_target.wininfo[window_num].length;
+
+      /* Output position should also match. */
+      if (output_pos != stream->whole_target.wininfo[window_num].offset)
+	{
+	  XPR(NT "internal merge error: offset mismatch\n");
+	  return XD3_INVALID;
+	}
+
+      if (option_use_checksum &&
+	  (stream->dec_win_ind & VCD_ADLER32) != 0) 
+	{
+	  recode_stream->flags |= XD3_ADLER32_RECODE;
+	  recode_stream->recode_adler32 = stream->whole_target.wininfo[window_num].adler32;
+	}
+
+      window_num++;
+
+      /* Grow the shared input buffer so it can hold this window. */
+      if (main_bsize < window_size)
+	{
+	  main_free (main_bdata);
+	  main_bdata = NULL;
+	  main_bsize = 0;
+	  if ((main_bdata = (uint8_t*) 
+	       main_malloc (window_size)) == NULL)
+	    {
+	      return ENOMEM;
+	    }
+	  main_bsize = window_size;
+	}
+
+      /* This encodes a single target window. */
+      while (window_pos < window_size &&
+	     inst_pos < stream->whole_target.instlen)
+	{
+	  xd3_winst *inst = &stream->whole_target.inst[inst_pos];
+	  /* Take only as much of the instruction as fits in the window. */
+	  usize_t take = min(inst->size, window_size - window_pos);
+	  xoff_t addr;
+
+	  switch (inst->type)
+	    {
+	    case XD3_RUN:
+	      if ((ret = xd3_emit_run (recode_stream, window_pos, take,
+				       stream->whole_target.adds[inst->addr])))
+		{
+		  return ret;
+		}
+	      break;
+
+	    case XD3_ADD:
+	      /* Adds are implicit, put them into the input buffer. */
+	      memcpy (main_bdata + window_pos, 
+		      stream->whole_target.adds + inst->addr, take);
+	      break;
+
+	    default: /* XD3_COPY + copy mode */
+	      if (inst->mode != 0)
+		{
+		  /* Track the lowest and highest addresses used by
+		   * these copies; they bound recode_source below. */
+		  if (window_srcset) {
+		    window_srcmin = min(window_srcmin, inst->addr);
+		    window_srcmax = max(window_srcmax, inst->addr + take);
+		  } else {
+		    window_srcset = 1;
+		    window_srcmin = inst->addr;
+		    window_srcmax = inst->addr + take;
+		  }
+		  addr = inst->addr;
+		}
+	      else 
+		{
+		  /* Mode 0: the address is rebased to the start of
+		   * this window. */
+		  XD3_ASSERT (inst->addr >= window_start);
+		  addr = inst->addr - window_start;
+		}
+	      IF_DEBUG1 (DP(RINT "[merge copy] winpos %u take %u addr %"Q"u mode %u\n",
+			    window_pos, take, addr, inst->mode));
+	      if ((ret = xd3_found_match (recode_stream, window_pos, take, 
+					  addr, inst->mode != 0)))
+		{
+		  return ret;
+		}
+	      break;
+	    }
+
+	  window_pos += take;
+	  output_pos += take;
+
+	  if (take == inst->size)
+	    {
+	      inst_pos += 1;
+	    }
+	  else
+	    {
+	      /* Modify the instruction for the next pass. */
+	      if (inst->type != XD3_RUN)
+		{
+		  inst->addr += take;
+		}
+	      inst->size -= take;
+	    }
+	}
+
+      xd3_avail_input (recode_stream, main_bdata, window_pos);
+
+      recode_stream->enc_state = ENC_INSTR;
+
+      /* Describe the source range used by this window, or clear the
+       * source when no copy with a non-zero mode was seen. */
+      if (window_srcset) {
+	recode_stream->srcwin_decided = 1;
+	recode_stream->src = &recode_source;
+	recode_source.srclen = window_srcmax - window_srcmin;
+	recode_source.srcbase = window_srcmin;
+	recode_stream->taroff = recode_source.srclen;
+      } else {
+	recode_stream->srcwin_decided = 0;
+	recode_stream->src = NULL;
+	recode_stream->taroff = 0;
+      }
+
+      /* Drain the encoder: write output until it asks for input. */
+      for (;;)
+	{
+	  switch ((ret = xd3_encode_input (recode_stream)))
+	    {
+	    case XD3_INPUT: {
+	      goto done_window;
+	    }
+	    case XD3_OUTPUT: {
+	      /* main_file_write below */
+	      break;
+	    }
+	    case XD3_GOTHEADER:
+	    case XD3_WINSTART:
+	    case XD3_WINFINISH: {
+	      /* ignore */
+	      continue;
+	    }
+	    case XD3_GETSRCBLK:
+	    case 0: {
+	      return XD3_INTERNAL;
+	    }
+	    default:
+	      return ret;
+	    }
+
+	  if ((ret = main_write_output(recode_stream, ofile)))
+	    {
+	      return ret;
+	    }
+
+	  xd3_consume_output (recode_stream);
+	}
+    done_window:
+      (void) 0;
+    }
+
+  return 0;
+}
+#endif
+
+/*******************************************************************
+ Input decompression, output recompression
+ ******************************************************************/
+
+#if EXTERNAL_COMPRESSION
+/* This is tricky POSIX-specific code with lots of fork(), pipe(),
+ * dup(), waitpid(), and exec() business.  Most of this code
+ * originated in PRCS1, which did automatic package-file
+ * decompression.  It works with both XD3_POSIX and XD3_STDIO file
+ * disciplines.
+ *
+ * To automatically detect compressed inputs requires a child process
+ * to reconstruct the input stream, which was advanced in order to
+ * detect compression, because it may not be seekable.  In other
+ * words, the main program reads part of the input stream, and if it
+ * detects a compressed input it then forks a pipe copier process,
+ * which copies the first-read block out of the main-program's memory,
+ * then streams the remaining compressed input into the
+ * input-decompression pipe.
+ */
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+/* Remember which pipe FD is which.  NOTE(review): presumably these are
+ * the indices into the two-element array filled in by pipe(); the call
+ * sites are outside this part of the file -- confirm. */
+#define PIPE_READ_FD  0
+#define PIPE_WRITE_FD 1
+
+/* Process IDs of the forked helper processes.  A zero entry is skipped
+ * by main_external_compression_finish(). */
+static pid_t ext_subprocs[2];
+/* NOTE(review): not referenced in this part of the file; presumably
+ * the name of a temporary file used by external compression -- confirm
+ * against its users. */
+static char* ext_tmpfile = NULL;
+
+/* Like write(), but makes repeated calls to empty the buffer: all
+ * REMAIN bytes of EXIST_BUF are written to OUTFD through
+ * xd3_posix_io().  Returns 0 on success; on failure the error is
+ * printed and its code returned. */
+static int
+main_pipe_write (int outfd, uint8_t *exist_buf, usize_t remain)
+{
+  int err = xd3_posix_io (outfd, exist_buf, remain,
+                          (xd3_posix_func*) &write, NULL);
+
+  if (err == 0)
+    {
+      return 0;
+    }
+
+  XPR(NT "pipe write failed: %s", xd3_mainerror (err));
+  return err;
+}
+
+/* A simple error-reporting waitpid interface. */
+static int
+main_waitpid_check(pid_t pid)
+{
+  int status;
+  int ret = 0;
+
+  if (waitpid (pid, & status, 0) < 0)
+    {
+      ret = get_errno ();
+      XPR(NT "compression subprocess: wait: %s\n", xd3_mainerror (ret));
+    }
+  else if (! WIFEXITED (status))
+    {
+      ret = ECHILD;
+      XPR(NT "compression subprocess: signal %d\n",
+	 WIFSIGNALED (status) ? WTERMSIG (status) : WSTOPSIG (status));
+    }
+  else if (WEXITSTATUS (status) != 0)
+    {
+      ret = ECHILD;
+      XPR(NT "compression subprocess: exit %d\n", WEXITSTATUS (status));
+    }
+
+  return ret;
+}
+
+/* Wait for any existing child processes to check for abnormal exit.
+ *
+ * Every pid recorded in ext_subprocs[] is waited on, even when an
+ * earlier child already failed, so no child is left unreaped.  Each
+ * slot is cleared after its wait so that a later call does not wait
+ * on a stale pid.  Returns 0 if all children exited cleanly,
+ * otherwise the first error returned by main_waitpid_check(). */
+static int
+main_external_compression_finish (void)
+{
+  int i;
+  int ret = 0;
+
+  for (i = 0; i < 2; i += 1)
+    {
+      int child_ret;
+
+      if (! ext_subprocs[i]) { continue; }
+
+      child_ret = main_waitpid_check (ext_subprocs[i]);
+
+      /* Whether or not the wait succeeded, this pid must not be
+       * waited on again. */
+      ext_subprocs[i] = 0;
+
+      /* Remember only the first failure. */
+      if (child_ret != 0 && ret == 0)
+	{
+	  ret = child_ret;
+	}
+    }
+
+  return ret;
+}
+
+/* This runs as a forked process of main_input_decompress_setup() to
+ * copy input to the decompression process.  First, the available
+ * input is copied out of the existing buffer, then the buffer is
+ * reused to continue reading from the compressed input file.
+ *
+ * NREAD is the number of valid bytes initially held in PIPE_BUF.
+ * Copying ends once a chunk shorter than PIPE_BUFSIZE has been
+ * written to OUTFD.  Returns 0 on success, otherwise the first error
+ * from main_pipe_write() or main_file_read(). */
+static int
+main_pipe_copier (uint8_t    *pipe_buf,
+		  usize_t      pipe_bufsize,
+		  usize_t      nread,
+		  main_file   *ifile,
+		  int         outfd)
+{
+  int ret;
+
+  for (;;)
+    {
+      if (nread > 0 && (ret = main_pipe_write (outfd, pipe_buf, nread)))
+	{
+	  return ret;
+	}
+
+      /* A short chunk means the input is exhausted. */
+      if (nread < pipe_bufsize)
+	{
+	  break;
+	}
+
+      /* Any non-zero result is a failure, as at every other
+       * main_file_read() call site; testing only for negative values
+       * would let a failed read loop on with a stale nread. */
+      if ((ret = main_file_read (ifile, pipe_buf, pipe_bufsize,
+				 & nread, "pipe read failed")))
+	{
+	  return ret;
+	}
+    }
+
+  return 0;
+}
+
+/* This function is called after we have read some amount of data from
+ * the input file and detected a compressed input.  Here we start a
+ * decompression subprocess by forking twice.  The first process runs
+ * the decompression command, the second process copies data to the
+ * input of the first.
+ *
+ * decomp        - record naming the decompression command and options.
+ * ifile         - the compressed input; on success its handle is
+ *                 replaced by the read end of the decompressor's
+ *                 output pipe and ifile->compressor is set to decomp.
+ * input_buf,
+ * input_bufsize - destination of the first read of decompressed data.
+ * pipe_buf,
+ * pipe_bufsize,
+ * pipe_avail    - buffer handed to main_pipe_copier(); pipe_avail is
+ *                 passed as the count of bytes it already holds.
+ * nread         - receives the byte count of that first read.
+ *
+ * The child pids are recorded in ext_subprocs[0] (decompressor) and
+ * ext_subprocs[1] (copier).  Returns the result of main_file_read()
+ * on the new input, or an errno value when setting up the pipes or
+ * the subprocesses fails. */
+static int
+main_input_decompress_setup (const main_extcomp     *decomp,
+			     main_file              *ifile,
+			     uint8_t               *input_buf,
+			     usize_t                 input_bufsize,
+			     uint8_t               *pipe_buf,
+			     usize_t                 pipe_bufsize,
+			     usize_t                 pipe_avail,
+			     usize_t                *nread)
+{
+  /* The two pipes: input and output file descriptors. */
+  int outpipefd[2], inpipefd[2];
+  int input_fd = -1;  /* The resulting input_fd (output of decompression). */
+  pid_t decomp_id, copier_id;  /* The two subprocs. */
+  int ret;
+
+  /* Start from -1 so the cleanup path can tell unopened descriptors. */
+  outpipefd[0] = outpipefd[1] = -1;
+  inpipefd[0]  = inpipefd[1]  = -1;
+
+  if (pipe (outpipefd) || pipe (inpipefd))
+    {
+      XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  if ((decomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The first child runs the decompression process: */
+  if (decomp_id == 0)
+    {
+      /* Setup pipes: write to the outpipe, read from the inpipe. */
+      if (dup2 (outpipefd[PIPE_WRITE_FD], STDOUT_FILENO) < 0 ||
+	  dup2 (inpipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
+	  close (outpipefd[PIPE_READ_FD]) ||
+	  close (outpipefd[PIPE_WRITE_FD]) ||
+	  close (inpipefd[PIPE_READ_FD]) ||
+	  close (inpipefd[PIPE_WRITE_FD]) ||
+	  execlp (decomp->decomp_cmdname, decomp->decomp_cmdname,
+		  decomp->decomp_options, NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n",
+	      decomp->decomp_cmdname, xd3_mainerror (get_errno ()));
+	}
+
+      /* Only reached when one of the calls above failed. */
+      _exit (127);
+    }
+
+  ext_subprocs[0] = decomp_id;
+
+  if ((copier_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The second child runs the copier process: */
+  if (copier_id == 0)
+    {
+      int exitval = 0;
+
+      /* The copier only writes into the decompressor's input pipe. */
+      if (close (inpipefd[PIPE_READ_FD]) ||
+	  main_pipe_copier (pipe_buf, pipe_bufsize, pipe_avail,
+			    ifile, inpipefd[PIPE_WRITE_FD]) ||
+	  close (inpipefd[PIPE_WRITE_FD]))
+	{
+	  XPR(NT "child copier process failed: %s\n",
+	      xd3_mainerror (get_errno ()));
+	  exitval = 1;
+	}
+
+      _exit (exitval);
+    }
+
+  ext_subprocs[1] = copier_id;
+
+  /* The parent closes both pipes after duplicating the output of
+   * decompression. */
+  input_fd = dup (outpipefd[PIPE_READ_FD]);
+
+  if (input_fd < 0 ||
+      main_file_close (ifile) ||
+      close (outpipefd[PIPE_READ_FD]) ||
+      close (outpipefd[PIPE_WRITE_FD]) ||
+      close (inpipefd[PIPE_READ_FD]) ||
+      close (inpipefd[PIPE_WRITE_FD]))
+    {
+      XPR(NT "dup/close failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#if XD3_STDIO
+  /* Note: fdopen() acquires the fd, closes it when finished. */
+  if ((ifile->file = fdopen (input_fd, "r")) == NULL)
+    {
+      XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#elif XD3_POSIX
+  ifile->file = input_fd;
+#endif
+
+  ifile->compressor = decomp;
+
+  /* Now the input file is decompressed. */
+  return main_file_read (ifile, input_buf, input_bufsize,
+			 nread, "input decompression failed");
+
+  /* NOTE(review): this path may close() descriptors that are still -1
+   * or that were already closed above; the results are ignored.  Any
+   * child already forked is not waited for here. */
+ pipe_cleanup:
+  close (input_fd);
+  close (outpipefd[PIPE_READ_FD]);
+  close (outpipefd[PIPE_WRITE_FD]);
+  close (inpipefd[PIPE_READ_FD]);
+  close (inpipefd[PIPE_WRITE_FD]);
+  return ret;
+}
+
+
+/* This routine is called when the first buffer of input data is read
+ * by the main program (unless input decompression is disabled by
+ * command-line option).  If it recognizes the magic number of a known
+ * input type it invokes decompression.
+ *
+ * Skips decompression if both the decompression type and the file
+ * type are flagged RD_NONEXTERNAL.
+ *
+ * Behaves exactly like main_file_read, otherwise: up to INPUT_SIZE
+ * bytes are stored in INPUT_BUF, *NREAD receives the count, and a
+ * non-zero return value reports a read failure.
+ *
+ * This function uses a separate buffer to read the first small block
+ * of input.  If a compressed input is detected, the separate buffer
+ * is passed to the pipe copier.  This avoids using the same size
+ * buffer in both cases. */
+static int
+main_decompress_input_check (main_file   *ifile,
+			    uint8_t    *input_buf,
+			    usize_t      input_size,
+			    usize_t     *nread)
+{
+  int ret;
+  usize_t i;
+  usize_t check_nread;
+  uint8_t check_buf[XD3_ALLOCSIZE];
+
+  if ((ret = main_file_read (ifile, check_buf,
+			     min (input_size, XD3_ALLOCSIZE),
+			     & check_nread, "input read failed")))
+    {
+      return ret;
+    }
+
+  for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
+    {
+      const main_extcomp *decomp = & extcomp_types[i];
+
+      if ((check_nread > decomp->magic_size) &&
+	  /* The following expr skips decompression if we are trying
+	   * to read a VCDIFF input and that is the magic number. */
+	  !((decomp->flags & RD_NONEXTERNAL) &&
+	    (ifile->flags & RD_NONEXTERNAL)) &&
+	  memcmp (check_buf, decomp->magic, decomp->magic_size) == 0)
+	{
+	  if (! option_quiet)
+	    {
+	      XPR(NT "%s | %s %s\n",
+		 ifile->filename,
+		 decomp->decomp_cmdname,
+		 decomp->decomp_options);
+	    }
+
+	  return main_input_decompress_setup (decomp, ifile,
+					      input_buf, input_size,
+					      check_buf, XD3_ALLOCSIZE,
+					      check_nread, nread);
+	}
+    }
+
+  /* Now read the rest of the input block. */
+  (*nread) = 0;
+
+  /* Only a completely filled check buffer can have more data behind
+   * it.  A failure of this second read is reported to the caller
+   * instead of being dropped. */
+  if (check_nread == XD3_ALLOCSIZE &&
+      (ret = main_file_read (ifile, input_buf + XD3_ALLOCSIZE,
+			     input_size - XD3_ALLOCSIZE, nread,
+			     "input read failed")))
+    {
+      return ret;
+    }
+
+  memcpy (input_buf, check_buf, check_nread);
+
+  (*nread) += check_nread;
+
+  return 0;
+}
+
+/* This is called when the source file needs to be decompressed.  We
+ * fork/exec a decompression command with the proper input and output
+ * to a temporary file.
+ *
+ * The temporary file is created by mkstemp() under $TMPDIR (or /tmp
+ * when that is unset) and its name is stored in ext_tmpfile.  On
+ * success SFILE is re-opened on the decompressed copy and
+ * source->size is taken from it.  On failure the temporary file is
+ * removed, its name is freed, ext_tmpfile is reset to NULL and an
+ * error code is returned. */
+static int
+main_decompress_source (main_file *sfile, xd3_source *source)
+{
+  const main_extcomp *decomp = sfile->compressor;
+  pid_t decomp_id;  /* One subproc. */
+  int   input_fd  = -1;
+  int   output_fd = -1;
+  int   tmp_created = 0;  /* Set once mkstemp() has created the file. */
+  int   ret;
+  char *tmpname = NULL;
+  char *tmpdir  = getenv ("TMPDIR");
+  static const char tmpl[] = "/xd3src.XXXXXX";
+
+  /* Make a template for mkstemp() */
+  if (tmpdir == NULL) { tmpdir = "/tmp"; }
+  if ((tmpname =
+       (char*) main_malloc (strlen (tmpdir) + sizeof (tmpl) + 1)) == NULL)
+    {
+      return ENOMEM;
+    }
+  sprintf (tmpname, "%s%s", tmpdir, tmpl);
+
+  XD3_ASSERT (ext_tmpfile == NULL);
+  ext_tmpfile = tmpname;
+
+  /* Open the output FD. */
+  if ((output_fd = mkstemp (tmpname)) < 0)
+    {
+      XPR(NT "mkstemp failed: %s: %s\n",
+	  tmpname, xd3_mainerror (ret = get_errno ()));
+      goto cleanup;
+    }
+  tmp_created = 1;
+
+  /* Copy the input FD, reset file position. */
+  XD3_ASSERT (main_file_isopen (sfile));
+#if XD3_STDIO
+  if ((input_fd = dup (fileno (sfile->file))) < 0)
+    {
+      XPR(NT "dup failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto cleanup;
+    }
+  main_file_close (sfile);
+  sfile->file = NULL;
+#elif XD3_POSIX
+  input_fd = sfile->file;
+  sfile->file = -1;
+#endif
+
+  /* Rewind to the start: lseek() takes (fd, offset, whence). */
+  if (lseek (input_fd, 0, SEEK_SET) != 0)
+    {
+      XPR(NT "lseek failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto cleanup;
+    }
+
+  if ((decomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto cleanup;
+    }
+
+  /* The child runs the decompression process: */
+  if (decomp_id == 0)
+    {
+      /* Setup descriptors: read from the compressed source, write to
+       * the temporary file. */
+      if (dup2 (input_fd, STDIN_FILENO) < 0 ||
+	  dup2 (output_fd, STDOUT_FILENO) < 0 ||
+	  execlp (decomp->decomp_cmdname, decomp->decomp_cmdname,
+		  decomp->decomp_options, NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n",
+		   decomp->decomp_cmdname, xd3_mainerror (get_errno ()));
+	}
+
+      /* Only reached when one of the calls above failed. */
+      _exit (127);
+    }
+
+  /* The parent no longer needs either descriptor. */
+  close (input_fd);
+  close (output_fd);
+  input_fd  = -1;
+  output_fd = -1;
+
+  /* Then wait for completion. */
+  if ((ret = main_waitpid_check (decomp_id)))
+    {
+      goto cleanup;
+    }
+
+  /* Open/stat the decompressed source file. */
+  if ((ret = main_file_open (sfile, tmpname, XO_READ))) { goto cleanup; }
+  if ((ret = main_file_stat (sfile, & source->size, 1))) { goto cleanup; }
+  return 0;
+
+ cleanup:
+  close (input_fd);
+  close (output_fd);
+  /* The name is about to be freed and ext_tmpfile reset, so nothing
+   * could remove the file later: do it now. */
+  if (tmp_created) { unlink (tmpname); }
+  free (tmpname);
+  ext_tmpfile = NULL;
+  return ret;
+}
+
+/* Initiate re-compression of the output stream.  This is easier than
+ * input decompression because we know beforehand that the stream will
+ * be compressed, whereas the input has already been read when we
+ * decide it should be decompressed.  Thus, it only requires one
+ * subprocess and one pipe.
+ *
+ * The child runs ofile->compressor's recompression command with the
+ * pipe as its standard input and the current output file as its
+ * standard output.  On success OFILE's handle is replaced by the
+ * write end of the pipe and the child pid is stored in
+ * ext_subprocs[0].  Returns 0 on success or an errno value. */
+static int
+main_recompress_output (main_file *ofile)
+{
+  pid_t recomp_id;  /* One subproc. */
+  int   pipefd[2];  /* One pipe. */
+  int   output_fd = -1;
+  int   ret;
+  const main_extcomp *recomp = ofile->compressor;
+
+  /* Start from -1 so the cleanup path can tell unopened descriptors. */
+  pipefd[0] = pipefd[1] = -1;
+
+  if (pipe (pipefd))
+    {
+      XPR(NT "pipe failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  if ((recomp_id = fork ()) < 0)
+    {
+      XPR(NT "fork failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+  /* The child runs the recompression process: */
+  if (recomp_id == 0)
+    {
+      /* Setup pipes: write to the output file, read from the pipe. */
+      if (dup2 (XFNO (ofile), STDOUT_FILENO) < 0 ||
+	  dup2 (pipefd[PIPE_READ_FD], STDIN_FILENO) < 0 ||
+	  close (pipefd[PIPE_READ_FD]) ||
+	  close (pipefd[PIPE_WRITE_FD]) ||
+	  execlp (recomp->recomp_cmdname, recomp->recomp_cmdname,
+		  recomp->recomp_options, NULL))
+	{
+	  XPR(NT "child process %s failed to execute: %s\n",
+	      recomp->recomp_cmdname, xd3_mainerror (get_errno ()));
+	}
+
+      /* Only reached when one of the calls above failed. */
+      _exit (127);
+    }
+
+  ext_subprocs[0] = recomp_id;
+
+  /* The parent closes both pipes after duplicating the output-fd for
+   * writing to the compression pipe. */
+  output_fd = dup (pipefd[PIPE_WRITE_FD]);
+
+  if (output_fd < 0 ||
+      main_file_close (ofile) ||
+      close (pipefd[PIPE_READ_FD]) ||
+      close (pipefd[PIPE_WRITE_FD]))
+    {
+      XPR(NT "close failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#if XD3_STDIO
+  /* Note: fdopen() acquires the fd, closes it when finished. */
+  if ((ofile->file = fdopen (output_fd, "w")) == NULL)
+    {
+      XPR(NT "fdopen failed: %s\n", xd3_mainerror (ret = get_errno ()));
+      goto pipe_cleanup;
+    }
+
+#elif XD3_POSIX
+  ofile->file = output_fd;
+#endif
+
+  /* Now the output file will be compressed. */
+  return 0;
+
+  /* NOTE(review): this path may close() descriptors that are still -1
+   * or that were already closed above; the results are ignored. */
+ pipe_cleanup:
+  close (output_fd);
+  close (pipefd[PIPE_READ_FD]);
+  close (pipefd[PIPE_WRITE_FD]);
+  return ret;
+}
+#endif /* EXTERNAL_COMPRESSION */
+
+/* Find the external-compressor record whose ident string equals
+ * IDENT, the identifier carried in the application header.  Returns
+ * NULL when no entry of extcomp_types[] matches. */
+static const main_extcomp*
+main_ident_compressor (const char *ident)
+{
+  const main_extcomp *cur = extcomp_types;
+  const main_extcomp *end = extcomp_types + SIZEOF_ARRAY (extcomp_types);
+
+  for (; cur != end; cur += 1)
+    {
+      if (strcmp (cur->ident, ident) == 0)
+	{
+	  return cur;
+	}
+    }
+
+  return NULL;
+}
+
+/* Return the main_extcomp record to use for IDENT, or NULL (after a
+ * warning, unless option_quiet is set) when the identifier is
+ * unknown or external compression support is not compiled in. */
+static const main_extcomp*
+main_get_compressor (const char *ident)
+{
+  const main_extcomp *ext = main_ident_compressor (ident);
+
+  if (ext != NULL && EXTERNAL_COMPRESSION)
+    {
+      return ext;
+    }
+
+  if (! option_quiet)
+    {
+      if (ext == NULL)
+	{
+	  XPR(NT "warning: cannot recompress output: "
+		   "unrecognized external compression ID: %s\n", ident);
+	}
+      else
+	{
+	  XPR(NT "warning: external support not compiled: "
+		   "original input was compressed: %s\n", ext->recomp_cmdname);
+	}
+    }
+
+  return NULL;
+}
+
+/*********************************************************************
+ APPLICATION HEADER
+ *******************************************************************/
+
+#if XD3_ENCODER
+/* Map a file name to the string stored in the application header:
+ * NULL becomes "", the standard stream device names become "-", and
+ * any other name is reduced to the part after its last '/'. */
+static const char*
+main_apphead_string (const char* x)
+{
+  static const char *const std_names[] =
+    { "/dev/stdin", "/dev/stdout", "/dev/stderr" };
+  const char *base;
+  size_t k;
+
+  if (x == NULL) { return ""; }
+
+  for (k = 0; k < sizeof (std_names) / sizeof (std_names[0]); k += 1)
+    {
+      if (strcmp (x, std_names[k]) == 0) { return "-"; }
+    }
+
+  // TODO: this is not portable
+  base = strrchr (x, '/');
+
+  if (base == NULL) { return x; }
+
+  return base + 1;
+}
+
+/* Install the application header on STREAM.  Does nothing when a
+ * header is already in use or headers are disabled.  A user-supplied
+ * header (option_appheader) is used verbatim; otherwise the header is
+ * formatted as "iname/icomp" or, when SFILE has a name, as
+ * "iname/icomp/sname/scomp".  Returns 0, or ENOMEM when the buffer
+ * for the default header cannot be allocated. */
+static int
+main_set_appheader (xd3_stream *stream, main_file *input, main_file *sfile)
+{
+  /* The user may disable the application header.  Once the appheader
+   * is set, this disables setting it again. */
+  if (appheader_used || ! option_use_appheader) { return 0; }
+
+  if (option_appheader)
+    {
+      /* An explicitly specified header wins over the default. */
+      appheader_used = option_appheader;
+    }
+  else
+    {
+      const int   have_source = (sfile->filename != NULL);
+      const char *iname = main_apphead_string (input->filename);
+      const char *icomp = "";
+      const char *sname = "";
+      const char *scomp = "";
+      int len;
+
+      if (input->compressor != NULL) { icomp = input->compressor->ident; }
+
+      /* Room for "iname/icomp" and the terminating NUL. */
+      len = strlen (iname) + strlen (icomp) + 2;
+
+      if (have_source)
+	{
+	  sname = main_apphead_string (sfile->filename);
+
+	  if (sfile->compressor != NULL) { scomp = sfile->compressor->ident; }
+
+	  /* Room for the additional "/sname/scomp". */
+	  len += strlen (sname) + strlen (scomp) + 2;
+	}
+
+      if ((appheader_used = (uint8_t*) main_malloc (len)) == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      if (have_source)
+	{
+	  sprintf ((char*)appheader_used, "%s/%s/%s/%s",
+		   iname, icomp, sname, scomp);
+	}
+      else
+	{
+	  sprintf ((char*)appheader_used, "%s/%s", iname, icomp);
+	}
+    }
+
+  xd3_set_appheader (stream, appheader_used, strlen ((char*)appheader_used));
+
+  return 0;
+}
+#endif
+
+/* Apply one "name/compressor" pair taken from a received application
+ * header to FILE.
+ *
+ * file   - file whose filename and compressor may be defaulted.
+ * parsed - parsed[0] is the file name field, parsed[1] the compressor
+ *          ident field; both are read, so both must be non-NULL.
+ * output - non-zero when FILE is the output file; option_stdout then
+ *          suppresses the filename default.
+ * type   - word used in the informational message.
+ * other  - file whose directory part, if it has one, is prepended to
+ *          the defaulted name.
+ *
+ * Values already set on FILE are never overridden. */
+static void
+main_get_appheader_params (main_file *file, char **parsed,
+			   int output, const char *type,
+			   main_file *other)
+{
+  /* Set the filename if it was not specified.  If output, option_stdout (-c)
+   * overrides. */
+  if (file->filename == NULL &&
+      ! (output && option_stdout) &&
+      strcmp (parsed[0], "-") != 0)
+    {
+      file->filename = parsed[0];
+
+      if (other->filename != NULL) {
+	/* Take directory from the other file, if it has one. */
+	/* TODO: This results in nonsense names like /dev/foo.tar.gz
+	 * and probably the filename-default logic interferes with
+	 * multi-file operation and the standard file extension?
+	 * Possibly the name header is bad, should be off by default.
+	 * Possibly we just want to remember external/compression
+	 * settings. */
+	char *last_slash = strrchr(other->filename, '/');
+
+	if (last_slash != NULL) {
+	  int dlen = last_slash - other->filename;
+	  char *joined;
+
+	  XD3_ASSERT(file->filename_copy == NULL);
+
+	  /* dlen bytes of directory, '/', the name, and the NUL. */
+	  joined = (char*) main_malloc(dlen + 2 + strlen(file->filename));
+
+	  /* If the allocation fails, keep the bare header name rather
+	   * than writing through a NULL pointer. */
+	  if (joined != NULL) {
+	    memcpy(joined, other->filename, dlen);
+	    joined[dlen] = '/';
+	    strcpy(joined + dlen + 1, parsed[0]);
+
+	    file->filename_copy = joined;
+	    file->filename = joined;
+	  }
+	}
+      }
+
+      if (! option_quiet)
+	{
+	  XPR(NT "using default %s filename: %s\n", type, file->filename);
+	}
+    }
+
+  /* Set the compressor, initiate de/recompression later. */
+  if (file->compressor == NULL && *parsed[1] != 0)
+    {
+      file->compressor = main_get_compressor (parsed[1]);
+    }
+}
+
+/* Fetch the application header from STREAM, if one has been received,
+ * and use its '/'-separated fields to default the names and
+ * compressors of OUTPUT and SFILE.  A header with two fields
+ * describes the output only; one with four fields describes output
+ * and source.  Any other field count is ignored.  The header buffer
+ * is split in place.
+ *
+ * NOTE(review): strchr() is run on the header buffer, so it is
+ * assumed to be NUL-terminated by xd3_get_appheader() -- confirm. */
+static void
+main_get_appheader (xd3_stream *stream, main_file *ifile,
+		    main_file *output, main_file *sfile)
+{
+  enum { MAX_FIELDS = 4 };
+  uint8_t *apphead;
+  usize_t appheadsz;
+  int ret;
+
+  /* The user may disable the application header.  Once the appheader
+   * is set, this disables setting it again. */
+  if (! option_use_appheader) { return; }
+
+  ret = xd3_get_appheader (stream, & apphead, & appheadsz);
+
+  /* Ignore failure, it only means we haven't received a header yet. */
+  if (ret != 0) { return; }
+
+  if (appheadsz > 0)
+    {
+      char *start = (char*)apphead;
+      char *slash;
+      int   place = 0;
+      char *parsed[MAX_FIELDS];
+
+      memset (parsed, 0, sizeof (parsed));
+
+      /* Count every field, but store only those that fit: the header
+       * comes from the delta file and may hold any number of '/'. */
+      while ((slash = strchr (start, '/')) != NULL)
+	{
+	  *slash = 0;
+	  if (place < MAX_FIELDS) { parsed[place] = start; }
+	  place += 1;
+	  start = slash + 1;
+	}
+
+      if (place < MAX_FIELDS) { parsed[place] = start; }
+      place += 1;
+
+      /* First take the output parameters. */
+      if (place == 2 || place == 4)
+	{
+	  main_get_appheader_params (output, parsed, 1, "output", ifile);
+	}
+
+      /* Then take the source parameters. */
+      if (place == 4)
+	{
+	  main_get_appheader_params (sfile, parsed+2, 0, "source", ifile);
+	}
+    }
+
+  /* The header is consulted only once. */
+  option_use_appheader = 0;
+  return;
+}
+
+/*********************************************************************
+ Main I/O routines
+ **********************************************************************/
+
+/* Read from the primary input like main_file_read().  When built with
+ * EXTERNAL_COMPRESSION, the first read of a file (RD_FIRST set, and
+ * input decompression enabled) instead goes through
+ * main_decompress_input_check(), which searches for the magic numbers
+ * of known compressed formats. */
+static int
+main_read_primary_input (main_file   *ifile,
+			 uint8_t    *buf,
+			 usize_t      size,
+			 usize_t     *nread)
+{
+#if EXTERNAL_COMPRESSION
+  if (! option_decompress_inputs || ! (ifile->flags & RD_FIRST))
+    {
+      return main_file_read (ifile, buf, size, nread, "input read failed");
+    }
+
+  /* The magic-number check happens once per file. */
+  ifile->flags &= ~RD_FIRST;
+
+  return main_decompress_input_check (ifile, buf, size, nread);
+#else
+  return main_file_read (ifile, buf, size, nread, "input read failed");
+#endif
+}
+
+/* Open the main output file: standard output when OFILE has no name,
+ * otherwise the named file (refusing to overwrite an existing file
+ * unless option_force is set).  Afterwards, when built with
+ * EXTERNAL_COMPRESSION, start recompression if OFILE has a compressor
+ * and recompression is enabled.  Does nothing under option_no_output.
+ * This function is expected to print any error messages.  Returns 0
+ * on success, EEXIST for a refused overwrite, or the error from
+ * opening the file or starting recompression. */
+static int
+main_open_output (xd3_stream *stream, main_file *ofile)
+{
+  int ret;
+
+  if (option_no_output)
+    {
+      return 0;
+    }
+
+  if (ofile->filename != NULL)
+    {
+      /* Stat the file to check for overwrite. */
+      if (option_force == 0 && main_file_exists (ofile))
+	{
+	  if (!option_quiet)
+	    {
+	      XPR(NT "to overwrite output file specify -f: %s\n",
+		  ofile->filename);
+	    }
+	  return EEXIST;
+	}
+
+      ret = main_file_open (ofile, ofile->filename, XO_WRITE);
+
+      if (ret != 0)
+	{
+	  return ret;
+	}
+
+      if (option_verbose > 1) { XPR(NT "output file: %s\n", ofile->filename); }
+    }
+  else
+    {
+      XSTDOUT_XF (ofile);
+
+      if (option_verbose > 1)
+	{
+	  XPR(NT "using standard output: %s\n", ofile->filename);
+	}
+    }
+
+#if EXTERNAL_COMPRESSION
+  /* Do output recompression. */
+  if (ofile->compressor != NULL && option_recompress_outputs == 1)
+    {
+      if (! option_quiet)
+	{
+	  XPR(NT "%s %s | %s\n",
+	     ofile->compressor->recomp_cmdname,
+	     ofile->compressor->recomp_options,
+	     ofile->filename);
+	}
+
+      return main_recompress_output (ofile);
+    }
+#endif
+
+  return 0;
+}
+
+/* This is called at different times for encoding and decoding.  The
+ * encoder calls it immediately, the decoder delays until the
+ * application header is received.  Stream may be NULL, in which case
+ * xd3_set_source is not called.
+ *
+ * Opens and stats SFILE (unless allow_fake_source is set), fills in
+ * SOURCE, optionally detects and starts external decompression of
+ * the source, then sizes and allocates the block cache (lru).
+ * Returns 0 on success or an error code. */
+static int
+main_set_source (xd3_stream *stream, int cmd,
+		 main_file *sfile, xd3_source *source)
+{
+  int ret = 0;
+  usize_t i;
+  uint8_t *tmp_buf = NULL;
+
+  /* Open it, check for seekability, set required xd3_source fields. */
+  if (allow_fake_source)
+    {
+      sfile->mode = XO_READ;
+      sfile->realname = sfile->filename;
+      sfile->nread = 0;
+      source->size = XOFF_T_MAX;
+    }
+  else
+    {
+      if ((ret = main_file_open (sfile, sfile->filename, XO_READ)) ||
+	  (ret = main_file_stat (sfile, & source->size, 1)))
+	{
+	  goto error;
+	}
+    }
+
+  source->name     = sfile->filename;
+  source->ioh      = sfile;
+  source->curblkno = (xoff_t) -1;
+  source->curblk   = NULL;
+
+#if EXTERNAL_COMPRESSION
+  if (option_decompress_inputs)
+    {
+      /* If encoding, read the header to check for decompression. */
+      if (IS_ENCODE (cmd))
+	{
+	  usize_t nread;
+	  tmp_buf = (uint8_t*) main_malloc (XD3_ALLOCSIZE);
+
+	  /* Do not read into a buffer that could not be allocated. */
+	  if (tmp_buf == NULL)
+	    {
+	      ret = ENOMEM;
+	      goto error;
+	    }
+
+	  if ((ret = main_file_read (sfile, tmp_buf, XD3_ALLOCSIZE,
+				     & nread, "source read failed")))
+	    {
+	      goto error;
+	    }
+
+	  /* Check known magic numbers. */
+	  for (i = 0; i < SIZEOF_ARRAY (extcomp_types); i += 1)
+	    {
+	      const main_extcomp *decomp = & extcomp_types[i];
+
+	      if ((nread > decomp->magic_size) &&
+		  memcmp (tmp_buf, decomp->magic, decomp->magic_size) == 0)
+		{
+		  sfile->compressor = decomp;
+		  break;
+		}
+	    }
+
+	  if (sfile->compressor == NULL)
+	    {
+	      if (option_verbose > 2)
+		{
+		  XPR(NT "source block 0 read (not compressed)\n");
+		}
+	    }
+	}
+
+      /* In either the encoder or decoder, start decompression. */
+      if (sfile->compressor)
+	{
+	  /* Remember the compressed size for the report below. */
+	  xoff_t osize = source->size;
+
+	  if ((ret = main_decompress_source (sfile, source)))
+	    {
+	      goto error;
+	    }
+
+	  if (! option_quiet)
+	    {
+	      char s1[32], s2[32];
+	      XPR(NT "%s | %s %s => %s %.1f%% [ %s , %s ]\n",
+		 sfile->filename,
+		 sfile->compressor->decomp_cmdname,
+		 sfile->compressor->decomp_options,
+		 sfile->realname,
+		 100.0 * source->size / osize,
+		 main_format_bcnt (osize, s1),
+		 main_format_bcnt (source->size, s2));
+	    }
+	}
+    }
+#endif
+
+  /* At this point we know source->size.
+   * Source buffer, blksize, LRU init. */
+  if (source->size < option_srcwinsz)
+    {
+      /* Reduce sizes to actual source size, read whole file */
+      option_srcwinsz = source->size;
+      source->blksize = source->size;
+      lru_size = 1;
+    }
+  else
+    {
+      option_srcwinsz = max(option_srcwinsz, XD3_MINSRCWINSZ);
+
+      source->blksize = (option_srcwinsz / LRU_SIZE);
+      lru_size = LRU_SIZE;
+    }
+
+  main_blklru_list_init (& lru_list);
+  main_blklru_list_init (& lru_free);
+
+  if (option_verbose)
+    {
+      static char buf[32];
+
+      XPR(NT "source %s winsize %s size %"Q"u\n",
+	  sfile->filename, main_format_bcnt(option_srcwinsz, buf),
+	  source->size);
+    }
+
+  if (option_verbose > 1)
+    {
+      XPR(NT "source block size: %u\n", source->blksize);
+    }
+
+  if ((lru = (main_blklru*)
+       main_malloc (sizeof (main_blklru) * lru_size)) == NULL)
+    {
+      ret = ENOMEM;
+      goto error;
+    }
+
+  /* Every cache entry starts out empty and on the free list. */
+  for (i = 0; i < lru_size; i += 1)
+    {
+      lru[i].blkno = (xoff_t) -1;
+
+      if ((lru[i].blk = (uint8_t*) main_malloc (source->blksize)) == NULL)
+	{
+	  ret = ENOMEM;
+	  goto error;
+	}
+
+      main_blklru_list_push_back (& lru_free, & lru[i]);
+    }
+
+  if (stream && (ret = xd3_set_source (stream, source)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (stream, ret));
+      goto error;
+    }
+
+  /* Reached on success as well: ret is 0 in that case. */
+ error:
+  if (tmp_buf != NULL)
+    {
+      main_free (tmp_buf);
+    }
+
+  return ret;
+}
+
+/* Choose the input window size: option_winsize, reduced to the size
+ * of IFILE when main_file_stat() can report it, but never less than
+ * XD3_ALLOCSIZE. */
+static usize_t
+main_get_winsize (main_file *ifile) {
+  usize_t winsize = option_winsize;
+  xoff_t  input_len;
+  int     have_len;
+
+  have_len = (main_file_stat (ifile, &input_len, 0) == 0);
+
+  if (have_len)
+    {
+      winsize = (usize_t) min(input_len, (xoff_t) winsize);
+    }
+
+  winsize = max(winsize, XD3_ALLOCSIZE);
+
+  if (option_verbose > 1)
+    {
+      XPR(NT "input window size: %u\n", winsize);
+    }
+
+  return winsize;
+}
+
+/*******************************************************************
+ Source routines
+ *******************************************************************/
+
+/* This is the callback for reading a block of source.  This function
+ * is blocking and it implements a small LRU.
+ *
+ * Note that it is possible for main_input() to handle getblk requests
+ * in a non-blocking manner.  If the callback is NULL then the caller
+ * of xd3_*_input() must handle the XD3_GETSRCBLK return value and
+ * fill the source in the same way.  See xd3_getblk for details.  To
+ * see an example of non-blocking getblk, see xdelta-test.h.
+ *
+ * On success source->curblk, source->curblkno and source->onblk
+ * describe block BLKNO and 0 is returned.  Otherwise the result is a
+ * seek/read error, XD3_TOOFARBACK (do_not_lru mode only), or
+ * XD3_INTERNAL when the number of bytes read differs from the
+ * expected block length. */
+static int
+main_getblk_func (xd3_stream *stream,
+		  xd3_source *source,
+		  xoff_t      blkno)
+{
+  int ret;
+  xoff_t pos = blkno * source->blksize;  /* Byte offset of the block. */
+  main_file *sfile = (main_file*) source->ioh;
+  main_blklru *blru  = NULL;
+  usize_t onblk = xd3_bytes_on_srcblk_fast (source, blkno);
+  usize_t nread;
+  usize_t i;
+
+  /* With a fake source nothing is read: the first cache buffer is
+   * handed back as-is. */
+  if (allow_fake_source)
+    {
+      source->curblkno = blkno;
+      source->onblk    = onblk;
+      source->curblk   = lru[0].blk;
+      return 0;
+    }
+
+  if (do_not_lru)
+    {
+      /* Direct lookup assumes sequential scan w/o skipping blocks. */
+      int idx = blkno % lru_size;
+      if (lru[idx].blkno == blkno)
+	{
+	  source->curblkno = blkno;
+	  source->onblk    = onblk;
+	  source->curblk   = lru[idx].blk;
+	  lru_hits += 1;
+	  return 0;
+	}
+
+      /* The slot may only be refilled when it is empty or holds the
+       * block exactly lru_size earlier; anything else is refused. */
+      if (lru[idx].blkno != (xoff_t)-1 &&
+	  lru[idx].blkno != (xoff_t)(blkno - lru_size))
+	{
+	  return XD3_TOOFARBACK;
+	}
+    }
+  else
+    {
+      /* Sequential search through LRU. */
+      for (i = 0; i < lru_size; i += 1)
+	{
+	  if (lru[i].blkno == blkno)
+	    {
+	      /* Hit: move the entry to the back of the in-use list. */
+	      main_blklru_list_remove (& lru[i]);
+	      main_blklru_list_push_back (& lru_list, & lru[i]);
+
+	      source->curblkno = blkno;
+	      source->onblk    = onblk;
+	      source->curblk   = lru[i].blk;
+	      lru_hits += 1;
+	      return 0;
+	    }
+	}
+    }
+
+  /* Miss: pick an entry to fill, preferring an unused one.
+   * NOTE(review): if both lists were empty, blru would stay NULL and
+   * be dereferenced below; presumably the setup code always provides
+   * at least one entry -- confirm. */
+  if (! main_blklru_list_empty (& lru_free))
+    {
+      blru = main_blklru_list_pop_front (& lru_free);
+    }
+  else if (! main_blklru_list_empty (& lru_list))
+    {
+      if (do_not_lru) {
+	blru = & lru[blkno % lru_size];
+	main_blklru_list_remove(blru);
+      } else {
+	blru = main_blklru_list_pop_front (& lru_list);
+      }
+      /* Only evictions are counted as misses. */
+      lru_misses += 1;
+    }
+
+  lru_filled += 1;
+
+  /* NOTE(review): on the error returns below the entry taken above
+   * is not put back on either list. */
+  if ((ret = main_file_seek (sfile, pos)))
+    {
+      return ret;
+    }
+
+  if ((ret = main_file_read (sfile, (uint8_t*) blru->blk, source->blksize,
+			     & nread, "source read failed")))
+    {
+      return ret;
+    }
+
+  /* The byte count must equal the expected length of this block. */
+  if (nread != onblk)
+    {
+      XPR(NT "source file size change: %s\n", sfile->filename);
+      return XD3_INTERNAL;
+    }
+
+  main_blklru_list_push_back (& lru_list, blru);
+
+  if (option_verbose > 3)
+    {
+      /* blru->blkno still holds the previous occupant here. */
+      if (blru->blkno != (xoff_t)-1)
+	{
+	  XPR(NT "source block %"Q"u ejects %"Q"u (lru_hits=%u, "
+	      "lru_misses=%u, lru_filled=%u)\n",
+	      blkno, blru->blkno, lru_hits, lru_misses, lru_filled);
+	}
+      else
+	{
+	  XPR(NT "source block %"Q"u read (lru_hits=%u, lru_misses=%u, "
+	      "lru_filled=%u)\n", blkno, lru_hits, lru_misses, lru_filled);
+	}
+    }
+
+  blru->blkno      = blkno;
+  source->curblk   = blru->blk;
+  source->curblkno = blkno;
+  source->onblk    = onblk;
+
+  return 0;
+}
+
+/*********************************************************************
+ Main routines
+ ********************************************************************/
+
+/* This is a generic input function.  It calls the xd3_encode_input or
+ * xd3_decode_input functions and makes calls to the various input
+ * handling routines above, which coordinate external decompression.
+ */
+static int
+main_input (xd3_cmd     cmd,
+	    main_file   *ifile,
+	    main_file   *ofile,
+	    main_file   *sfile)
+{
+  int        ret;
+  xd3_stream stream;
+  usize_t    nread;
+  usize_t    winsize;
+  int        stream_flags = 0;
+  xd3_config config;
+  xd3_source source;
+  xoff_t     last_total_in = 0;
+  xoff_t     last_total_out = 0;
+  long       start_time;
+  int        stdout_only = 0;
+  int (*input_func) (xd3_stream*);
+  int (*output_func) (xd3_stream*, main_file *);
+
+  memset (& stream, 0, sizeof (stream));
+  memset (& source, 0, sizeof (source));
+  memset (& config, 0, sizeof (config));
+
+  config.alloc = main_alloc;
+  config.freef = main_free1;
+
+  config.iopt_size = option_iopt_size;
+  config.sprevsz = option_sprevsz;
+
+  do_not_lru = 0;
+
+  start_time = get_millisecs_now ();
+
+  if (option_use_checksum) { stream_flags |= XD3_ADLER32; }
+
+  /* main_input setup. */
+  switch ((int) cmd)
+    {
+#if VCDIFF_TOOLS
+           if (1) { case CMD_PRINTHDR:   stream_flags |= XD3_JUST_HDR; }
+      else if (1) { case CMD_PRINTHDRS:  stream_flags |= XD3_SKIP_WINDOW; }
+      else        { case CMD_PRINTDELTA: stream_flags |= XD3_SKIP_EMIT; }
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func    = xd3_decode_input;
+      output_func   = main_print_func;
+      stream_flags |= XD3_ADLER32_NOVER;
+      stdout_only   = 1;
+      break;
+
+    case CMD_RECODE:
+    case CMD_MERGE:
+    case CMD_MERGE_ARG:
+      /* No source will be read */
+      stream_flags |= XD3_ADLER32_NOVER | XD3_SKIP_EMIT;
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func = xd3_decode_input;
+
+      if ((ret = main_init_recode_stream ()))
+        {
+	  return EXIT_FAILURE;
+        }
+
+      if (cmd == CMD_RECODE) { output_func = main_recode_func; }
+      else                   { output_func = main_merge_func; }
+      break;
+#endif /* VCDIFF_TOOLS */
+
+#if XD3_ENCODER
+    case CMD_ENCODE:
+      do_not_lru  = 1;
+      input_func  = xd3_encode_input;
+      output_func = main_write_output;
+
+      if (option_no_compress)      { stream_flags |= XD3_NOCOMPRESS; }
+      if (option_use_altcodetable) { stream_flags |= XD3_ALT_CODE_TABLE; }
+      if (option_smatch_config)
+	{
+	  char *s = option_smatch_config, *e;
+	  int values[XD3_SOFTCFG_VARCNT];
+	  int got;
+
+	  config.smatch_cfg = XD3_SMATCH_SOFT;
+
+	  for (got = 0; got < XD3_SOFTCFG_VARCNT; got += 1, s = e + 1)
+	    {
+	      values[got] = strtol (s, &e, 10);
+
+	      if ((values[got] < 0) ||
+		  (e == s) ||
+		  (got < XD3_SOFTCFG_VARCNT-1 && *e == 0) ||
+		  (got == XD3_SOFTCFG_VARCNT-1 && *e != 0))
+		{
+		  XPR(NT "invalid string match specifier (-C) %d: %s\n",
+		      got, s);
+		  return EXIT_FAILURE;
+		}
+	    }
+
+	  config.smatcher_soft.large_look    = values[0];
+	  config.smatcher_soft.large_step    = values[1];
+	  config.smatcher_soft.small_look    = values[2];
+	  config.smatcher_soft.small_chain   = values[3];
+	  config.smatcher_soft.small_lchain  = values[4];
+	  config.smatcher_soft.max_lazy      = values[5];
+	  config.smatcher_soft.long_enough   = values[6];
+	}
+      else
+	{
+	  if (option_verbose > 1)
+	    {
+	      XPR(NT "compression level: %d\n", option_level);
+	    }
+	  if (option_level == 0)
+	    {
+	      stream_flags |= XD3_NOCOMPRESS;
+	      config.smatch_cfg = XD3_SMATCH_FASTEST;
+	    }
+	  else if (option_level == 1)
+	    { config.smatch_cfg = XD3_SMATCH_FASTEST; }
+	  else if (option_level == 2)
+	    { config.smatch_cfg = XD3_SMATCH_FASTER; }
+	  else if (option_level <= 5)
+	    { config.smatch_cfg = XD3_SMATCH_FAST; }
+	  else if (option_level == 6)
+	    { config.smatch_cfg = XD3_SMATCH_DEFAULT; }
+	  else
+	    { config.smatch_cfg = XD3_SMATCH_SLOW; }
+	}
+      break;
+#endif
+    case CMD_DECODE:
+      if (option_use_checksum == 0) { stream_flags |= XD3_ADLER32_NOVER; }
+      ifile->flags |= RD_NONEXTERNAL;
+      input_func    = xd3_decode_input;
+      output_func   = main_write_output;
+      break;
+    default:
+      XPR(NT "internal error\n");
+      return EXIT_FAILURE;
+    }
+
+  main_bsize = winsize = main_get_winsize (ifile);
+
+  if ((main_bdata = (uint8_t*) main_malloc (winsize)) == NULL)
+    {
+      return EXIT_FAILURE;
+    }
+
+  if (IS_ENCODE (cmd))
+    {
+      /* When encoding, open the source file, possibly decompress it.
+       * The decoder delays this step until XD3_GOTHEADER. */
+      if (sfile->filename != NULL &&
+	  (ret = main_set_source (NULL, cmd, sfile, & source)))
+	{
+	  return EXIT_FAILURE;
+	}
+    }
+
+  config.winsize = winsize;
+  config.srcwin_maxsz = option_srcwinsz;
+  config.getblk = main_getblk_func;
+  config.flags = stream_flags;
+
+  if ((ret = main_set_secondary_flags (&config)) ||
+      (ret = xd3_config_stream (& stream, & config)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+#if VCDIFF_TOOLS
+  if ((cmd == CMD_MERGE || cmd == CMD_MERGE_ARG) && 
+      (ret = xd3_whole_state_init (& stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+#endif
+
+  if (IS_ENCODE (cmd) && sfile->filename != NULL &&
+      (ret = xd3_set_source (& stream, & source)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+  /* This times each window. */
+  get_millisecs_since ();
+
+  /* Main input loop. */
+  do
+    {
+      xoff_t input_offset;
+      xoff_t input_remain;
+      usize_t try_read;
+
+      input_offset = ifile->nread;
+
+      input_remain = XOFF_T_MAX - input_offset;
+
+      try_read = (usize_t) min ((xoff_t) config.winsize, input_remain);
+
+      if ((ret = main_read_primary_input (ifile, main_bdata,
+					  try_read, & nread)))
+	{
+	  return EXIT_FAILURE;
+	}
+
+      /* If we've reached EOF tell the stream to flush. */
+      if (nread < try_read)
+	{
+	  stream.flags |= XD3_FLUSH;
+	}
+
+#if XD3_ENCODER
+      /* After the first main_read_primary_input completes, we know
+       * all the information needed to encode the application
+       * header. */
+      if (cmd == CMD_ENCODE &&
+	  (ret = main_set_appheader (& stream, ifile, sfile)))
+	{
+	  return EXIT_FAILURE;
+	}
+#endif
+      xd3_avail_input (& stream, main_bdata, nread);
+
+      /* If we read zero bytes after encoding at least one window... */
+      if (nread == 0 && stream.current_window > 0) {
+	break;
+      }
+
+    again:
+      ret = input_func (& stream);
+
+      switch (ret)
+	{
+	case XD3_INPUT:
+	  continue;
+
+	case XD3_GOTHEADER:
+	  {
+	    XD3_ASSERT (stream.current_window == 0);
+
+	    /* Need to process the appheader as soon as possible.  It may
+	     * contain a suggested default filename/decompression routine for
+	     * the ofile, and it may contain default/decompression routine for
+	     * the sources. */
+	    if (cmd == CMD_DECODE)
+	      {
+		/* May need to set the sfile->filename if none was given. */
+		main_get_appheader (& stream, ifile, ofile, sfile);
+
+		/* Now open the source file. */
+		  if ((sfile->filename != NULL) &&
+		      (ret = main_set_source (& stream, cmd, sfile, & source)))
+		  {
+		    return EXIT_FAILURE;
+		  }
+	      }
+	    else if (cmd == CMD_PRINTHDR ||
+		     cmd == CMD_PRINTHDRS ||
+		     cmd == CMD_PRINTDELTA ||
+		     cmd == CMD_RECODE)
+	      {
+		if (sfile->filename == NULL)
+		  {
+		    allow_fake_source = 1;
+		    sfile->filename = "<placeholder>";
+		    main_set_source (& stream, cmd, sfile, & source);
+		  }
+	      }
+	  }
+	/* FALLTHROUGH */
+	case XD3_WINSTART:
+	  {
+	    /* e.g., set or unset XD3_SKIP_WINDOW. */
+	    goto again;
+	  }
+
+	case XD3_OUTPUT:
+	  {
+	    /* Defer opening the output file until the stream produces its
+	     * first output for both encoder and decoder, this way we
+	     * delay long enough for the decoder to receive the
+	     * application header.  (Or longer if there are skipped
+	     * windows, but I can't think of any reason not to delay
+	     * open.) */
+	    if (ofile != NULL &&
+		! main_file_isopen (ofile) &&
+		(ret = main_open_output (& stream, ofile)) != 0)
+	      {
+		return EXIT_FAILURE;
+	      }
+	    
+	    if ((ret = output_func (& stream, ofile)) &&
+		(ret != PRINTHDR_SPECIAL))
+	      {
+		return EXIT_FAILURE;
+	      }
+
+	    if (ret == PRINTHDR_SPECIAL)
+	      {
+		xd3_abort_stream (& stream);
+		ret = EXIT_SUCCESS;
+		goto done;
+	      }
+
+	    ret = 0;
+
+	    xd3_consume_output (& stream);
+	    goto again;
+	  }
+
+	case XD3_WINFINISH:
+	  {
+	    if (IS_ENCODE (cmd) || cmd == CMD_DECODE || cmd == CMD_RECODE)
+	      {
+		if (! option_quiet && IS_ENCODE (cmd) &&
+		    main_file_isopen (sfile))
+		  {
+		    /* Warn when no source copies are found */
+		    if (option_verbose && ! xd3_encoder_used_source (& stream))
+		      {
+			XPR(NT "warning: input window %"Q"u..%"Q"u has "
+			    "no source copies\n",
+			    stream.current_window * winsize,
+			    (stream.current_window+1) * winsize);
+		      }
+
+		    /* Limited i-buffer size affects source copies */
+		    if (option_verbose > 1 &&
+			stream.i_slots_used > stream.iopt_size)
+		      {
+			XPR(NT "warning: input position %"Q"u overflowed "
+			    "instruction buffer, needed %u (vs. %u), "
+			    "consider raising -I\n",
+			    stream.current_window * winsize,
+			    stream.i_slots_used, stream.iopt_size);
+		      }
+		  }
+
+		if (option_verbose)
+		  {
+		    char rrateavg[32], wrateavg[32], tm[32];
+		    char rdb[32], wdb[32];
+		    char trdb[32], twdb[32];
+		    long millis = get_millisecs_since ();
+		    usize_t this_read = (usize_t)(stream.total_in -
+						  last_total_in);
+		    usize_t this_write = (usize_t)(stream.total_out -
+						   last_total_out);
+		    last_total_in = stream.total_in;
+		    last_total_out = stream.total_out;
+
+		    if (option_verbose > 1)
+		      {
+			XPR(NT "%"Q"u: in %s (%s): out %s (%s): "
+			    "total in %s: out %s: %s\n",
+			    stream.current_window,
+			    main_format_bcnt (this_read, rdb),
+			    main_format_rate (this_read, millis, rrateavg),
+			    main_format_bcnt (this_write, wdb),
+			    main_format_rate (this_write, millis, wrateavg),
+			    main_format_bcnt (stream.total_in, trdb),
+			    main_format_bcnt (stream.total_out, twdb),
+			    main_format_millis (millis, tm));
+		      }
+		    else
+		      {
+			XPR(NT "%"Q"u: in %s: out %s: total in %s: "
+			    "out %s: %s\n",
+ 			    stream.current_window,
+			    main_format_bcnt (this_read, rdb),
+			    main_format_bcnt (this_write, wdb),
+			    main_format_bcnt (stream.total_in, trdb),
+			    main_format_bcnt (stream.total_out, twdb),
+			    main_format_millis (millis, tm));
+		      }
+		  }
+	      }
+	    goto again;
+	  }
+
+	default:
+	  /* input_func() error */
+	  XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+	  return EXIT_FAILURE;
+	}
+    }
+  while (nread == config.winsize);
+done:
+  /* Close the inputs. (ifile must be open, sfile may be open) */
+  main_file_close (ifile);
+  if (sfile != NULL)
+    {
+      main_file_close (sfile);
+    }
+
+#if VCDIFF_TOOLS
+  if (cmd == CMD_MERGE &&
+      (ret = main_merge_output (& stream, ofile)))
+    {
+      return EXIT_FAILURE;
+    }
+
+  if (cmd == CMD_MERGE_ARG)
+    {
+      xd3_swap_whole_state (& stream.whole_target,
+			    & recode_stream->whole_target);
+    }
+#endif /* VCDIFF_TOOLS */
+
+  /* If output file is not open yet because of delayed-open, it means
+   * we never encountered a window in the delta, but it could have had
+   * a VCDIFF header?  TODO: solve this elsewhere.  For now, it prints
+   * "nothing to output" below, but the check doesn't happen in case
+   * of option_no_output.  */
+  if (! option_no_output && ofile != NULL)
+    {
+      if (!stdout_only && ! main_file_isopen (ofile))
+	{
+	  XPR(NT "nothing to output: %s\n", ifile->filename);
+	  return EXIT_FAILURE;
+	}
+
+      /* Have to close the output before calling
+       * main_external_compression_finish, or else it hangs. */
+      if (main_file_close (ofile) != 0)
+	{
+	  return EXIT_FAILURE;
+	}
+    }
+
+#if EXTERNAL_COMPRESSION
+  if ((ret = main_external_compression_finish ()))
+    {
+      XPR(NT "external compression commands failed\n");
+      return EXIT_FAILURE;
+    }
+#endif
+
+  if ((ret = xd3_close_stream (& stream)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (& stream, ret));
+      return EXIT_FAILURE;
+    }
+
+#if XD3_ENCODER
+  if (option_verbose > 1 && cmd == CMD_ENCODE)
+    {
+      XPR(NT "scanner configuration: %s\n", stream.smatcher.name);
+      XPR(NT "target hash table size: %u\n", stream.small_hash.size);
+      if (sfile != NULL && sfile->filename != NULL)
+	{
+	  XPR(NT "source hash table size: %u\n", stream.large_hash.size);
+	}
+    }
+
+  if (option_verbose > 2 && cmd == CMD_ENCODE)
+    {
+      XPR(NT "source copies: %"Q"u (%"Q"u bytes)\n",
+	  stream.n_scpy, stream.l_scpy);
+      XPR(NT "target copies: %"Q"u (%"Q"u bytes)\n",
+	  stream.n_tcpy, stream.l_tcpy);
+      XPR(NT "adds: %"Q"u (%"Q"u bytes)\n", stream.n_add, stream.l_add);
+      XPR(NT "runs: %"Q"u (%"Q"u bytes)\n", stream.n_run, stream.l_run);
+    }
+#endif
+
+  xd3_free_stream (& stream);
+
+  if (option_verbose)
+    {
+      char tm[32];
+      long end_time = get_millisecs_now ();
+      xoff_t nwrite = ofile != NULL ? ofile->nwrite : 0;
+
+      XPR(NT "finished in %s; input %"Q"u  output %"Q"u bytes  (%0.2f%%)\n",
+	  main_format_millis (end_time - start_time, tm),
+	  ifile->nread, nwrite, 100.0 * nwrite / ifile->nread);
+    }
+
+  return EXIT_SUCCESS;
+}
+
+/* Free all memory held by the command-line driver and reset its
+ * single-use globals, so that nothing stale survives into a later run. */
+static void
+main_cleanup (void)
+{
+  usize_t i;
+
+  /* Free the header unless it aliases the caller-owned -A string, but
+   * clear the pointer either way so that no stale alias is left behind. */
+  if (appheader_used != NULL && appheader_used != option_appheader)
+    {
+      main_free (appheader_used);
+    }
+  appheader_used = NULL;
+
+  main_free (main_bdata);
+  main_bdata = NULL;
+  main_bsize = 0;
+
+#if EXTERNAL_COMPRESSION
+  main_free (ext_tmpfile);
+  ext_tmpfile = NULL;
+#endif
+
+  for (i = 0; lru && i < lru_size; i += 1)
+    {
+      main_free (lru[i].blk);
+    }
+  main_free (lru);
+  lru = NULL;
+  lru_hits = 0;
+  lru_misses = 0;
+  lru_filled = 0;
+
+  if (recode_stream != NULL)
+    {
+      xd3_free_stream (recode_stream);
+      main_free (recode_stream);
+      recode_stream = NULL;
+    }
+
+  if (merge_stream != NULL)
+    {
+      xd3_free_stream (merge_stream);
+      main_free (merge_stream);
+      merge_stream = NULL;
+    }
+
+  XD3_ASSERT (main_mallocs == 0);
+}
+
+/* Build the argument vector main() parses: argv[0], the space-separated
+ * words of $XDELTA (if set), then argv[1..].  *argv_free and *env_free
+ * receive the buffers the caller must main_free(); they stay NULL (and
+ * argc/argv pass through) when $XDELTA is unset or allocation fails. */
+static void
+setup_environment (int argc,
+		   char **argv,
+		   int *argc_out,
+		   char ***argv_out,
+		   char ***argv_free,
+		   char **env_free)
+{
+  int n, i, i0;
+  char *p, *v = getenv("XDELTA");
+
+  /* Default result: the caller's own arguments, nothing to free. */
+  (*argc_out) = argc;
+  (*argv_out) = argv;
+  (*argv_free) = NULL;
+  (*env_free) = NULL;
+  if (v == NULL) { return; }
+
+  /* Work on a private copy: the words are NUL-terminated in place. */
+  (*env_free) = (char*) main_malloc(strlen(v) + 1);
+  if ((*env_free) == NULL) { return; }
+  strcpy(*env_free, v);
+
+  /* Space needed for extra args, at least # of spaces */
+  n = argc + 1;
+  for (p = *env_free; *p != 0; ) {
+    if (*p++ == ' ') { n++; }
+  }
+
+  (*argv_free) = (char**) main_malloc(sizeof(char*) * (n + 1));
+  if ((*argv_free) == NULL) {
+    main_free(*env_free);
+    (*env_free) = NULL;
+    return;
+  }
+  (*argv_out) = (*argv_free);
+  (*argv_out)[0] = argv[0];
+
+  /* Skip leading spaces so they do not produce an empty first word. */
+  for (i = 1, p = *env_free; *p == ' '; p++) { }
+  while (*p != 0) {
+    (*argv_out)[i++] = p;
+    while (*p != ' ' && *p != 0) { p++; }
+    while (*p == ' ') { *p++ = 0; }
+  }
+  for (i0 = 1; i0 < argc; i0++) { (*argv_out)[i++] = argv[i0]; }
+
+  /* Counting spaces is an upper bound, argv stays NULL terminated. */
+  (*argc_out) = i;
+  while (i <= n) { (*argv_out)[i++] = NULL; }
+}
+/* Program entry: parse options (XDELTA env + argv), then run the command. */
+int
+#if PYTHON_MODULE || SWIG_MODULE || NOT_MAIN
+xd3_main_cmdline (int argc, char **argv)
+#else
+main (int argc, char **argv)
+#endif
+{
+  static const char *flags =
+    "0123456789cdefhnqvDJNORTVs:m:B:C:E:F:I:L:O:M:P:W:A::S::";
+  xd3_cmd cmd;
+  main_file ifile;
+  main_file ofile;
+  main_file sfile;
+  main_merge_list merge_order;
+  main_merge *merge;
+  int my_optind;
+  char *my_optarg;
+  char *my_optstr;
+  char *sfilename;
+  int env_argc;
+  char **env_argv;
+  char **free_argv;  /* malloc() in setup_environment() */
+  char *free_value;  /* malloc() in setup_environment() */
+  int ret;
+  /* NOTE(review): flags accepts E, F, L, M, O but no case handles them. */
+#ifdef _WIN32
+  GetStartupInfo(&winStartupInfo);
+  setvbuf(stderr, NULL, _IONBF, 0);  /* Do not buffer stderr */
+#endif
+
+  main_file_init (& ifile);
+  main_file_init (& ofile);
+  main_file_init (& sfile);
+  main_merge_list_init (& merge_order);
+
+  reset_defaults();
+  /* Splice the words of $XDELTA (if set) in after argv[0]. */
+  free_argv = NULL;
+  free_value = NULL;
+  setup_environment(argc, argv, &env_argc, &env_argv,
+		    &free_argv, &free_value);
+  cmd = CMD_NONE;
+  sfilename = NULL;
+  my_optind = 1;
+  argv = env_argv;
+  argc = env_argc;
+  program_name = env_argv[0];
+  extcomp_types[0].recomp_cmdname = program_name;
+  extcomp_types[0].decomp_cmdname = program_name;
+  /* Re-entered for each argv word; my_optind is the index of that word. */
+ takearg:
+  my_optarg = NULL;
+  my_optstr = argv[my_optind];
+
+  /* This doesn't use getopt() because it makes trouble for -P & python which
+   * reenter main() and thus care about freeing all memory.  I never had much
+   * trust for getopt anyway, it's too opaque.  This implements a fairly
+   * standard non-long-option getopt with support for named operations (e.g.,
+   * "xdelta3 [encode|decode|printhdr...] < in > out"). */
+  if (my_optstr)
+    {
+      if (*my_optstr == '-')    { my_optstr += 1; }
+      else if (cmd == CMD_NONE) { goto nonflag; }
+      else                      { my_optstr = NULL; }
+    }
+  while (my_optstr)
+    {
+      char *s;
+      my_optarg = NULL;
+      if ((ret = *my_optstr++) == 0) { my_optind += 1; goto takearg; }
+
+      /* Option handling: first check for one ':' following the option in
+       * flags, then check for two.  The syntax allows:
+       *
+       * 1. -Afoo                   defines optarg="foo"
+       * 2. -A foo                  defines optarg="foo"
+       * 3. -A ""                   defines optarg="" (allows empty-string)
+       * 4. -A [EOA or -moreargs]   error (mandatory case)
+       * 5. -A [EOA -moreargs]      defines optarg=NULL (optional case)
+       * 6. -A=foo                  defines optarg="foo"
+       * 7. -A=                     defines optarg="" (mandatory case)
+       * 8. -A=                     defines optarg=NULL (optional case)
+       *
+       * See tests in test_command_line_arguments().
+       */
+      s = strchr (flags, ret);
+      if (s && s[1] && s[1] == ':')
+	{
+	  int eqcase = 0;  /* NOTE(review): assigned below but never read */
+	  int option = s[2] && s[2] == ':';  /* "::": argument is optional */
+
+	  /* Case 1, set optarg to the remaining characters. */
+	  my_optarg = my_optstr;
+	  my_optstr = "";
+
+	  /* Case 2-5 */
+	  if (*my_optarg == 0)
+	    {
+	      /* Condition 4-5 */
+	      int have_arg = (my_optind < (argc - 1) &&
+			      *argv[my_optind+1] != '-');
+
+	      if (! have_arg)
+		{
+		  if (! option)
+		  {
+		    /* Case 4 */
+		    XPR(NT "-%c: requires an argument\n", ret);
+		    ret = EXIT_FAILURE;
+		    goto cleanup;
+		  }
+		  /* Case 5. */
+		  my_optarg = NULL;
+		}
+	      else
+		{
+		  /* Case 2-3. */
+		  my_optarg = argv[++my_optind];
+		}
+	    }
+	  /* Case 6-8. */
+	  else if (*my_optarg == '=')
+	    {
+	      /* Remove the = in all cases. */
+	      my_optarg += 1;
+	      eqcase = 1;
+
+	      if (option && *my_optarg == 0)
+		{
+		  /* Case 8. */
+		  my_optarg = NULL;
+		}
+	    }
+	}
+      /* ret holds the option character read at the top of this loop. */
+      switch (ret)
+	{
+	/* case: if no '-' was found, maybe check for a command name. */
+	nonflag:
+	       if (strcmp (my_optstr, "decode") == 0) { cmd = CMD_DECODE; }
+	  else if (strcmp (my_optstr, "encode") == 0)
+	    {
+#if XD3_ENCODER
+	      cmd = CMD_ENCODE;
+#else
+	      XPR(NT "encoder support not compiled\n");
+	      return EXIT_FAILURE;  /* NOTE(review): skips the cleanup below */
+#endif
+	    }
+	  else if (strcmp (my_optstr, "config") == 0) { cmd = CMD_CONFIG; }
+#if REGRESSION_TEST
+	  else if (strcmp (my_optstr, "test") == 0) { cmd = CMD_TEST; }
+#endif
+#if VCDIFF_TOOLS
+	  else if (strcmp (my_optstr, "printhdr") == 0) { cmd = CMD_PRINTHDR; }
+	  else if (strcmp (my_optstr, "printhdrs") == 0)
+	    { cmd = CMD_PRINTHDRS; }
+	  else if (strcmp (my_optstr, "printdelta") == 0)
+	    { cmd = CMD_PRINTDELTA; }
+	  else if (strcmp (my_optstr, "recode") == 0) { cmd = CMD_RECODE; }
+	  else if (strcmp (my_optstr, "merge") == 0) { cmd = CMD_MERGE; }
+#endif
+
+	  /* If no option was found and still no command, let the default
+	   * command be encode.  The remaining args are treated as
+	   * filenames. */
+	  if (cmd == CMD_NONE)
+	    {
+	      cmd = CMD_DEFAULT;
+	      my_optstr = NULL;
+	      break;
+	    }
+	  else
+	    {
+	      /* But if we find a command name, continue the getopt loop. */
+	      my_optind += 1;
+	      goto takearg;
+	    }
+
+	  /* gzip-like options */
+	case '0': case '1': case '2': case '3': case '4':
+	case '5': case '6': case '7': case '8': case '9':
+	  option_level = ret - '0';
+	  break;
+	case 'f': option_force = 1; break;
+	case 'v': option_verbose += 1; option_quiet = 0; break;
+	case 'q': option_quiet = 1; option_verbose = 0; break;
+	case 'c': option_stdout = 1; break;
+	case 'd':
+	  if (cmd == CMD_NONE) { cmd = CMD_DECODE; }
+	  else { ret = main_help (); goto exit; }
+	  break;
+	case 'e':
+#if XD3_ENCODER
+	  if (cmd == CMD_NONE) { cmd = CMD_ENCODE; }
+	  else { ret = main_help (); goto exit; }
+	  break;
+#else
+	  XPR(NT "encoder support not compiled\n");
+	  return EXIT_FAILURE;  /* NOTE(review): skips the cleanup below */
+#endif
+
+	case 'n': option_use_checksum = 0; break;
+	case 'N': option_no_compress = 1; break;
+	case 'T': option_use_altcodetable = 1; break;
+	case 'C': option_smatch_config = my_optarg; break;
+	case 'J': option_no_output = 1; break;
+	case 'S': if (my_optarg == NULL)
+	    {
+	      option_use_secondary = 1;
+	      option_secondary = "none";
+	    }
+	  else
+	    {
+	      option_use_secondary = 1;
+	      option_secondary = my_optarg;
+	    }
+	  break;
+	case 'A': if (my_optarg == NULL) { option_use_appheader = 0; }
+	          else { option_appheader = (uint8_t*) my_optarg; } break;
+	case 'B':
+	  if ((ret = main_atou (my_optarg, & option_srcwinsz, XD3_MINSRCWINSZ,
+				0, 'B')))
+	    {
+	      goto exit;
+	    }
+	  break;
+	case 'I':
+	  if ((ret = main_atou (my_optarg, & option_iopt_size, 0,
+				0, 'I')))
+	    {
+	      goto exit;
+	    }
+	  break;
+	case 'P':
+	  if ((ret = main_atou (my_optarg, & option_sprevsz, 0,
+				0, 'P')))
+	    {
+	      goto exit;
+	    }
+	  break;
+	case 'W':
+	  if ((ret = main_atou (my_optarg, & option_winsize, XD3_ALLOCSIZE,
+				XD3_HARDMAXWINSIZE, 'W')))
+	  {
+	    goto exit;
+	  }
+	  break;
+	case 'D':
+#if EXTERNAL_COMPRESSION == 0
+	  if (option_verbose > 0)
+	    {
+	      XPR(NT "warning: -D option ignored, "
+		       "external compression support was not compiled\n");
+	    }
+#else
+	  option_decompress_inputs  = 0;
+#endif
+	  break;
+	case 'R':
+#if EXTERNAL_COMPRESSION == 0
+	  if (option_verbose > 0)
+	    {
+	      XPR(NT "warning: -R option ignored, "
+		       "external compression support was not compiled\n");
+	    }
+#else
+	  option_recompress_outputs = 0;
+#endif
+	  break;
+	case 's':
+	  if (sfilename != NULL)
+	    {
+	      XPR(NT "specify only one source file\n");
+	      goto cleanup;
+	    }
+
+	  sfilename = my_optarg;
+	  break;
+	case 'm':
+	  if ((merge = (main_merge*)
+	       main_malloc (sizeof (main_merge))) == NULL)
+	    {
+	      goto cleanup;
+	    }
+	  main_merge_list_push_back (& merge_order, merge);
+	  merge->filename = my_optarg;
+	  break;
+	case 'V':
+	  ret = main_version (); goto exit;
+	default:
+	  ret = main_help (); goto exit;
+	}
+    }
+  /* Option parsing is done; what is left of argv are the filenames. */
+  option_source_filename = sfilename;
+
+  /* In case there were no arguments, set the default command. */
+  if (cmd == CMD_NONE) { cmd = CMD_DEFAULT; }
+
+  argc -= my_optind;
+  argv += my_optind;
+
+  /* There may be up to two more arguments. */
+  if (argc > 2)
+    {
+      XPR(NT "too many filenames: %s ...\n", argv[2]);
+      goto cleanup;
+    }
+
+  ifile.flags    = RD_FIRST;
+  sfile.flags    = RD_FIRST;
+  sfile.filename = option_source_filename;
+
+  /* The infile takes the next argument, if there is one.  But if not, infile
+   * is set to stdin. */
+  if (argc > 0)
+    {
+      ifile.filename = argv[0];
+
+      if ((ret = main_file_open (& ifile, ifile.filename, XO_READ)))
+	{
+	  goto cleanup;
+	}
+    }
+  else
+    {
+      XSTDIN_XF (& ifile);
+    }
+
+  /* The ofile takes the following argument, if there is one.  But if not, it
+   * is left NULL until the application header is processed.  It will be set
+   * in main_open_output. */
+  if (argc > 1)
+    {
+      /* Check for conflicting arguments. */
+      if (option_stdout && ! option_quiet)
+	{
+	  XPR(NT "warning: -c option overrides output filename: %s\n",
+	      argv[1]);
+	}
+
+      if (! option_stdout) { ofile.filename = argv[1]; }
+    }
+
+#if VCDIFF_TOOLS
+  if (cmd == CMD_MERGE &&
+      (ret = main_merge_arguments (&merge_order)))
+    {
+      goto cleanup;
+    }
+#endif /* VCDIFF_TOOLS */
+  /* Run the selected command; ret is what this function returns. */
+  switch (cmd)
+    {
+    case CMD_PRINTHDR:
+    case CMD_PRINTHDRS:
+    case CMD_PRINTDELTA:
+#if XD3_ENCODER
+    case CMD_ENCODE:
+    case CMD_RECODE:
+    case CMD_MERGE:
+#endif
+    case CMD_DECODE:
+      ret = main_input (cmd, & ifile, & ofile, & sfile);
+      break;
+
+#if REGRESSION_TEST
+    case CMD_TEST:
+      main_config ();
+      ret = xd3_selftest ();
+      break;
+#endif
+
+    case CMD_CONFIG:
+      ret = main_config ();
+      break;
+
+    default:
+      ret = main_help ();
+      break;
+    }
+  /* Error paths jump to the labels below; normal flow skips this block. */
+  if (0)
+    {
+    cleanup:
+      ret = EXIT_FAILURE;
+    exit:
+      (void)0;
+    }
+  /* Common teardown, reached on success and through the labels above. */
+#if EXTERNAL_COMPRESSION
+  if (ext_tmpfile != NULL)
+    {
+      unlink (ext_tmpfile);
+    }
+#endif
+
+  main_file_cleanup (& ifile);
+  main_file_cleanup (& ofile);
+  main_file_cleanup (& sfile);
+  /* Free the -m list nodes; only the nodes themselves are released. */
+  while (! main_merge_list_empty (& merge_order))
+    {
+      merge = main_merge_list_pop_front (& merge_order);
+      main_free (merge);
+    }
+  /* Buffers from setup_environment; still NULL when XDELTA was unset. */
+  main_free (free_argv);
+  main_free (free_value);
+
+  main_cleanup ();
+
+  fflush (stdout);
+  fflush (stderr);
+  return ret;
+}
+/* Calls main_version(), emits the usage text; always returns EXIT_FAILURE. */
+static int
+main_help (void)
+{
+  main_version();
+
+  /* Note: update wiki when command-line features change */
+  DP(RINT "usage: xdelta3 [command/options] [input [output]]\n");
+  DP(RINT "special command names:\n");
+  DP(RINT "    config      prints xdelta3 configuration\n");
+  DP(RINT "    decode      decompress the input\n");
+  DP(RINT "    encode      compress the input%s\n",
+     XD3_ENCODER ? "" : " [Not compiled]");
+#if REGRESSION_TEST
+  DP(RINT "    test        run the builtin tests\n");
+#endif
+#if VCDIFF_TOOLS
+  DP(RINT "special commands for VCDIFF inputs:\n");
+  DP(RINT "    printdelta  print information about the entire delta\n");
+  DP(RINT "    printhdr    print information about the first window\n");
+  DP(RINT "    printhdrs   print information about all windows\n");
+  DP(RINT "    recode      encode with new application/secondary settings\n");
+  DP(RINT "    merge       merge VCDIFF inputs (see below)\n");
+#endif
+  DP(RINT "standard options:\n");
+  DP(RINT "   -0 .. -9     compression level\n");
+  DP(RINT "   -c           use stdout\n");
+  DP(RINT "   -d           decompress\n");
+  DP(RINT "   -e           compress%s\n",
+     XD3_ENCODER ? "" : " [Not compiled]");
+  DP(RINT "   -f           force overwrite\n");
+  DP(RINT "   -h           show help\n");
+  DP(RINT "   -q           be quiet\n");
+  DP(RINT "   -v           be verbose (max 2)\n");
+  DP(RINT "   -V           show version\n");
+  /* NOTE(review): "max 2" vs. the option_verbose > 2 test in main_input. */
+  DP(RINT "memory options:\n");
+  DP(RINT "   -B bytes     source window size\n");
+  DP(RINT "   -W bytes     input window size\n");
+  DP(RINT "   -P size      compression duplicates window\n");
+  DP(RINT "   -I size      instruction buffer size (0 = unlimited)\n");
+
+  DP(RINT "compression options:\n");
+  DP(RINT "   -s source    source file to copy from (if any)\n");
+  DP(RINT "   -S [djw|fgk] enable/disable secondary compression\n");
+  DP(RINT "   -N           disable small string-matching compression\n");
+  DP(RINT "   -D           disable external decompression (encode/decode)\n");
+  DP(RINT "   -R           disable external recompression (decode)\n");
+  DP(RINT "   -n           disable checksum (encode/decode)\n");
+  DP(RINT "   -C           soft config (encode, undocumented)\n");
+  DP(RINT "   -A [apphead] disable/provide application header (encode)\n");
+  DP(RINT "   -J           disable output (check/compute only)\n");
+  DP(RINT "   -T           use alternate code table (test)\n");
+  DP(RINT "   -m           arguments for \"merge\"\n");
+
+  DP(RINT "the XDELTA environment variable may contain extra args:\n");
+  DP(RINT "   XDELTA=\"-s source-x.y.tar.gz\" \\\n");
+  DP(RINT "   tar --use-compress-program=xdelta3 \\\n");
+  DP(RINT "       -cf target-x.z.tar.gz.vcdiff target-x.y\n");
+  DP(RINT "the \"merge\" command combines VCDIFF inputs as follows:\n");
+  DP(RINT "   xdelta3 merge -m 1.vcdiff -m 2.vcdiff 3.vcdiff merged.vcdiff\n");
+  return EXIT_FAILURE;
+}
diff --git a/xdelta3-merge.h b/xdelta3-merge.h
new file mode 100644
index 0000000..2253a2c
--- /dev/null
+++ b/xdelta3-merge.h
@@ -0,0 +1,579 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XDELTA3_MERGE_H_
+#define _XDELTA3_MERGE_H_
+
+int xd3_merge_inputs (xd3_stream *stream, 
+		      xd3_whole_state *source,
+		      xd3_whole_state *input);
+
+/* Allocate the three initial buffers (adds, inst, wininfo) of
+ * stream->whole_target, XD3_ALLOCSIZE bytes each.  The whole-target
+ * state must still be empty when this is called.  Returns 0 on
+ * success, or ENOMEM as soon as one allocation fails (later buffers
+ * are then left unallocated). */
+static int
+xd3_whole_state_init (xd3_stream *stream)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+
+  XD3_ASSERT (whole->adds == NULL);
+  XD3_ASSERT (whole->inst == NULL);
+  XD3_ASSERT (whole->wininfo == NULL);
+  XD3_ASSERT (whole->length == 0);
+
+  whole->adds_alloc = XD3_ALLOCSIZE;
+  whole->inst_alloc = XD3_ALLOCSIZE;
+  whole->wininfo_alloc = XD3_ALLOCSIZE;
+
+  whole->adds = (uint8_t*) xd3_alloc (stream, whole->adds_alloc, 1);
+  if (whole->adds == NULL)
+    {
+      return ENOMEM;
+    }
+
+  whole->inst = (xd3_winst*) xd3_alloc (stream, whole->inst_alloc, 1);
+  if (whole->inst == NULL)
+    {
+      return ENOMEM;
+    }
+
+  whole->wininfo = (xd3_wininfo*) xd3_alloc (stream, whole->wininfo_alloc, 1);
+  if (whole->wininfo == NULL)
+    {
+      return ENOMEM;
+    }
+
+  return 0;
+}
+
+/* Exchange the complete contents of two whole-state records (buffer
+ * pointers, lengths and allocation sizes).  Both records must have
+ * their inst, adds and wininfo buffers allocated. */
+static void
+xd3_swap_whole_state (xd3_whole_state *a, 
+		      xd3_whole_state *b)
+{
+  xd3_whole_state tmp;
+  XD3_ASSERT (a->inst != NULL && a->adds != NULL);
+  XD3_ASSERT (b->inst != NULL && b->adds != NULL);
+  /* Check the wininfo buffer of BOTH records; the original tested
+   * b->wininfo twice and never looked at a->wininfo. */
+  XD3_ASSERT (a->wininfo != NULL && b->wininfo != NULL);
+  tmp = *a;
+  *a = *b;
+  *b = tmp;
+}
+
+/* Make sure the buffer *alloc_ptr (capacity *alloc_size bytes) can
+ * hold (current_units + new_units) elements of unit_size bytes.
+ * When it cannot, a new buffer of twice the needed size (rounded with
+ * xd3_round_blksize to XD3_ALLOCSIZE) is allocated, the first
+ * current_units elements are copied over, the old buffer is freed and
+ * *alloc_ptr / *alloc_size are updated.  Returns 0 or ENOMEM. */
+static int
+xd3_realloc_buffer (xd3_stream *stream,
+                    usize_t current_units,
+                    usize_t unit_size,
+                    usize_t new_units,
+                    usize_t *alloc_size,
+                    void **alloc_ptr)
+{
+  usize_t want_bytes = (current_units + new_units) * unit_size;
+  usize_t used_bytes;
+  usize_t grown_bytes;
+  uint8_t *grown;
+
+  if (want_bytes > *alloc_size)
+    {
+      used_bytes = current_units * unit_size;
+      grown_bytes = xd3_round_blksize (want_bytes * 2, XD3_ALLOCSIZE);
+
+      grown = (uint8_t*) xd3_alloc (stream, grown_bytes, 1);
+      if (grown == NULL)
+	{
+	  return ENOMEM;
+	}
+
+      /* Carry over the bytes already in use, then drop the old buffer. */
+      if (used_bytes != 0)
+	{
+	  memcpy (grown, *alloc_ptr, used_bytes);
+	}
+
+      if (*alloc_ptr != NULL)
+	{
+	  xd3_free (stream, *alloc_ptr);
+	}
+
+      *alloc_size = grown_bytes;
+      *alloc_ptr = grown;
+    }
+
+  return 0;
+}
+
+/* Reserve one more output instruction at the end of
+ * stream->whole_target.inst, growing the array if necessary, and
+ * return its address in *winstp.  instlen is incremented.  Returns 0
+ * or the error from xd3_realloc_buffer. */
+static int
+xd3_whole_alloc_winst (xd3_stream *stream,
+		       xd3_winst **winstp)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  int ret = xd3_realloc_buffer (stream,
+				whole->instlen,
+				sizeof (xd3_winst),
+				1,
+				& whole->inst_alloc,
+				(void**) & whole->inst);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  /* whole->inst may have moved; index it only after the realloc. */
+  *winstp = whole->inst + whole->instlen;
+  whole->instlen += 1;
+
+  return 0;
+}
+
+/* Ensure stream->whole_target.adds has room for COUNT more bytes
+ * beyond the addslen bytes already used.  addslen itself is not
+ * changed.  Returns 0 or the error from xd3_realloc_buffer. */
+static int
+xd3_whole_alloc_adds (xd3_stream *stream,
+		      usize_t count)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  int ret;
+
+  ret = xd3_realloc_buffer (stream,
+			    whole->addslen,
+			    1,
+			    count,
+			    & whole->adds_alloc,
+			    (void**) & whole->adds);
+  return ret;
+}
+
+/* Reserve one more window-info record at the end of
+ * stream->whole_target.wininfo, growing the array if necessary, and
+ * return its address in *wininfop.  wininfolen is incremented.
+ * Returns 0 or the error from xd3_realloc_buffer. */
+static int
+xd3_whole_alloc_wininfo (xd3_stream *stream,
+			 xd3_wininfo **wininfop)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  int ret = xd3_realloc_buffer (stream,
+				whole->wininfolen,
+				sizeof (xd3_wininfo),
+				1,
+				& whole->wininfo_alloc,
+				(void**) & whole->wininfo);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  /* whole->wininfo may have moved; index it only after the realloc. */
+  *wininfop = whole->wininfo + whole->wininfolen;
+  whole->wininfolen += 1;
+
+  return 0;
+}
+
+/* Append one decoded half-instruction to stream->whole_target.
+ *
+ * The new instruction records its position (the current whole-target
+ * length) and the length is advanced by the instruction size.  For
+ * RUN (one byte) and ADD (inst->size bytes) the data is consumed from
+ * stream->data_sect.buf and stored in the adds buffer; addr is the
+ * offset of that data in adds.  For any other type (a copy) addr is
+ * translated: addresses below dec_cpylen refer to the copy window
+ * (mode set from dec_win_ind, addr offset by dec_cpyoff), otherwise
+ * the address is made relative to dec_winstart.
+ *
+ * Returns 0 or an allocation error. */
+static int
+xd3_whole_append_inst (xd3_stream *stream,
+                       xd3_hinst *inst)
+{
+  xd3_whole_state *whole = & stream->whole_target;
+  xd3_winst *winst;
+  int ret;
+
+  ret = xd3_whole_alloc_winst (stream, &winst);
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  winst->type = inst->type;
+  winst->mode = 0;
+  winst->size = inst->size;
+  winst->position = whole->length;
+  whole->length += inst->size;
+
+  if (inst->type == XD3_RUN)
+    {
+      /* A run stores only its single repeated byte. */
+      if ((ret = xd3_whole_alloc_adds (stream, 1)))
+	{
+	  return ret;
+	}
+
+      winst->addr = whole->addslen;
+      whole->adds[whole->addslen++] = *stream->data_sect.buf++;
+    }
+  else if (inst->type == XD3_ADD)
+    {
+      if ((ret = xd3_whole_alloc_adds (stream, inst->size)))
+	{
+	  return ret;
+	}
+
+      winst->addr = whole->addslen;
+      memcpy (whole->adds + whole->addslen,
+              stream->data_sect.buf,
+              inst->size);
+      stream->data_sect.buf += inst->size;
+      whole->addslen += inst->size;
+    }
+  else if (inst->addr < stream->dec_cpylen)
+    {
+      winst->mode = SRCORTGT (stream->dec_win_ind);
+      winst->addr = stream->dec_cpyoff + inst->addr;
+    }
+  else
+    {
+      winst->addr = (stream->dec_winstart + 
+		     inst->addr - 
+		     stream->dec_cpylen);
+    }
+
+  return 0;
+}
+
+/* Record the window currently held by the decoder in
+ * stream->whole_target: one wininfo entry (target length, window
+ * start, adler32) followed by every non-NOOP instruction decoded from
+ * the instruction section.  Returns 0, or the first error from
+ * allocation or instruction decoding. */
+int
+xd3_whole_append_window (xd3_stream *stream)
+{
+  xd3_wininfo *wininfo;
+  int ret;
+
+  ret = xd3_whole_alloc_wininfo (stream, &wininfo);
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  wininfo->length = stream->dec_tgtlen;
+  wininfo->offset = stream->dec_winstart;
+  wininfo->adler32 = stream->dec_adler32;
+
+  while (stream->inst_sect.buf < stream->inst_sect.buf_max)
+    {
+      ret = xd3_decode_instruction (stream);
+      if (ret != 0)
+	{
+	  return ret;
+	}
+
+      /* One decode step yields up to two half-instructions. */
+      if (stream->dec_current1.type != XD3_NOOP)
+	{
+	  ret = xd3_whole_append_inst (stream, & stream->dec_current1);
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+
+      if (stream->dec_current2.type != XD3_NOOP)
+	{
+	  ret = xd3_whole_append_inst (stream, & stream->dec_current2);
+	  if (ret != 0)
+	    {
+	      return ret;
+	    }
+	}
+    }
+
+  return 0;
+}
+
+/* xd3_merge_input_output applies *source to *stream, returns the
+ * result in stream.
+ *
+ * The merge is computed into a temporary stream whose whole_target is
+ * then swapped with stream->whole_target.  The temporary stream is
+ * released on every path once it has been configured, so a failed
+ * merge no longer leaks the temporary buffers.  Returns 0 or the
+ * first error, which is also reported through XPR. */
+int xd3_merge_input_output (xd3_stream *stream,
+			    xd3_whole_state *source)
+{
+  int ret;
+  xd3_stream tmp_stream;
+  memset (& tmp_stream, 0, sizeof (tmp_stream));
+
+  if ((ret = xd3_config_stream (& tmp_stream, NULL)))
+    {
+      XPR(NT XD3_LIB_ERRMSG (&tmp_stream, ret));
+      return ret;
+    }
+
+  if ((ret = xd3_whole_state_init (& tmp_stream)) ||
+      (ret = xd3_merge_inputs (& tmp_stream, 
+			       source,
+			       & stream->whole_target)))
+    {
+      /* Print before freeing: the message is read from tmp_stream. */
+      XPR(NT XD3_LIB_ERRMSG (&tmp_stream, ret));
+      xd3_free_stream (& tmp_stream);
+      return ret;
+    }
+
+  /* the output is in tmp_stream.whole_state, swap into input */
+  xd3_swap_whole_state (& stream->whole_target,
+			& tmp_stream.whole_target);
+  /* total allocation counts are preserved */
+  xd3_free_stream (& tmp_stream);
+  return 0;
+}
+
+/* Copy the RUN instruction *iinst, whose byte lives in target->adds,
+ * to the end of stream->whole_target.  The output instruction keeps
+ * type, mode and size, gets a fresh addr in the output adds buffer,
+ * and the output length advances by the run size.  Returns 0 or an
+ * allocation error. */
+static int
+xd3_merge_run (xd3_stream *stream,
+	       xd3_whole_state *target,
+	       xd3_winst *iinst)
+{
+  xd3_whole_state *out = & stream->whole_target;
+  xd3_winst *oinst;
+  int ret;
+
+  ret = xd3_whole_alloc_winst (stream, &oinst);
+  if (ret == 0)
+    {
+      ret = xd3_whole_alloc_adds (stream, 1);
+    }
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  oinst->type = iinst->type;
+  oinst->mode = iinst->mode;
+  oinst->size = iinst->size;
+  oinst->addr = out->addslen;
+
+  XD3_ASSERT (out->length == iinst->position);
+  oinst->position = out->length;
+  out->length += iinst->size;
+
+  /* Only the single repeated byte is stored for a run. */
+  out->adds[out->addslen] = target->adds[iinst->addr];
+  out->addslen += 1;
+
+  return 0;
+}
+
+/* Copy the ADD instruction *iinst, whose data lives in target->adds,
+ * to the end of stream->whole_target.  The output instruction keeps
+ * type, mode and size, its data is appended to the output adds
+ * buffer, and the output length advances by the instruction size.
+ * Returns 0 or an allocation error. */
+static int
+xd3_merge_add (xd3_stream *stream,
+	       xd3_whole_state *target,
+	       xd3_winst *iinst)
+{
+  xd3_whole_state *out = & stream->whole_target;
+  xd3_winst *oinst;
+  int ret;
+
+  ret = xd3_whole_alloc_winst (stream, &oinst);
+  if (ret == 0)
+    {
+      ret = xd3_whole_alloc_adds (stream, iinst->size);
+    }
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  oinst->type = iinst->type;
+  oinst->mode = iinst->mode;
+  oinst->size = iinst->size;
+  oinst->addr = out->addslen;
+
+  XD3_ASSERT (out->length == iinst->position);
+  oinst->position = out->length;
+  out->length += iinst->size;
+
+  memcpy (out->adds + out->addslen,
+	  target->adds + iinst->addr,
+	  iinst->size);
+  out->addslen += iinst->size;
+
+  return 0;
+}
+
+/* Append an unchanged copy of *iinst to stream->whole_target.inst.
+ * The whole-target length is NOT advanced here; xd3_merge_inputs
+ * does that for copy instructions.  Returns 0 or an allocation
+ * error. */
+static int
+xd3_merge_target_copy (xd3_stream *stream,
+		       xd3_winst *iinst)
+{
+  xd3_winst *oinst;
+  int ret = xd3_whole_alloc_winst (stream, &oinst);
+
+  if (ret != 0)
+    {
+      return ret;
+    }
+
+  XD3_ASSERT (stream->whole_target.length == iinst->position);
+
+  *oinst = *iinst;
+  return 0;
+}
+
+/* Binary-search source->inst for the instruction whose output range
+ * [position, position + size) contains ADDRESS and store its index
+ * in *inst_num.
+ *
+ * Returns 0 on success, XD3_INVALID_INPUT when ADDRESS is not below
+ * source->length, and XD3_INTERNAL when no instruction covers the
+ * address; stream->msg is set in both error cases. */
+static int
+xd3_merge_find_position (xd3_stream *stream,
+			 xd3_whole_state *source,
+			 xoff_t address,
+			 usize_t *inst_num)
+{
+  usize_t lo = 0;
+  usize_t hi;
+
+  if (address >= source->length)
+    {
+      stream->msg = "Invalid copy offset in merge";
+      return XD3_INVALID_INPUT;
+    }
+
+  /* Search the half-open index range [lo, hi). */
+  for (hi = source->instlen; lo != hi; )
+    {
+      usize_t probe = lo + (hi - lo) / 2;
+      xoff_t first = source->inst[probe].position;
+
+      if (address < first)
+	{
+	  hi = probe;
+	}
+      else if (address >= first + source->inst[probe].size)
+	{
+	  lo = probe + 1;
+	}
+      else
+	{
+	  *inst_num = probe;
+	  return 0;
+	}
+    }
+
+  stream->msg = "Internal error in merge";
+  return XD3_INTERNAL;
+}
+
+/* Translate one source-mode copy of the input delta into instructions
+ * expressed against the source delta's own inputs, appending them to
+ * stream->whole_target.inst.
+ *
+ * The copied range [addr, addr + size) is walked across consecutive
+ * instructions of *source.  Each overlapped piece becomes:
+ *   - a RUN or ADD whose data is duplicated into the output adds;
+ *   - a copy with the source instruction's mode and an adjusted
+ *     address, when that instruction has a non-zero mode;
+ *   - otherwise (mode 0, a copy within the source delta's own output)
+ *     a recursive translation of the referenced range.
+ *
+ * The whole-target length is not advanced here (see xd3_merge_inputs).
+ *
+ * Returns 0, an allocation error, or XD3_INVALID_INPUT /
+ * XD3_INTERNAL with stream->msg set.  A copy that extends past the
+ * last source instruction is rejected explicitly instead of relying
+ * on XD3_ASSERT, which is presumably compiled out when XD3_DEBUG is 0
+ * and would then allow reading beyond source->inst. */
+static int
+xd3_merge_source_copy (xd3_stream *stream,
+		       xd3_whole_state *source,
+		       const xd3_winst *iinst_orig)
+{
+  int ret;
+  xd3_winst iinst;
+  usize_t sinst_num;
+
+  /* Work on a private copy: addr/size/position are advanced below. */
+  memcpy (& iinst, iinst_orig, sizeof (iinst));
+
+  XD3_ASSERT (iinst.mode == VCD_SOURCE);
+
+  if ((ret = xd3_merge_find_position (stream, source, 
+				      iinst.addr, &sinst_num)))
+    {
+      return ret;
+    }
+
+  while (iinst.size > 0)
+    {
+      xd3_winst *sinst;
+      xd3_winst *minst;
+      usize_t sinst_offset;
+      usize_t sinst_left;
+      usize_t this_take;
+
+      /* Only the start address was validated above; make sure the
+       * copy does not run past the end of the source instructions. */
+      if (sinst_num >= source->instlen)
+	{
+	  stream->msg = "Invalid copy offset in merge";
+	  return XD3_INVALID_INPUT;
+	}
+
+      sinst = &source->inst[sinst_num];
+
+      XD3_ASSERT (iinst.addr >= sinst->position);
+
+      sinst_offset = iinst.addr - sinst->position;
+
+      XD3_ASSERT (sinst->size > sinst_offset);
+
+      /* Take as much as this source instruction still provides. */
+      sinst_left = sinst->size - sinst_offset;
+      this_take = min (iinst.size, sinst_left);
+
+      XD3_ASSERT (this_take > 0);
+
+      if ((ret = xd3_whole_alloc_winst (stream, &minst)))
+	{
+	  return ret;
+	}
+
+      minst->size = this_take;
+      minst->type = sinst->type;
+      minst->position = iinst.position;
+      minst->mode = 0;
+
+      switch (sinst->type)
+	{
+	case XD3_RUN:
+	  if ((ret = xd3_whole_alloc_adds (stream, 1)))
+	    {
+	      return ret;
+	    }
+
+	  minst->addr = stream->whole_target.addslen;
+	  stream->whole_target.adds[stream->whole_target.addslen++] = 
+	    source->adds[sinst->addr];
+	  break;
+	case XD3_ADD:
+	  if ((ret = xd3_whole_alloc_adds (stream, this_take)))
+	    {
+	      return ret;
+	    }
+
+	  minst->addr = stream->whole_target.addslen;
+	  memcpy(stream->whole_target.adds + stream->whole_target.addslen,
+		 source->adds + sinst->addr + sinst_offset,
+		 this_take);
+	  stream->whole_target.addslen += this_take;
+	  break;
+	default:
+	  if (sinst->mode != 0)
+	    {
+	      minst->mode = sinst->mode;
+	      minst->addr = sinst->addr + sinst_offset;
+	    }
+	  else
+	    {
+	      // TODO: this is slow because of the recursion, which
+	      // could reach a depth equal to the number of target
+	      // copies, and this is compression-inefficient because
+	      // it can produce duplicate adds.
+	      xd3_winst tinst;
+	      tinst.type = XD3_CPY;
+	      tinst.mode = iinst.mode;
+	      tinst.addr = sinst->addr + sinst_offset;
+	      tinst.size = this_take;
+	      tinst.position = iinst.position;
+
+	      // The instruction allocated in this frame will not be used.
+	      stream->whole_target.instlen -= 1;
+
+	      if ((ret = xd3_merge_source_copy (stream, source, &tinst)))
+		{ 
+		  return ret;
+		}
+	    }
+	  break;
+	}
+
+      iinst.position += this_take;
+      iinst.addr += this_take;
+      iinst.size -= this_take;
+      sinst_num += 1;
+    }
+
+  return 0;
+}
+
+/* xd3_merge_inputs() applies *input to *source, returns its result in
+ * stream.
+ *
+ * All window-info records of *input are copied first.  Then every
+ * input instruction is translated: RUN and ADD are copied with their
+ * data, copies with mode 0 or VCD_TARGET are copied verbatim, and
+ * any other copy is resolved against *source.  Processing stops at
+ * the first error, which is returned. */
+int xd3_merge_inputs (xd3_stream *stream, 
+		      xd3_whole_state *source,
+		      xd3_whole_state *input)
+{
+  int ret = 0;
+  usize_t i;
+  size_t input_i;
+
+  for (i = 0; i < input->wininfolen; ++i)
+    {
+      xd3_wininfo *copyinfo;
+
+      ret = xd3_whole_alloc_wininfo (stream, &copyinfo);
+      if (ret != 0)
+	{
+	  return ret;
+	}
+
+      *copyinfo = input->wininfo[i];
+    }
+
+  /* iterate over each instruction. */
+  for (input_i = 0; input_i < input->instlen; ++input_i)
+    {
+      xd3_winst *iinst = &input->inst[input_i];
+
+      if (ret != 0)
+	{
+	  break;
+	}
+
+      if (iinst->type == XD3_RUN)
+	{
+	  ret = xd3_merge_run (stream, input, iinst);
+	}
+      else if (iinst->type == XD3_ADD)
+	{
+	  ret = xd3_merge_add (stream, input, iinst);
+	}
+      else
+	{
+	  /* TODO: VCD_TARGET support is completely untested all
+	   * throughout. */
+	  if (iinst->mode == 0 || iinst->mode == VCD_TARGET)
+	    {
+	      ret = xd3_merge_target_copy (stream, iinst);
+	    }
+	  else
+	    {
+	      ret = xd3_merge_source_copy (stream, source, iinst);
+	    }
+
+	  /* The whole_target.length is not updated in the xd3_merge*copy
+	   * routine because of recursion in xd3_merge_source_copy. */
+	  stream->whole_target.length += iinst->size;
+	}
+    }
+
+  return ret;
+}
+
+#endif
diff --git a/xdelta3-python.h b/xdelta3-python.h
new file mode 100644
index 0000000..4805b17
--- /dev/null
+++ b/xdelta3-python.h
@@ -0,0 +1,88 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "Python.h"
+
+static PyObject *pyxd3_error;
+
+/* Python entry point: xdelta3main.main([arg, arg, ...]).
+ *
+ * Takes a single list of strings, builds an argv array from it
+ * (argv[0] is left NULL and the array is NULL-terminated) and calls
+ * xd3_main_cmdline.  Returns the integer 0 on success.  Returns NULL
+ * with a Python exception set when the argument is not a list, an
+ * item is not a string, memory runs out, or xd3_main_cmdline returns
+ * non-zero (the module's "error" exception).
+ *
+ * The argv entries borrow the string buffers owned by the list
+ * items; only the argv array itself is freed here. */
+PyObject *xdelta3_main_cmdline (PyObject *self, PyObject *args)
+{
+  int ret, i, nargs;
+  char **argv = NULL;
+  int argc = 0;
+  PyObject *result = NULL;
+  PyObject *o;
+
+  if (! PyArg_ParseTuple (args, "O", &o))
+    {
+      /* PyArg_ParseTuple has already set the exception. */
+      goto cleanup;
+    }
+
+  if (! PyList_Check (o))
+    {
+      /* Returning NULL without an exception set is an error in
+       * itself, so raise a TypeError explicitly. */
+      PyErr_SetString (PyExc_TypeError,
+		       "xdelta3 main() expects a list of strings");
+      goto cleanup;
+    }
+
+  argc  = PyList_Size (o);
+  nargs = argc + 2;
+
+  if (! (argv = malloc (sizeof(argv[0]) * nargs)))
+    {
+      PyErr_NoMemory ();
+      goto cleanup;
+    }
+  memset (argv, 0, sizeof(argv[0]) * nargs);
+
+  for (i = 1; i < nargs-1; i += 1)
+    {
+      char *ps;
+      PyObject *s;
+      if ((s = PyList_GetItem (o, i-1)) == NULL) { goto cleanup; }
+      ps = PyString_AsString (s);
+      if (ps == NULL)
+	{
+	  /* Not a string: PyString_AsString raised TypeError.  Bail
+	   * out instead of handing a NULL argument to main. */
+	  goto cleanup;
+	}
+      argv[i] = ps;
+    }
+
+  ret = xd3_main_cmdline (argc+1, argv);
+
+  if (ret == 0)
+    {
+      result = Py_BuildValue ("i", ret);
+    }
+  else
+    {
+      PyErr_SetString (pyxd3_error, "failed :(");
+    }
+ cleanup:
+  if (argv)
+    {
+      free (argv);
+    }
+  return result;
+}
+
+/* Method table of the extension module: exposes xdelta3_main_cmdline
+ * to Python under the name "main".  The all-NULL entry terminates the
+ * table. */
+static PyMethodDef xdelta3_methods[] = {
+  { "main", xdelta3_main_cmdline, METH_VARARGS, "xdelta3 main()" },
+  { NULL, NULL }
+};
+
+/* Module initialisation for "xdelta3main": registers the method
+ * table and publishes the exception class "xdelta3main.error" as the
+ * module attribute "error".  If module or exception creation fails
+ * the function returns early, leaving the Python exception set for
+ * the importer. */
+DL_EXPORT(void) initxdelta3main (void)
+{
+  PyObject *m, *d;
+  m = Py_InitModule ("xdelta3main", xdelta3_methods);
+  if (m == NULL)
+    {
+      return;
+    }
+  d = PyModule_GetDict (m);
+  pyxd3_error = PyErr_NewException ("xdelta3main.error", NULL, NULL);
+  if (pyxd3_error == NULL)
+    {
+      return;
+    }
+  PyDict_SetItemString (d, "error", pyxd3_error);
+}
diff --git a/xdelta3-regtest.py b/xdelta3-regtest.py
new file mode 100755
index 0000000..f9a11bd
--- /dev/null
+++ b/xdelta3-regtest.py
@@ -0,0 +1,1222 @@
+#!/usr/bin/python2.5
+# xdelta 3 - delta compression tools and library
+# Copyright (C) 2003, 2006, 2007, 2008.  Joshua P. MacDonald
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# TODO: test 1.5 vs. greedy
+
+import os, sys, math, re, time, types, array, random
+import xdelta3
+
+#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS'
+#RCSDIR = '/tmp/PRCS_read_copy'
+#SAMPLEDIR = "/tmp/WESNOTH_tmp/diff"
+
+#RCSDIR = 'G:/jmacd/PRCS_copy'
+#SAMPLEDIR = "C:/sample_data/Wesnoth/tar"
+
+#RCSDIR = '/Users/jmacd/src/ftp.kernel.org/pub/scm/linux/kernel/bkcvs/linux-2.4/net/x25'
+RCSDIR = '/Users/jmacd/src/ftp.kernel.org'
+
+# Test tuning constants.
+MIN_SIZE       = 0
+
+TIME_TOO_SHORT = 0.050
+
+# Defaults for TimedTest: trials discarded before measuring, and the
+# minimum / maximum number of measured trials.
+SKIP_TRIALS    = 2
+MIN_TRIALS     = 3
+MAX_TRIALS     = 15
+
+# TimedTest stops early once the standard deviation of the measured
+# times, as a percentage of the mean, falls below this value.
+# 10 = fast 1.5 = slow
+MIN_STDDEV_PCT = 1.5
+
+# How many results per round
+MAX_RESULTS = 500
+TEST_ROUNDS = 500
+KEEP_P = (0.5)
+
+# For RCS testing, what percent to select
+FILE_P = (0.50)
+
+# For run-speed tests: range of input sizes used by RunSpeedTest
+MIN_RUN = 1000 * 1000 * 1
+MAX_RUN = 1000 * 1000 * 10
+
+# Testwide defaults: prepended to every Xdelta3Runner command line
+ALL_ARGS = [
+    '-vv'
+    ]
+
+# The first 7 args go to -C
+SOFT_CONFIG_CNT = 7
+
+# Names of the configuration parameters, in order.  The first
+# SOFT_CONFIG_CNT entries are the soft-config values (see the comment
+# on SOFT_CONFIG_CNT); the remaining ones have their own flag in
+# CONFIG_ARGMAP.
+CONFIG_ORDER = [ 'large_look',
+                 'large_step',
+                 'small_look',
+                 'small_chain',
+                 'small_lchain',
+                 'max_lazy',
+                 'long_enough',
+
+                 # > SOFT_CONFIG_CNT
+                 'nocompress',
+                 'winsize',
+                 'srcwinsize',
+                 'sprevsz',
+                 'iopt',
+                 'djw',
+                 'altcode',
+                 ]
+
+# Command-line flag for each parameter that is not part of the
+# soft config.
+CONFIG_ARGMAP = {
+    'winsize'    : '-W',
+    'srcwinsize' : '-B',
+    'sprevsz'    : '-P',
+    'iopt'       : '-I',
+    'nocompress' : '-N',
+    'djw'        : '-Sdjw',
+    'altcode'    : '-T',
+    }
+
+def INPUT_SPEC(rand):
+    """Return a dict mapping each configuration parameter name to a
+    one-argument function that produces a value for it.
+
+    Randomised parameters draw from a fixed list with rand.choice;
+    the others return a constant.  The argument 'd' passed to each
+    function is not used by any entry here.
+    """
+    return {
+
+    # Time/space costs:
+
+    # -C 1,2,3,4,5,6,7
+    'large_look' : lambda d: rand.choice([9, 10, 11, 12]),
+    'large_step' : lambda d: rand.choice([25, 26, 27, 28, 29, 30]),
+    'small_look'   : lambda d: rand.choice([4]),
+    'small_chain'  : lambda d: rand.choice([1]),
+    'small_lchain' : lambda d: rand.choice([1]),
+    'max_lazy'     : lambda d: rand.choice([4, 5, 6, 7, 8, 9, 10 ]),
+
+    # Note: long_enough only refers to small matching and has no effect if
+    # small_chain == 1.
+    'long_enough'  : lambda d: rand.choice([4]),
+
+    # -N
+    'nocompress'   : lambda d: rand.choice(['false']),
+
+    # -T
+    'altcode'      : lambda d: rand.choice(['false']),
+
+    # -S djw
+    'djw'          : lambda d: rand.choice(['false']),
+
+    # Memory costs:
+
+    # -W
+    'winsize'      : lambda d: 8 * (1<<20),
+
+    # -B
+    'srcwinsize'   : lambda d: 64 * (1<<20),
+
+    # -I 0 is unlimited
+    'iopt'         : lambda d: 0,
+
+    # -P only powers of two
+    'sprevsz'      : lambda d: rand.choice([x * (1<<16) for x in [4]]),
+  }
+#end
+
+# Scratch directory, unique per process, and the files kept in it.
+TMPDIR = '/tmp/xd3regtest.%d' % os.getpid()
+
+RUNFILE = os.path.join(TMPDIR, 'run')     # input for the run-speed tests
+DFILE   = os.path.join(TMPDIR, 'output')  # encoder output
+RFILE   = os.path.join(TMPDIR, 'recon')   # decoder output (reconstruction)
+
+# States of the rlog parser in RcsFile.Sum1Rlog
+HEAD_STATE = 0
+BAR_STATE  = 1
+REV_STATE  = 2
+DATE_STATE = 3
+
+# File names containing .gif or .jpg
+IGNORE_FILENAME  = re.compile('.*\\.(gif|jpg).*')
+
+# rcs output
+RE_TOTREV  = re.compile('total revisions: (\\d+)')
+RE_BAR     = re.compile('----------------------------')
+RE_REV     = re.compile('revision (.+)')
+RE_DATE    = re.compile('date: ([^;]+);.*')
+# xdelta output
+RE_HDRSZ   = re.compile('VCDIFF header size: +(\\d+)')
+RE_EXTCOMP = re.compile('XDELTA ext comp.*')
+
+def c2str(c):
+    return ' '.join(['%s' % x for x in c])
+#end
+
+def SumList(l):
+    return reduce(lambda x,y: x+y, l)
+#end
+
+# returns (total, mean, stddev, q2 (median),
+#          (q3-q1)/2 ("semi-interquartile range"), max-min (spread))
+class StatList:
+    def __init__(self,l,desc):
+        cnt = len(l)
+        assert(cnt > 1)
+        l.sort()
+        self.cnt    = cnt
+        self.l      = l
+        self.total  = SumList(l)
+        self.mean   = self.total / float(self.cnt)
+        self.s      = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1))
+        self.q0     = l[0]
+        self.q1     = l[int(self.cnt/4.0+0.5)]
+        self.q2     = l[int(self.cnt/2.0+0.5)]
+        self.q3     = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))]
+        self.q4     = l[self.cnt-1]+1
+        self.siqr   = (self.q3-self.q1)/2.0;
+        self.spread = (self.q4-self.q0)
+        self.str    = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \
+                      (desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread)
+    #end
+#end
+
+def RunCommand(args, ok = [0]):
+    #print 'run command %s' % (' '.join(args))
+    p = os.spawnvp(os.P_WAIT, args[0], args)
+    if p not in ok:
+        raise CommandError(args, 'exited %d' % p)
+    #end
+#end
+
+def RunCommandIO(args,infn,outfn):
+    p = os.fork()
+    if p == 0:
+        os.dup2(os.open(infn,os.O_RDONLY),0)
+        os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1)
+        os.execvp(args[0], args)
+    else:
+        s = os.waitpid(p,0)
+        o = os.WEXITSTATUS(s[1])
+        if not os.WIFEXITED(s[1]) or o != 0:
+            raise CommandError(args, 'exited %d' % o)
+        #end
+    #end
+#end
+
+class TimedTest:
+    """Time an encode and a decode of 'target' (optionally against
+    'source') with the given runnable, then verify the result.
+
+    After construction:
+      encode_time / decode_time -- StatList of elapsed wall-clock times
+      encode_size               -- value of runnable.EncodeSize(DFILE)
+    """
+    def __init__(self, target, source, runnable,
+                 skip_trials = SKIP_TRIALS,
+                 min_trials = MIN_TRIALS,
+                 max_trials = MAX_TRIALS,
+                 min_stddev_pct = MIN_STDDEV_PCT):
+        self.target = target
+        self.source = source
+        self.runnable = runnable
+
+        self.skip_trials = skip_trials
+        # min_trials can never exceed max_trials
+        self.min_trials = min(min_trials, max_trials)
+        self.max_trials = max_trials
+        self.min_stddev_pct = min_stddev_pct
+
+        # Encode target -> DFILE
+        self.encode_time = self.DoTest(DFILE,
+                                       lambda x: x.Encode(self.target, self.source, DFILE))
+        self.encode_size = runnable.EncodeSize(DFILE)
+
+        # Decode DFILE -> RFILE
+        self.decode_time = self.DoTest(RFILE,
+                                       lambda x: x.Decode(DFILE, self.source, RFILE),
+                                       )
+
+        # verify
+        runnable.Verify(self.target, RFILE)
+    #end
+
+    def DoTest(self, fname, func):
+        """Call func(self.runnable) repeatedly, removing fname before
+        each call, and return a StatList of the elapsed times.
+
+        The first skip_trials runs are not measured.  Measuring stops
+        when the relative standard deviation drops below
+        min_stddev_pct, or when max_trials measurements were taken.
+        """
+        trials   = 0
+        measured = []
+
+        while 1:
+            # Start every trial without a stale output file.
+            try:
+                os.remove(fname)
+            except OSError:
+                pass
+
+            start_time  = time.time()
+            start_clock = time.clock()
+
+            func(self.runnable)
+
+            total_clock = (time.clock() - start_clock)
+            total_time  = (time.time() - start_time)
+
+            # Clamp to a tiny positive value so the mean is never zero.
+            elap_time  = max(total_time,  0.0000001)
+            elap_clock = max(total_clock, 0.0000001)
+
+            trials = trials + 1
+
+            # skip some of the first trials
+            if trials > self.skip_trials:
+                measured.append((elap_clock, elap_time))
+                #print 'measurement total: %.1f ms' % (total_time * 1000.0)
+
+            # at least so many
+            if trials < (self.skip_trials + self.min_trials):
+                #print 'continue: need more trials: %d' % trials
+                continue
+
+            # compute %variance
+            done = 0
+            if self.skip_trials + self.min_trials <= 2:
+                # Very few trials were requested: duplicate the samples
+                # (StatList asserts more than one entry) and finish now.
+                measured = measured + measured;
+                done = 1
+            #end
+
+            time_stat = StatList([x[1] for x in measured], 'elap time')
+            # standard deviation relative to the mean
+            sp = float(time_stat.s) / float(time_stat.mean)
+
+            # what if MAX_TRIALS is exceeded?
+            too_many = (trials - self.skip_trials) >= self.max_trials
+            good = (100.0 * sp) < self.min_stddev_pct
+            if done or too_many or good:
+                trials = trials - self.skip_trials
+                if not done and not good:
+                    #print 'too many trials: %d' % trials
+                    pass
+                #clock = StatList([x[0] for x in measured], 'elap clock')
+                return time_stat
+            #end
+        #end
+    #end
+#end
+
+def Decimals(start, end):
+    l = []
+    step = start
+    while 1:
+        r = range(step, step * 10, step)
+        l = l + r
+        if step * 10 >= end:
+            l.append(step * 10)
+            break
+        step = step * 10
+    return l
+#end
+
+# This tests the raw speed of 0-byte inputs
+def RunSpeedTest():
+    for L in Decimals(MIN_RUN, MAX_RUN):
+        SetFileSize(RUNFILE, L)
+
+        trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)]))
+        ReportSpeed(L, trx, '1MB ')
+
+        trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)]))
+        ReportSpeed(L, trx, '512k')
+
+        trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)]))
+        ReportSpeed(L, trx, '256k')
+
+        trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE))
+        ReportSpeed(L, trm, 'swig')
+
+        trg = TimedTest(RUNFILE, None, GzipRun1())
+        ReportSpeed(L,trg,'gzip')
+    #end
+#end
+
+def SetFileSize(F,L):
+    fd = os.open(F, os.O_CREAT | os.O_WRONLY)
+    os.ftruncate(fd,L)
+    assert os.fstat(fd).st_size == L
+    os.close(fd)
+#end
+
+def ReportSpeed(L,tr,desc):
+    print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \
+          (desc, L,
+           tr.encode_size,
+           tr.encode_time.mean * 1000.0,
+           tr.decode_time.mean * 1000.0)
+#end
+
+class Xdelta3RunClass:
+    def __init__(self, extra):
+        self.extra = extra
+    #end
+
+    def __str__(self):
+        return ' '.join(self.extra)
+    #end
+
+    def New(self):
+        return Xdelta3Runner(self.extra)
+    #end
+#end
+
+class Xdelta3Runner:
+    def __init__(self, extra):
+        self.extra = extra
+    #end
+
+    def Encode(self, target, source, output):
+        args = (ALL_ARGS +
+                self.extra +
+                ['-e'])
+        if source:
+            args.append('-s')
+            args.append(source)
+        #end
+        args = args + [target, output]
+        self.Main(args)
+    #end
+
+    def Decode(self, input, source, output):
+        args = (ALL_ARGS +
+                ['-d'])
+        if source:
+            args.append('-s')
+            args.append(source)
+        #end
+        args = args + [input, output]
+        self.Main(args)
+    #end
+
+    def Verify(self, target, recon):
+        RunCommand(('cmp', target, recon))
+    #end
+
+    def EncodeSize(self, output):
+        return os.stat(output).st_size
+    #end
+
+    def Main(self, args):
+        try:
+            #print 'Run %s' % (' '.join(args))
+            xdelta3.xd3_main_cmdline(args)
+        except Exception, e:
+            raise CommandError(args, "xdelta3.main exception: %s" % e)
+        #end
+    #end
+#end
+
+class Xdelta3Mod1:
+    def __init__(self, file):
+        self.target_data = open(file, 'r').read()
+    #end
+
+    def Encode(self, ignore1, ignore2, ignore3):
+        r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10)
+        if r1 != 0:
+            raise CommandError('memory', 'encode failed: %s' % r1)
+        #end
+        self.encoded = encoded
+    #end
+
+    def Decode(self, ignore1, ignore2, ignore3):
+        r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data))
+        if r2 != 0:
+            raise CommandError('memory', 'decode failed: %s' % r1)
+        #end
+        self.decoded = data1
+    #end
+
+    def Verify(self, ignore1, ignore2):
+        if self.target_data != self.decoded:
+            raise CommandError('memory', 'bad decode')
+        #end
+    #end
+
+    def EncodeSize(self, ignore1):
+        return len(self.encoded)
+    #end
+#end
+
+class GzipRun1:
+    def Encode(self, target, source, output):
+        assert source == None
+        RunCommandIO(['gzip', '-cf'], target, output)
+    #end
+
+    def Decode(self, input, source, output):
+        assert source == None
+        RunCommandIO(['gzip', '-dcf'], input, output)
+    #end
+
+    def Verify(self, target, recon):
+        RunCommand(('cmp', target, recon))
+    #end
+
+    def EncodeSize(self, output):
+        return os.stat(output).st_size
+    #end
+#end
+
+class Xdelta1RunClass:
+    def __str__(self):
+        return 'xdelta1'
+    #end
+
+    def New(self):
+        return Xdelta1Runner()
+    #end
+#end
+
+class Xdelta1Runner:
+    """Runner for the external xdelta1 program; a source file is
+    required for both directions."""
+    def Encode(self, target, source, output):
+        assert source != None
+        args = ['xdelta1', 'delta', '-q', source, target, output]
+        # Note: for dumb historical reasons, xdelta1 returns 1 or 0,
+        # so both exit codes are accepted for 'delta'.
+        RunCommand(args, [0, 1])
+    #end
+
+    def Decode(self, input, source, output):
+        assert source != None
+        args = ['xdelta1', 'patch', '-q', input, source, output]
+        # Only exit code 0 is accepted for 'patch' (RunCommand default).
+        RunCommand(args)
+    #end
+
+    def Verify(self, target, recon):
+        # cmp exits non-zero when the files differ
+        RunCommand(('cmp', target, recon))
+    #end
+
+    def EncodeSize(self, output):
+        return os.stat(output).st_size
+    #end
+#end
+
+# exceptions
+class SkipRcsException:
+    def __init__(self,reason):
+        self.reason = reason
+    #end
+#end
+
+class NotEnoughVersions:
+    def __init__(self):
+        pass
+    #end
+#end
+
+class CommandError:
+    def __init__(self,cmd,str):
+        if type(cmd) is types.TupleType or \
+           type(cmd) is types.ListType:
+            cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd)
+        #end
+        print 'command was: ',cmd
+        print 'command failed: ',str
+        print 'have fun debugging'
+    #end
+#end
+
+class RcsVersion:
+    def __init__(self,vstr):
+        self.vstr = vstr
+    #end
+    def __cmp__(self,other):
+        return cmp(self.date, other.date)
+    #end
+    def __str__(self):
+        return str(self.vstr)
+    #end
+#end
+
+class RcsFile:
+
+    def __init__(self, fname):
+        self.fname    = fname
+        self.versions = []
+        self.state    = HEAD_STATE
+    #end
+
+    def SetTotRev(self,s):
+        self.totrev = int(s)
+    #end
+
+    def Rev(self,s):
+        self.rev = RcsVersion(s)
+        if len(self.versions) >= self.totrev:
+            raise SkipRcsException('too many versions (in log messages)')
+        #end
+        self.versions.append(self.rev)
+    #end
+
+    def Date(self,s):
+        self.rev.date = s
+    #end
+
+    def Match(self, line, state, rx, gp, newstate, f):
+        if state == self.state:
+            m = rx.match(line)
+            if m:
+                if f:
+                    f(m.group(gp))
+                #end
+                self.state = newstate
+                return 1
+            #end
+        #end
+        return None
+    #end
+
+    def Sum1Rlog(self):
+        f = os.popen('rlog '+self.fname, "r")
+        l = f.readline()
+        while l:
+            if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev):
+                pass
+            elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None):
+                pass
+            elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev):
+                pass
+            elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date):
+                pass
+            #end
+            l = f.readline()
+        #end
+        c = f.close()
+        if c != None:
+            raise c
+        #end
+    #end
+
+    def Sum1(self):
+        st = os.stat(self.fname)
+        self.rcssize = st.st_size
+        self.Sum1Rlog()
+        if self.totrev != len(self.versions):
+            raise SkipRcsException('wrong version count')
+        #end
+        self.versions.sort()
+    #end
+
+    def Checkout(self,n):
+        v      = self.versions[n]
+        out    = open(self.Verf(n), "w")
+        cmd    = 'co -ko -p%s %s' % (v.vstr, self.fname)
+        total  = 0
+        (inf,
+         stream,
+         err)  = os.popen3(cmd, "r")
+        inf.close()
+        buf    = stream.read()
+        while buf:
+            total = total + len(buf)
+            out.write(buf)
+            buf = stream.read()
+        #end
+        v.vsize = total
+        estr = ''
+        buf = err.read()
+        while buf:
+            estr = estr + buf
+            buf = err.read()
+        #end
+        if stream.close():
+            raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr))
+        #end
+        out.close()
+        err.close()
+    #end
+
+    def Vdate(self,n):
+        return self.versions[n].date
+    #end
+
+    def Vstr(self,n):
+        return self.versions[n].vstr
+    #end
+
+    def Verf(self,n):
+        return os.path.join(TMPDIR, 'input.%d' % n)
+    #end
+
+    def FilePairsByDate(self, runclass):
+        if self.totrev < 2:
+            raise NotEnoughVersions()
+        #end
+        self.Checkout(0)
+        ntrials = []
+        if self.totrev < 2:
+            return vtrials
+        #end
+        for v in range(0,self.totrev-1):
+            if v > 1:
+                os.remove(self.Verf(v-1))
+            #end
+            self.Checkout(v+1)
+            if os.stat(self.Verf(v)).st_size < MIN_SIZE or \
+               os.stat(self.Verf(v+1)).st_size < MIN_SIZE:
+                continue
+            #end
+
+            result = TimedTest(self.Verf(v+1),
+                               self.Verf(v),
+                               runclass.New())
+
+            target_size = os.stat(self.Verf(v+1)).st_size
+
+            ntrials.append(result)
+        #end
+
+        os.remove(self.Verf(self.totrev-1))
+        os.remove(self.Verf(self.totrev-2))
+        return ntrials
+    #end
+
+    def AppendVersion(self, f, n):
+        self.Checkout(n)
+        rf = open(self.Verf(n), "r")
+        data = rf.read()
+        f.write(data)
+        rf.close()
+        return len(data)
+    #end
+
+class RcsFinder:
+    def __init__(self):
+        self.subdirs  = []
+        self.rcsfiles = []
+        self.others   = []
+        self.skipped  = []
+        self.biground = 0
+    #end
+
+    def Scan1(self,dir):
+        dents = os.listdir(dir)
+        subdirs  = []
+        rcsfiles = []
+        others   = []
+        for dent in dents:
+            full = os.path.join(dir, dent)
+            if os.path.isdir(full):
+                subdirs.append(full)
+            elif dent[len(dent)-2:] == ",v":
+                rcsfiles.append(RcsFile(full))
+            else:
+                others.append(full)
+            #end
+        #end
+        self.subdirs  = self.subdirs  + subdirs
+        self.rcsfiles = self.rcsfiles + rcsfiles
+        self.others   = self.others   + others
+        return subdirs
+    #end
+
+    def Crawl(self, dir):
+        subdirs = [dir]
+        while subdirs:
+            s1 = self.Scan1(subdirs[0])
+            subdirs = subdirs[1:] + s1
+        #end
+    #end
+
+    def Summarize(self):
+        good = []
+        for rf in self.rcsfiles:
+            try:
+                rf.Sum1()
+                if rf.totrev < 2:
+                    raise SkipRcsException('too few versions (< 2)')
+                #end
+            except SkipRcsException, e:
+                #print 'skipping file %s: %s' % (rf.fname, e.reason)
+                self.skipped.append(rf)
+            else:
+                good.append(rf)
+            #end
+        self.rcsfiles = good
+    #end
+
+    def AllPairsByDate(self, runclass):
+        results = []
+        good = []
+        for rf in self.rcsfiles:
+            try:
+                results = results + rf.FilePairsByDate(runclass)
+            except SkipRcsException:
+                print 'file %s has compressed versions: skipping' % (rf.fname)
+            except NotEnoughVersions:
+                print 'testing %s on %s: not enough versions' % (runclass, rf.fname)
+            else:
+                good.append(rf)
+            #end
+        self.rcsfiles = good
+        self.ReportPairs(runclass, results)
+        return results
+    #end
+
+    def ReportPairs(self, name, results):
+        encode_time = 0
+        decode_time = 0
+        encode_size = 0
+        for r in results:
+            encode_time += r.encode_time.mean
+            decode_time += r.decode_time.mean
+            encode_size += r.encode_size
+        #end
+        print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \
+              (name, encode_time, decode_time, encode_size)
+    #end
+
+    def MakeBigFiles(self, rand):
+        f1 = open(TMPDIR + "/big.1", "w")
+        f2 = open(TMPDIR + "/big.2", "w")
+        population = []
+        for file in self.rcsfiles:
+            if len(file.versions) < 2:
+                continue
+            population.append(file)
+        #end
+        f1sz = 0
+        f2sz = 0
+        fcount = int(len(population) * FILE_P)
+        assert fcount > 0
+        for file in rand.sample(population, fcount):
+            m = IGNORE_FILENAME.match(file.fname)
+            if m != None:
+                continue
+            #end
+            r1, r2 = rand.sample(xrange(0, len(file.versions)), 2)
+            f1sz += file.AppendVersion(f1, r1)
+            f2sz += file.AppendVersion(f2, r2)
+            #m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2)))
+        #end
+        testkey = 'rcs%d' % self.biground
+        self.biground = self.biground + 1
+
+        print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz)
+        f1.close()
+        f2.close()
+        return (TMPDIR + "/big.1",
+                TMPDIR + "/big.2",
+                testkey)
+    #end
+
+    def Generator(self):
+        return lambda rand: self.MakeBigFiles(rand)
+    #end
+#end
+
+# find a set of RCS files for testing
+def GetTestRcsFiles():
+    rcsf = RcsFinder()
+    rcsf.Crawl(RCSDIR)
+    if len(rcsf.rcsfiles) == 0:
+        raise CommandError('', 'no RCS files')
+    #end
+    rcsf.Summarize()
+    print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles),
+                                                                        len(rcsf.subdirs),
+                                                                        len(rcsf.others),
+                                                                        len(rcsf.skipped))
+    print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str
+    print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str
+    return rcsf
+#end
+
+class SampleDataTest:
+    def __init__(self, dirs):
+        self.pairs = []
+        while dirs:
+            d = dirs[0]
+            dirs = dirs[1:]
+            l = os.listdir(d)
+            files = []
+            for e in l:
+                p = os.path.join(d, e)
+                if os.path.isdir(p):
+                    dirs.append(p)
+                else:
+                    files.append(p)
+                #end
+            #end
+            if len(files) > 1:
+                files.sort()
+                for x in xrange(len(files) - 1):
+                    self.pairs.append((files[x], files[x+1],
+                                       '%s-%s' % (files[x], files[x+1])))
+                #end
+            #end
+        #end
+    #end
+
+    def Generator(self):
+        return lambda rand: rand.choice(self.pairs)
+    #end
+#end
+
+# configs are represented as a list of values,
+# program takes a list of strings:
+def ConfigToArgs(config):
+    args = [ '-C',
+             ','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])]
+    for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)):
+        key = CONFIG_ARGMAP[CONFIG_ORDER[i]]
+        val = config[i]
+        if val == 'true' or val == 'false':
+            if val == 'true':
+                args.append('%s' % key)
+            #end
+        else:
+            args.append('%s=%s' % (key, val))
+        #end
+    #end
+    return args
+#end
+
+#
+class RandomTest:
+    def __init__(self, tnum, tinput, config, syntuple = None):
+        self.mytinput = tinput[2]
+        self.myconfig = config
+        self.tnum = tnum
+
+        if syntuple != None:
+            self.runtime = syntuple[0]
+            self.compsize = syntuple[1]
+            self.decodetime = None
+        else:
+            args = ConfigToArgs(config)
+            result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args))
+
+            self.runtime = result.encode_time.mean
+            self.compsize = result.encode_size
+            self.decodetime = result.decode_time.mean
+        #end
+
+        self.score = None
+        self.time_pos = None
+        self.size_pos = None
+        self.score_pos = None
+    #end
+
+    def __str__(self):
+        decodestr = ' %.6f' % self.decodetime
+        return 'time %.6f%s size %d%s << %s >>%s' % (
+            self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""),
+            self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""),
+            c2str(self.config()),
+            decodestr)
+    #end
+
+    def time(self):
+        return self.runtime
+    #end
+
+    def size(self):
+        return self.compsize
+    #end
+
+    def config(self):
+        return self.myconfig
+    #end
+
+    def score(self):
+        return self.score
+    #end
+
+    def tinput(self):
+        return self.mytinput
+    #end
+#end
+
+def PosInAlist(l, e):
+    for i in range(0, len(l)):
+        if l[i][1] == e:
+            return i;
+        #end
+    #end
+    return -1
+#end
+
+# Generates a set of num_results test configurations, given the list of
+# retest-configs.
+def RandomTestConfigs(rand, input_configs, num_results):
+
+    outputs = input_configs[:]
+    have_set = dict([(c,c) for c in input_configs])
+
+    # Compute a random configuration
+    def RandomConfig():
+        config = []
+        cmap = {}
+        for key in CONFIG_ORDER:
+            val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap)
+            config.append(val)
+        #end
+        return tuple(config)
+    #end
+
+    while len(outputs) < num_results:
+        newc = None
+        for i in xrange(100):
+            c = RandomConfig()
+            if have_set.has_key(c):
+                continue
+            #end
+            have_set[c] = c
+            newc = c
+            break
+        if newc is None:
+            print 'stopped looking for configs at %d' % len(outputs)
+            break
+        #end
+        outputs.append(c)
+    #end
+    outputs.sort()
+    return outputs
+#end
+
+def RunTestLoop(rand, generator, rounds):
+    configs = []
+    for rnum in xrange(rounds):
+        configs = RandomTestConfigs(rand, configs, MAX_RESULTS)
+        tinput = generator(rand)
+        tests = []
+        for x in xrange(len(configs)):
+            t = RandomTest(x, tinput, configs[x])
+            print 'Round %d test %d: %s' % (rnum, x, t)
+            tests.append(t)
+        #end
+        results = ScoreTests(tests)
+
+        for r in results:
+            c = r.config()
+            if not test_all_config_results.has_key(c):
+                test_all_config_results[c] = [r]
+            else:
+                test_all_config_results[c].append(r)
+            #end
+        #end
+
+        GraphResults('expt%d' % rnum, results)
+        GraphSummary('sum%d' % rnum, results)
+
+        # re-test some fraction
+        configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]]
+    #end
+#end
+
+# TODO: cleanup.  Maps a config to the list of its test results (filled in by RunTestLoop).
+test_all_config_results = {}
+
+def ScoreTests(results):
+    scored = []
+    timed = []
+    sized = []
+
+    t_min = float(min([test.time() for test in results]))
+    #t_max = float(max([test.time() for test in results]))
+    s_min = float(min([test.size() for test in results]))
+    #s_max = float(max([test.size() for test in results]))
+
+    for test in results:
+
+        # Hyperbolic function. Smaller scores still better
+        red = 0.999  # minimum factors for each dimension are 1/1000
+        test.score = ((test.size() - s_min * red) *
+                      (test.time() - t_min * red))
+
+        scored.append((test.score, test))
+        timed.append((test.time(), test))
+        sized.append((test.size(), test))
+    #end
+
+    scored.sort()
+    timed.sort()
+    sized.sort()
+
+    best_by_size = []
+    best_by_time = []
+
+    pos = 0
+    for (score, test) in scored:
+        pos += 1
+        test.score_pos = pos
+    #end
+
+    scored = [x[1] for x in scored]
+
+    for test in scored:
+        test.size_pos = PosInAlist(sized, test)
+        test.time_pos = PosInAlist(timed, test)
+    #end
+
+    for test in scored:
+        c = test.config()
+        s = 0.0
+        print 'H-Score: %0.9f %s' % (test.score, test)
+    #end
+
+    return scored
+#end
+
+def GraphResults(desc, results):
+    f = open("data-%s.csv" % desc, "w")
+    for r in results:
+        f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r))
+    #end
+    f.close()
+    os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc))
+#end
+
+def GraphSummary(desc, results_ignore):
+    test_population = 0
+    config_ordered = []
+
+    # drops duplicate test/config pairs (TODO: don't retest them)
+    for config, cresults in test_all_config_results.items():
+        input_config_map = {}
+        uniq = []
+        for test in cresults:
+            assert test.config() == config
+            test_population += 1
+            key = test.tinput()
+            if not input_config_map.has_key(key):
+                input_config_map[key] = {}
+            #end
+            if input_config_map[key].has_key(config):
+                print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test)
+                continue
+            #end
+            input_config_map[key][config] = test
+            uniq.append(test)
+        #end
+        config_ordered.append(uniq)
+    #end
+
+    # sort configs descending by number of tests
+    config_ordered.sort(lambda x, y: len(y) - len(x))
+
+    print 'population %d: %d configs %d results' % \
+          (test_population,
+           len(config_ordered),
+           len(config_ordered[0]))
+
+    if config_ordered[0] == 1:
+        return
+    #end
+
+    # a map from test-key to test-list w/ various configs
+    input_set = {}
+    osize = len(config_ordered)
+
+    for i in xrange(len(config_ordered)):
+        config = config_ordered[i][0].config()
+        config_tests = config_ordered[i]
+
+        #print '%s has %d tested inputs' % (config, len(config_tests))
+
+        if len(input_set) == 0:
+            input_set = dict([(t.tinput(), [t]) for t in config_tests])
+            continue
+        #end
+
+        # a map from test-key to test-list w/ various configs
+        update_set = {}
+        for r in config_tests:
+            t = r.tinput()
+            if input_set.has_key(t):
+                update_set[t] = input_set[t] + [r]
+            else:
+                #print 'config %s does not have test %s' % (config, t)
+                pass
+            #end
+        #end
+
+        if len(update_set) <= 1:
+            break
+        #end
+
+        input_set = update_set
+
+        # continue if there are more w/ the same number of inputs
+        if i < (len(config_ordered) - 1) and \
+           len(config_ordered[i + 1]) == len(config_tests):
+            continue
+        #end
+
+        # synthesize results for multi-test inputs
+        config_num = None
+
+        # map of config to sum(various test-keys)
+        smap = {}
+        for (key, tests) in input_set.items():
+            if config_num == None:
+                # config_num should be the same in all elements
+                config_num = len(tests)
+                smap = dict([(r.config(),
+                              (r.time(),
+                               r.size()))
+                             for r in tests])
+            else:
+                # compuate the per-config sum of time/size
+                assert config_num == len(tests)
+                smap = dict([(r.config(),
+                              (smap[r.config()][0] + r.time(),
+                               smap[r.config()][1] + r.size()))
+                             for r in tests])
+            #end
+        #end
+
+        if config_num == 1:
+            continue
+        #end
+
+        if len(input_set) == osize:
+            break
+        #end
+
+        summary = '%s-%d' % (desc, len(input_set))
+        osize = len(input_set)
+
+        print 'generate %s w/ %d configs' % (summary, config_num)
+        syn = [RandomTest(0, (None, None, summary), config,
+                          syntuple = (smap[config][0], smap[config][1]))
+               for config in smap.keys()]
+        syn = ScoreTests(syn)
+        #print 'smap is %s' % (smap,)
+        #print 'syn is %s' % (' and '.join([str(x) for x in syn]))
+        GraphResults(summary, syn)
+    #end
+#end
+
+if __name__ == "__main__":  # script entry point: runs the RCS pair benchmarks enabled below
+    try:
+        RunCommand(['rm', '-rf', TMPDIR])  # start from an empty TMPDIR
+        os.mkdir(TMPDIR)
+
+        rcsf = GetTestRcsFiles()
+        #generator = rcsf.Generator()
+
+        #sample = SampleDataTest([SAMPLEDIR])
+        #generator = sample.Generator()
+
+        #rand = random.Random(135135135135135)
+        #RunTestLoop(rand, generator, TEST_ROUNDS)
+
+        #RunSpeedTest()
+
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9']))
+        x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw']))
+        x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-1', '-S', 'djw']))
+        #x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T']))
+
+        #x1r = rcsf.AllPairsByDate(Xdelta1RunClass())
+
+    except CommandError:  # its constructor has already printed the diagnostics
+        pass
+    else:
+        RunCommand(['rm', '-rf', TMPDIR])  # TMPDIR is removed only when no exception was raised
+        pass
+    #end
+#end
diff --git a/xdelta3-second.h b/xdelta3-second.h
new file mode 100644
index 0000000..9096d0f
--- /dev/null
+++ b/xdelta3-second.h
@@ -0,0 +1,315 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2002, 2003, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _XDELTA3_SECOND_H_
+#define _XDELTA3_SECOND_H_
+
+static inline void xd3_bit_state_encode_init (bit_state *bits)
+{
+  bits->cur_byte = 0;  /* no bits accumulated in the pending byte */
+  bits->cur_mask = 1;  /* xd3_encode_bit stores the next bit at this mask */
+}
+
+static inline int xd3_decode_bits (xd3_stream     *stream,
+				   bit_state      *bits,
+				   const uint8_t **input,
+				   const uint8_t  *input_max,
+				   usize_t         nbits,
+				   usize_t        *valuep)
+{
+  usize_t value = 0;
+  usize_t vmask = (usize_t) 1 << nbits; /* shift as usize_t, not as int */
+  /* Reads nbits bits into *valuep, most-significant value bit first; each
+   * input byte is consumed from mask 1 upward, 0x100 meaning "used up". */
+  if (bits->cur_mask == 0x100) { goto next_byte; }
+  for (;;)
+    {
+      do
+	{
+	  vmask >>= 1;
+
+	  if (bits->cur_byte & bits->cur_mask)
+	    {
+	      value |= vmask;
+	    }
+
+	  bits->cur_mask <<= 1;
+
+	  if (vmask == 1) { goto done; }
+	}
+      while (bits->cur_mask != 0x100);
+
+    next_byte:
+
+      if (*input == input_max)
+	{
+	  stream->msg = "secondary decoder end of input";
+	  return XD3_INTERNAL;
+	}
+
+      bits->cur_byte = *(*input)++;
+      bits->cur_mask = 1;
+    }
+
+ done:
+
+  IF_DEBUG2 (DP(RINT "(d) %u ", value));
+
+  (*valuep) = value;
+  return 0;
+}
+
+#if REGRESSION_TEST
+/* There may be extra bits at the end of secondary decompression; this
+ * function checks for non-zero bits.  This is overly strict, but helps pass
+ * the single-bit-error regression test. */
+static int
+xd3_test_clean_bits (xd3_stream *stream, bit_state *bits)
+{
+  while (bits->cur_mask != 0x100)
+    {
+      if (bits->cur_byte & bits->cur_mask)
+	{
+	  stream->msg = "secondary decoder garbage";
+	  return XD3_INTERNAL;
+	}
+      bits->cur_mask <<= 1;
+    }
+  return 0;
+}
+#endif
+
+static xd3_sec_stream*
+xd3_get_secondary (xd3_stream *stream, xd3_sec_stream **sec_streamp)
+{
+  /* Lazily creates the secondary stream: reuses *sec_streamp if it is set,
+   * otherwise allocates and initializes it.  Returns NULL if alloc fails. */
+  if (*sec_streamp != NULL)
+    {
+      return *sec_streamp;
+    }
+
+  *sec_streamp = stream->sec_type->alloc (stream);
+
+  if (*sec_streamp != NULL)
+    {
+      /* If cumulative stats, init once. */
+      stream->sec_type->init (*sec_streamp);
+    }
+
+  return *sec_streamp;
+}
+
+static int
+xd3_decode_secondary (xd3_stream      *stream,
+		      xd3_desect      *sect,
+		      xd3_sec_stream **sec_streamp)
+{
+  xd3_sec_stream *sec_stream = xd3_get_secondary (stream, sec_streamp);
+  uint32_t dec_size;
+  uint8_t *out_used;
+  int ret;
+
+  /* Replaces the section's input buffer with its secondary-decoded form. */
+  if (sec_stream == NULL)
+    {
+      return ENOMEM;
+    }
+
+  /* Decode the size... */
+  ret = xd3_read_size (stream, & sect->buf, sect->buf_max, & dec_size);
+  if (ret != 0) { return ret; }
+
+  /* ...allocate the buffer. */
+  ret = xd3_decode_allocate (stream, dec_size,
+			     & sect->copied2, & sect->alloc2);
+  if (ret != 0) { return ret; }
+
+  out_used = sect->copied2;
+
+  ret = stream->sec_type->decode (stream, sec_stream,
+				  & sect->buf, sect->buf_max,
+				  & out_used, out_used + dec_size);
+  if (ret != 0) { return ret; }
+
+  /* All of the input must have been consumed... */
+  if (sect->buf != sect->buf_max)
+    {
+      stream->msg = "secondary decoder finished with unused input";
+      return XD3_INTERNAL;
+    }
+
+  /* ...and the output must end exactly dec_size bytes in. */
+  if (out_used != sect->copied2 + dec_size)
+    {
+      stream->msg = "secondary decoder short output";
+      return XD3_INTERNAL;
+    }
+
+  sect->buf = sect->copied2;
+  sect->buf_max = sect->copied2 + dec_size;
+  sect->size = dec_size;
+  return 0;
+}
+
+#if XD3_ENCODER
+static inline int xd3_encode_bit (xd3_stream      *stream,
+				  xd3_output     **output,
+				  bit_state       *bits,
+				  int              bit)
+{
+  int ret;
+
+  /* Bits fill cur_byte from mask 0x01 up to mask 0x80. */
+  if (bit)
+    {
+      bits->cur_byte |= bits->cur_mask;
+    }
+
+  if (bits->cur_mask != 0x80)
+    {
+      /* Byte not full yet: just advance to the next bit position. */
+      bits->cur_mask <<= 1;
+      return 0;
+    }
+
+  /* The byte is complete: emit it and start a new one. */
+  /* OPT: Might help to buffer more than 8 bits at once. */
+  ret = xd3_emit_byte (stream, output, bits->cur_byte);
+  if (ret != 0) { return ret; }
+
+  bits->cur_mask = 1;
+  bits->cur_byte = 0;
+
+  return 0;
+}
+
+static inline int xd3_flush_bits (xd3_stream      *stream,
+				  xd3_output     **output,
+				  bit_state       *bits)
+{
+  if (bits->cur_mask == 1) { return 0; }  /* no bits pending in cur_byte */
+  return xd3_emit_byte (stream, output, bits->cur_byte);
+}
+
+static inline int xd3_encode_bits (xd3_stream      *stream,
+				   xd3_output     **output,
+				   bit_state       *bits,
+				   usize_t           nbits,
+				   usize_t           value)
+{
+  int ret;
+  usize_t mask = (usize_t) 1 << nbits; /* shift as usize_t, not as int */
+  /* Emits the low nbits bits of value, most-significant bit first. */
+  XD3_ASSERT (nbits > 0);
+  XD3_ASSERT (nbits < sizeof (usize_t) * 8);
+  XD3_ASSERT (value < mask);
+
+  do
+    {
+      mask >>= 1;
+
+      if ((ret = xd3_encode_bit (stream, output, bits, value & mask)))
+	{
+	  return ret;
+	}
+    }
+  while (mask != 1);
+
+  IF_DEBUG2 (DP(RINT "(e) %u ", value));
+
+  return 0;
+}
+
+static int
+xd3_encode_secondary (xd3_stream      *stream,
+		      xd3_output     **head,
+		      xd3_output     **tail,
+		      xd3_sec_stream **sec_streamp,
+		      xd3_sec_cfg     *cfg,
+		      int             *did_it)
+{
+  xd3_sec_stream *sec_stream;
+  xd3_output     *tmp_head;
+  xd3_output     *tmp_tail;
+  usize_t comp_size;
+  usize_t orig_size;
+  int ret;
+
+  /* Tries to replace the section *head by its secondary-compressed form.  If
+   * it is kept, *head/*tail point at the new output and *did_it is set to 1. */
+  orig_size = xd3_sizeof_output (*head);
+
+  if (orig_size < SECONDARY_MIN_INPUT) { return 0; }
+
+  if ((sec_stream = xd3_get_secondary (stream, sec_streamp)) == NULL)
+    {
+      return ENOMEM;
+    }
+
+  if ((tmp_head = xd3_alloc_output (stream, NULL)) == NULL)
+    { return ENOMEM; } /* never pass a NULL page on to xd3_emit_size */
+  /* Encode the size, encode the data.  Encoding the size makes it
+   * simpler, but is a little gross.  Should not need the entire
+   * section in contiguous memory, but it is much easier this way. */
+  if ((ret = xd3_emit_size (stream, & tmp_head, orig_size)) ||
+      (ret = stream->sec_type->encode (stream, sec_stream, *head,
+				       tmp_head, cfg)))
+    {
+      goto getout;
+    }
+
+  /* If the secondary compressor determines it's no good, it returns
+   * XD3_NOSECOND. */
+
+  /* Setup tmp_tail, comp_size */
+  tmp_tail  = tmp_head;
+  comp_size = tmp_head->next;
+
+  while (tmp_tail->next_page != NULL)
+    {
+      tmp_tail = tmp_tail->next_page;
+      comp_size += tmp_tail->next;
+    }
+
+  XD3_ASSERT (comp_size == xd3_sizeof_output (tmp_head));
+  XD3_ASSERT (tmp_tail != NULL);
+
+  if (comp_size < (orig_size - SECONDARY_MIN_SAVINGS))
+    {
+      IF_DEBUG1(DP(RINT "secondary saved %u bytes: %u -> %u (%0.2f%%)\n",
+		   orig_size - comp_size, orig_size, comp_size,
+	       100.0 * (double) comp_size / (double) orig_size));
+
+      xd3_free_output (stream, *head);
+
+      *head = tmp_head;
+      *tail = tmp_tail;
+      *did_it = 1;
+    }
+  else
+    {
+    getout:
+      if (ret == XD3_NOSECOND) { ret = 0; }
+      xd3_free_output (stream, tmp_head);
+    }
+
+  return ret;
+}
+#endif /* XD3_ENCODER */
+#endif /* _XDELTA3_SECOND_H_ */
diff --git a/xdelta3-test.h b/xdelta3-test.h
new file mode 100644
index 0000000..979683f
--- /dev/null
+++ b/xdelta3-test.h
@@ -0,0 +1,2827 @@
+/* xdelta 3 - delta compression tools and library
+ * Copyright (C) 2001, 2003, 2004, 2005, 2006, 2007.  Joshua P. MacDonald
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+/* This is public-domain Mersenne Twister code,
+ * attributed to Michael Brundage.  Thanks!
+ * http://www.qbrundage.com/michaelb/pubs/essays/random_number_generation.html
+ */
+static const uint32_t TEST_SEED1 = 5489UL;
+#define MT_LEN 624  /* number of 32-bit words in mtrand.mt_buffer_ */
+#define MT_IA 397   /* distance to the second word mixed in by mt_random() */
+static const uint32_t UPPER_MASK = 0x80000000;  /* top bit of a state word */
+static const uint32_t LOWER_MASK = 0x7FFFFFFF;  /* low 31 bits of a state word */
+static const uint32_t MATRIX_A = 0x9908B0DF;    /* XORed in by mt_random() when the mixed word is odd */
+
+typedef struct mtrand mtrand;
+
+struct mtrand {
+  int mt_index_;                /* next word of mt_buffer_ to hand out; >= MT_LEN makes mt_random() regenerate */
+  uint32_t mt_buffer_[MT_LEN];  /* generator state words, seeded by mt_init() */
+};
+
+/* Seed MT: slot 0 takes SEED, each later slot derives from its predecessor. */
+void mt_init(mtrand *mt, uint32_t seed) {
+  uint32_t prev = seed;
+  int slot;
+  mt->mt_buffer_[0] = prev;
+  for (slot = 1; slot < MT_LEN; slot++) {
+    /* Multiplier: Knuth TAOCP Vol2. 3rd Ed. P.106.  Modified 2002/01/09 by Makoto
+     * Matsumoto: in previous versions seed MSBs affected only MSBs of mt[]. */
+    prev = 1812433253UL * (prev ^ (prev >> 30)) + slot;
+    mt->mt_buffer_[slot] = prev;
+  }
+  mt->mt_index_ = MT_LEN;  /* mt_random() regenerates when index >= MT_LEN */
+}
+
+
+/* Return the next 32-bit value from MT.  Once all MT_LEN buffered words
+ * have been consumed (also the state mt_init() leaves behind), the whole
+ * buffer is regenerated in place before a word is taken. */
+uint32_t mt_random (mtrand *mt) {
+  uint32_t out;
+
+  if (mt->mt_index_ >= MT_LEN) {
+    int pos;
+
+    /* Rebuild each word from its own top bit, the low 31 bits of its
+     * successor and the word MT_IA slots ahead; both neighbours wrap
+     * around the end of the buffer. */
+    for (pos = 0; pos < MT_LEN; pos++) {
+      const int succ = (pos + 1) % MT_LEN;
+      const int far_ix = (pos + MT_IA) % MT_LEN;
+      const uint32_t mix = (mt->mt_buffer_[pos] & UPPER_MASK) |
+        (mt->mt_buffer_[succ] & LOWER_MASK);
+      const uint32_t twist = (mix & 1u) ? MATRIX_A : 0;
+      mt->mt_buffer_[pos] = mt->mt_buffer_[far_ix] ^ (mix >> 1) ^ twist;
+    }
+    mt->mt_index_ = 0;
+  }
+
+  /* Take the next state word and temper it before returning. */
+  out = mt->mt_buffer_[mt->mt_index_++];
+  out ^= (out >> 11);
+  out ^= (out << 7) & 0x9d2c5680UL;
+  out ^= (out << 15) & 0xefc60000UL;
+  out ^= (out >> 18);
+  return out;
+}
+
+static mtrand static_mtrand;  /* generator shared by mt_exp_rand() and the test helpers */
+
+#include <math.h>
+
+/* Exponentially distributed random value with the given mean, capped at max_value. */
+static uint32_t
+mt_exp_rand (uint32_t mean, uint32_t max_value)
+{
+  uint32_t r = mt_random (&static_mtrand);
+  /* r == 0 would give log(1/0) = +inf; casting inf (or any double above
+   * the uint32_t range) to uint32_t is undefined, so clamp before casting. */
+  double x = (r == 0) ? HUGE_VAL : mean * log (1.0 / (r / (double)UINT32_MAX)) + 0.5;
+  return (x >= (double) max_value) ? max_value : (uint32_t) x;
+}
+
+#ifndef WIN32
+#include <sys/wait.h>
+#endif
+
+#define MSG_IS(x) (stream->msg != NULL && strcmp ((x), stream->msg) == 0)  /* needs a `stream` pointer in scope */
+
+static const usize_t TWO_MEGS_AND_DELTA = (2 << 20) + (1 << 10);  /* 2 MiB + 1 KiB */
+static const usize_t ADDR_CACHE_ROUNDS = 10000;
+
+static const usize_t TEST_FILE_MEAN   = 16384;  /* test_make_inputs() sizes: rand % MEAN + MEAN / 2 */
+static const double TEST_ADD_MEAN     = 128;
+static const double TEST_ADD_MAX      = 512;
+static const double TEST_ADD_RATIO    = 0.1;    /* scales the source size into sadd_max in test_make_inputs() */
+static const double TEST_EPSILON      = 0.25;
+
+#define TESTBUFSIZE (1024 * 16)  /* size of the command buffer in test_unlink() */
+
+#define TESTFILESIZE (1024)      /* capacity of each TEST_*_FILE path buffer */
+
+static char   TEST_TARGET_FILE[TESTFILESIZE];  /* all seven paths are written by test_setup() */
+static char   TEST_SOURCE_FILE[TESTFILESIZE];
+static char   TEST_DELTA_FILE[TESTFILESIZE];
+static char   TEST_RECON_FILE[TESTFILESIZE];
+static char   TEST_RECON2_FILE[TESTFILESIZE];
+static char   TEST_COPY_FILE[TESTFILESIZE];
+static char   TEST_NOPERM_FILE[TESTFILESIZE];
+
+#define CHECK(cond) if (!(cond)) { DP(RINT "check failure: " #cond); abort(); }  /* NOTE(review): bare if-block, not do/while(0); unsafe directly before an else */
+
+/* Use a fixed soft config so that test values are fixed.  See also
+ * test_compress_text(). */
+static const char* test_softcfg_str = "-C9,3,4,8,2,36,70";
+
+static int test_setup (void);
+
+/***********************************************************************
+ TEST HELPERS
+ ***********************************************************************/
+
+static void DOT (void) { DP(RINT "."); }  /* emit a single "." through DP */
+/* Run BUF through system(); on any non-zero status, record why in
+ * stream->msg and return XD3_INTERNAL.  Returns 0 on success. */
+static int do_cmd (xd3_stream *stream, const char *buf)
+{
+  int status = system (buf);
+
+  if (status == 0)
+    {
+      return 0;
+    }
+
+  stream->msg = WIFEXITED (status)
+    ? "command exited non-zero"
+    : "abnormal command termination";
+
+  return XD3_INTERNAL;
+}
+
+/* Run BUF through system() and require that it exits with status 1;
+ * anything else is reported as XD3_INTERNAL with stream->msg set. */
+static int do_fail (xd3_stream *stream, const char *buf)
+{
+  int status = system (buf);
+
+  if (WIFEXITED (status) && WEXITSTATUS (status) == 1) { return 0; }
+
+  stream->msg = "command should have not succeeded";
+  DP(RINT "command was %s", buf);
+  return XD3_INTERNAL;
+}
+
+/* Seed the shared generator with a fixed value, draw one million samples
+ * from mt_exp_rand() with mean 50, and require the sample average to be
+ * within 0.1 of that mean.  The IGNORE argument is unused. */
+static int
+test_random_numbers (xd3_stream *stream, int ignore)
+{
+  const usize_t want_mean = 50;
+  const usize_t draws = 1000000;
+  const double tolerance = 0.1;
+  usize_t total = 0;
+  usize_t k;
+  double sample_mean, deviation;
+
+  mt_init (& static_mtrand, 0x9f73f7fe);
+
+  for (k = 0; k < draws; k += 1)
+    {
+      total += mt_exp_rand (want_mean, USIZE_T_MAX);
+    }
+
+  sample_mean = (double) total / (double) draws;
+  deviation   = sample_mean - (double) want_mean;
+
+  if (! (deviation < tolerance && deviation > -tolerance))
+    {
+      stream->msg = "random distribution looks broken";
+      return XD3_INTERNAL;
+    }
+  return 0;
+}
+
+/* Remove FILE, falling back to "rm -f" when unlink() fails for a reason
+ * other than the file being absent.  Gives up after a few attempts so a
+ * file that cannot be removed does not hang the test run forever. */
+static void
+test_unlink (char* file)
+{
+  char buf[TESTBUFSIZE];
+  int tries;
+  for (tries = 0; tries < 3 && unlink (file) != 0 && errno != ENOENT; tries++)
+    {
+      sprintf (buf, "rm -f %s", file);
+      if (system (buf) != 0) { break; }
+    }
+}
+
+/* Delete every scratch file named by the TEST_*_FILE path buffers. */
+static void
+test_cleanup (void)
+{
+  static char *const scratch[] = {
+    TEST_TARGET_FILE, TEST_SOURCE_FILE, TEST_DELTA_FILE, TEST_RECON_FILE,
+    TEST_RECON2_FILE, TEST_COPY_FILE, TEST_NOPERM_FILE
+  };
+  size_t n;
+  for (n = 0; n < sizeof scratch / sizeof scratch[0]; n++) { test_unlink (scratch[n]); }
+}
+
+/* Point the TEST_*_FILE paths at fresh /tmp names and clear stale files. */
+static int test_setup (void)
+{
+  static int generation = 0;
+  generation += 1;
+  sprintf (TEST_TARGET_FILE, "/tmp/xdtest.target.%d", generation);
+  sprintf (TEST_SOURCE_FILE, "/tmp/xdtest.source.%d", generation);
+  sprintf (TEST_DELTA_FILE, "/tmp/xdtest.delta.%d", generation);
+  sprintf (TEST_RECON_FILE, "/tmp/xdtest.recon.%d", generation);
+  sprintf (TEST_RECON2_FILE, "/tmp/xdtest.recon2.%d", generation);
+  sprintf (TEST_COPY_FILE, "/tmp/xdtest.copy.%d", generation);
+  sprintf (TEST_NOPERM_FILE, "/tmp/xdtest.noperm.%d", generation);
+  test_cleanup ();
+  return 0;
+}
+
+static int
+test_make_inputs (xd3_stream *stream, xoff_t *ss_out, xoff_t *ts_out)
+{
+  usize_t ts = (mt_random (&static_mtrand) % TEST_FILE_MEAN) + TEST_FILE_MEAN / 2;
+  usize_t ss = (mt_random (&static_mtrand) % TEST_FILE_MEAN) + TEST_FILE_MEAN / 2;
+  uint8_t *buf = (uint8_t*) malloc (ts + ss), *sbuf = buf, *tbuf = buf + ss;
+  usize_t sadd = 0, sadd_max = ss * TEST_ADD_RATIO;
+  FILE  *tf = NULL, *sf = NULL;
+  usize_t i, j;
+  int ret;
+
+  if (buf == NULL) { return ENOMEM; }
+
+  if ((tf = fopen (TEST_TARGET_FILE, "w")) == NULL ||
+      (ss_out != NULL && (sf = fopen (TEST_SOURCE_FILE, "w")) == NULL))
+    {
+      stream->msg =